Line 1... |
Line 1... |
/////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
//
|
//
|
// Filename: fftgen.cpp
|
// Filename: fftgen.cpp
|
//
|
//
|
// Project: A Doubletime Pipelined FFT
|
// Project: A Doubletime Pipelined FFT
|
//
|
//
|
Line 21... |
Line 21... |
// make in the documents directory to build it.
|
// make in the documents directory to build it.
|
//
|
//
|
// Creator: Dan Gisselquist, Ph.D.
|
// Creator: Dan Gisselquist, Ph.D.
|
// Gisselquist Technology, LLC
|
// Gisselquist Technology, LLC
|
//
|
//
|
///////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
//
|
//
|
// Copyright (C) 2015, Gisselquist Technology, LLC
|
// Copyright (C) 2015, Gisselquist Technology, LLC
|
//
|
//
|
// This program is free software (firmware): you can redistribute it and/or
|
// This program is free software (firmware): you can redistribute it and/or
|
// modify it under the terms of the GNU General Public License as published
|
// modify it under the terms of the GNU General Public License as published
|
Line 44... |
Line 44... |
//
|
//
|
// License: GPL, v3, as defined and found on www.gnu.org,
|
// License: GPL, v3, as defined and found on www.gnu.org,
|
// http://www.gnu.org/licenses/gpl.html
|
// http://www.gnu.org/licenses/gpl.html
|
//
|
//
|
//
|
//
|
///////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
//
|
//
|
//
|
//
|
#include <stdio.h>
|
#include <stdio.h>
|
#include <stdlib.h>
|
#include <stdlib.h>
|
#include <unistd.h>
|
#include <unistd.h>
|
Line 62... |
Line 62... |
// Generator defaults.
// NOTE(review): presumably the fall-back values used when the matching
// option is not given to the generator -- confirm against the option parser.
#define	DEF_NBITSIN	16
#define	DEF_COREDIR	"fft-core"
#define	DEF_XTRACBITS	4
#define	DEF_NMPY	0
#define	DEF_XTRAPBITS	0

// Selects the soft multiply used by the generated butterfly: when true the
// butterfly instantiates shiftaddmpy, otherwise longbimpy.  bflydelay()
// chooses its delay formula from the same flag.
#define	USE_OLD_MULTIPLY	false

// To coordinate testing, it helps to have some defines in our header file that
// are common with the default parameters found within the various subroutines.
// We'll define those common parameters here.  These values, however, have no
// effect on anything other than bench testing.  They do, though, allow us to
// bench test exact copies of what is going on within the FFT when necessary
// in order to find problems.
//
// These must remain preprocessor macros (rather than enum constants): several
// of them are tested with #ifdef to decide what text gets generated.

// First, parameters for the new multiply based upon the bi-multiply structure
// (2-bits/2-tableau rows at a time).
#define	TST_LONGBIMPY_AW	16
#define	TST_LONGBIMPY_BW	20	// Leave undefined to match AW

// We also include parameters for the shift add multiply
#define	TST_SHIFTADDMPY_AW	16
#define	TST_SHIFTADDMPY_BW	20	// Leave undefined to match AW

// Now for parameters matching the butterfly
#define	TST_BUTTERFLY_IWIDTH	16
#define	TST_BUTTERFLY_CWIDTH	20
#define	TST_BUTTERFLY_OWIDTH	17

// Now for parameters matching the qtrstage
#define	TST_QTRSTAGE_IWIDTH	16
#define	TST_QTRSTAGE_LGWIDTH	8

// Parameters for the dblstage
#define	TST_DBLSTAGE_IWIDTH	16
#define	TST_DBLSTAGE_SHIFT	0

// Now for parameters matching the dblreverse stage
#define	TST_DBLREVERSE_LGSIZE	5
|
|
|
// ROUND_T
//
// Identifies the rounding scheme handed to the build_* generators (it is the
// "rounding" argument passed to build_hwbfly() and build_quarters()).
// NOTE(review): judging by the names only, these are truncation, rounding
// away from zero, rounding half up, and convergent rounding -- confirm
// against the rounding module generators.
//
// The enumerators keep their implicit values 0..3 in declaration order.
typedef	enum {
	RND_TRUNCATE,
	RND_FROMZERO,
	RND_HALFUP,
	RND_CONVERGENT
} ROUND_T;
|
|
|
const char cpyleft[] =
|
const char cpyleft[] =
|
"///////////////////////////////////////////////////////////////////////////\n"
|
"////////////////////////////////////////////////////////////////////////////////\n"
|
"//\n"
|
"//\n"
|
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
|
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
|
"//\n"
|
"//\n"
|
"// This program is free software (firmware): you can redistribute it and/or\n"
|
"// This program is free software (firmware): you can redistribute it and/or\n"
|
"// modify it under the terms of the GNU General Public License as published\n"
|
"// modify it under the terms of the GNU General Public License as published\n"
|
Line 91... |
Line 123... |
"//\n"
|
"//\n"
|
"// License: GPL, v3, as defined and found on www.gnu.org,\n"
|
"// License: GPL, v3, as defined and found on www.gnu.org,\n"
|
"// http://www.gnu.org/licenses/gpl.html\n"
|
"// http://www.gnu.org/licenses/gpl.html\n"
|
"//\n"
|
"//\n"
|
"//\n"
|
"//\n"
|
"///////////////////////////////////////////////////////////////////////////\n";
|
"////////////////////////////////////////////////////////////////////////////////\n";
|
// Project name, substituted into the "Project:" line of the banner written at
// the top of every generated file.
const char	prjname[] = "A Doubletime Pipelined FFT";

// Creator credit, substituted into the same banner.  The company name is
// spelled "Technology" here so that it agrees with the copyright text that is
// emitted right after the banner.
const char	creator[] = "// Creator: Dan Gisselquist, Ph.D.\n"
			"// Gisselquist Technology, LLC\n";
|
|
|
int lgval(int vl) {
|
int lgval(int vl) {
|
Line 115... |
Line 147... |
}
|
}
|
|
|
int bflydelay(int nbits, int xtra) {
|
int bflydelay(int nbits, int xtra) {
|
int cbits = nbits + xtra;
|
int cbits = nbits + xtra;
|
int delay;
|
int delay;
|
|
|
|
if (USE_OLD_MULTIPLY) {
|
if (nbits+1<cbits)
|
if (nbits+1<cbits)
|
delay = nbits+4;
|
delay = nbits+4;
|
else
|
else
|
delay = cbits+3;
|
delay = cbits+3;
|
|
} else {
|
|
int na=nbits+2, nb=cbits+1;
|
|
if (nb<na) {
|
|
int tmp = nb;
|
|
nb = na; na = tmp;
|
|
} delay = ((na)/2+(na&1)+2);
|
|
}
|
return delay;
|
return delay;
|
}
|
}
|
|
|
int lgdelay(int nbits, int xtra) {
|
int lgdelay(int nbits, int xtra) {
|
// The butterfly code needs to compare a valid address, of this
|
// The butterfly code needs to compare a valid address, of this
|
Line 513... |
Line 554... |
(dbg)?"_dbg":"", prjname, creator);
|
(dbg)?"_dbg":"", prjname, creator);
|
fprintf(fp, "%s", cpyleft);
|
fprintf(fp, "%s", cpyleft);
|
|
|
fprintf(fp,
|
fprintf(fp,
|
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
|
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
|
"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"
|
"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
|
"\t// Parameters specific to the core that should be changed when this\n"
|
"\t// Parameters specific to the core that should be changed when this\n"
|
"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"
|
"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"
|
"\t// spans must use the fftdoubles stage.\n"
|
"\t// spans must use the fftdoubles stage.\n"
|
"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
|
"\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
|
"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"
|
"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"
|
"\tinput\t [(2*IWIDTH-1):0] i_data;\n"
|
"\tinput\t [(2*IWIDTH-1):0] i_data;\n"
|
"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"
|
"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"
|
"\toutput\treg o_sync;\n"
|
"\toutput\treg o_sync;\n"
|
"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
|
"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
|
|
TST_QTRSTAGE_LGWIDTH);
|
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
|
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
|
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
|
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
|
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
|
"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
|
"\n");
|
"\n");
|
}
|
}
|
Line 726... |
Line 768... |
"//\n", (dbg)?"_dbg":"", prjname, creator);
|
"//\n", (dbg)?"_dbg":"", prjname, creator);
|
|
|
fprintf(fp, "%s", cpyleft);
|
fprintf(fp, "%s", cpyleft);
|
fprintf(fp,
|
fprintf(fp,
|
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
|
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
|
"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
|
"\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"
|
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
|
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
|
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
|
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
|
"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
|
"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
|
"\toutput\treg\t\t\to_sync;\n"
|
"\toutput\treg\t\t\to_sync;\n"
|
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
|
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",
|
|
TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);
|
|
|
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
|
if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
|
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
|
"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
|
"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
|
"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
|
"\n");
|
"\n");
|
Line 871... |
Line 914... |
"//\n", prjname, creator);
|
"//\n", prjname, creator);
|
|
|
fprintf(fp, "%s", cpyleft);
|
fprintf(fp, "%s", cpyleft);
|
fprintf(fp,
|
fprintf(fp,
|
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
|
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
|
"\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
|
"\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);
|
|
#ifdef TST_SHIFTADDMPY_BW
|
|
fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);
|
|
#else
|
|
fprintf(fp, "AWIDTH;\n");
|
|
#endif
|
|
fprintf(fp,
|
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
|
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
|
"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
|
"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
|
"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
|
"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
|
"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
|
"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
|
"\n"
|
"\n"
|
Line 932... |
Line 981... |
"endmodule\n");
|
"endmodule\n");
|
|
|
fclose(fp);
|
fclose(fp);
|
}
|
}
|
|
|
|
// build_bimpy
//
// Writes the Verilog source for the "bimpy" module to the file named fname:
// a banner comment, the copyright text, and then the module itself.  The
// module multiplies a LUTB(=2) bit value by a BW bit value, and is what
// longbimpy instantiates for each pair of tableau rows.
//
//	fname	Path of the file to create (any existing file is overwritten)
//
// If the file cannot be opened, a message is printed to stderr and nothing
// is written.
void	build_bimpy(const char *fname) {
	FILE	*fp = fopen(fname, "w");
	if (NULL == fp) {
		fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
		perror("O/S Err was:");
		return;
	}

	// Banner: file name, project name, purpose, and creator
	fprintf(fp,
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: %s\n"
"//\n"
"// Project: %s\n"
"//\n"
"// Purpose: A simple 2-bit multiply based upon the fact that LUT's allow\n"
"// 6-bits of input. In other words, I could build a 3-bit\n"
"// multiply from 6 LUTs (5 actually, since the first could have\n"
"// two outputs). This would allow multiplication of three bit\n"
"// digits, save only for the fact that you would need two bits\n"
"// of carry. The bimpy approach throttles back a bit and does\n"
"// a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"
"// carry more than one bit. While this multiply is hardware\n"
"// independent (and can still run under Verilator therefore),\n"
"// it is really motivated by trying to optimize for a specific\n"
"// piece of hardware (Xilinx-7 series ...) that has at least\n"
"// 4-input LUT's with carry chains.\n"
"//\n"
"//\n"
"//\n%s"
"//\n", fname, prjname, creator);

	// Copyright and license text
	fprintf(fp, "%s", cpyleft);

	// The module itself.  Nothing below depends upon any generator
	// setting, so it is written out verbatim.
	fprintf(fp,
"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
"\tparameter\tBW=18, // Number of bits in i_b\n"
"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"
"\tinput\t\t\t\ti_clk, i_ce;\n"
"\tinput\t\t[(LUTB-1):0]\ti_a;\n"
"\tinput\t\t[(BW-1):0]\ti_b;\n"
"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"
"\n"
"\twire [(BW+LUTB-2):0] w_r;\n"
"\twire [(BW+LUTB-3):1] c;\n"
"\n"
"\tassign\tw_r = { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"
"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"
"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"
"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"
"\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\to_r <= w_r + { c, 2'b0 };\n"
"\n"
"endmodule\n");

	fclose(fp);
}
|
|
|
|
// build_longbimpy
//
// Writes the Verilog source for the "longbimpy" module to the file named
// fname: a banner comment, the copyright text, and then the module itself.
// The module is a signed multiply that takes the absolute values of its
// inputs, works through i_a two bits at a time using the bimpy module while
// accumulating the partial products, and restores the sign at the end.
//
//	fname	Path of the file to create (any existing file is overwritten)
//
// The generated module's default AW comes from TST_LONGBIMPY_AW.  Its default
// BW comes from TST_LONGBIMPY_BW when that macro is defined; otherwise BW
// defaults to AW.
//
// If the file cannot be opened, a message is printed to stderr and nothing
// is written.
void	build_longbimpy(const char *fname) {
	FILE	*fp = fopen(fname, "w");
	if (NULL == fp) {
		fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
		perror("O/S Err was:");
		return;
	}

	// Banner: file name, project name, purpose, and creator
	fprintf(fp,
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: %s\n"
"//\n"
"// Project: %s\n"
"//\n"
"// Purpose: A portable shift and add multiply, built with the knowledge\n"
"// of the existence of a six bit LUT and carry chain. That\n"
"// knowledge allows us to multiply two bits from one value\n"
"// at a time against all of the bits of the other value. This\n"
"// sub multiply is called the bimpy.\n"
"//\n"
"// For minimal processing delay, make the first parameter\n"
"// the one with the least bits, so that AWIDTH <= BWIDTH.\n"
"//\n"
"//\n"
"//\n%s"
"//\n", fname, prjname, creator);

	// Copyright and license text
	fprintf(fp, "%s", cpyleft);

	// Module header, written up to (and including) the "BW=" of the
	// parameter list.  The value of BW is filled in next.
	fprintf(fp,
"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
"\tparameter AW=%d, // The width of i_a, min width is 5\n"
"\t\t\tBW=", TST_LONGBIMPY_AW);

	// BW's default: either the test width, or else it tracks AW
#ifdef	TST_LONGBIMPY_BW
	fprintf(fp, "%d", TST_LONGBIMPY_BW);
#else
	fprintf(fp, "AW");
#endif

	// The remainder of the module doesn't depend upon any generator
	// setting, and so it is written out verbatim.
	fprintf(fp, ", // The width of i_b, can be anything\n"
"\t\t\t// The following three parameters should not be changed\n"
"\t\t\t// by any implementation, but are based upon hardware\n"
"\t\t\t// and the above values:\n"
"\t\t\tOW=AW+BW, // The output width\n"
"\t\t\tIW=(AW+1)&(-2), // Internal width of A\n"
"\t\t\tLUTB=2, // How many bits we can multiply by at once\n"
"\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"
"\tinput\t\t\t\ti_clk, i_ce;\n"
"\tinput\t\t[(AW-1):0]\ti_a;\n"
"\tinput\t\t[(BW-1):0]\ti_b;\n"
"\toutput\treg\t[(AW+BW-1):0]\to_r;\n"
"\n"
"\treg\t[(IW-1):0]\tu_a;\n"
"\treg\t[(BW-1):0]\tu_b;\n"
"\treg\t\t\tsgn;\n"
"\n"
"\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"
"\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"
"\treg\t[(TLEN-1):0]\t\tr_s;\n"
"\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"
"\tgenvar k;\n"
"\n"
"\t// First step:\n"
"\t// Switch to unsigned arithmetic for our multiply, keeping track\n"
"\t// of the along the way. We'll then add the sign again later at\n"
"\t// the end.\n"
"\t//\n"
"\t// If we were forced to stay within two's complement arithmetic,\n"
"\t// taking the absolute value here would require an additional bit.\n"
"\t// However, because our results are now unsigned, we can stay\n"
"\t// within the number of bits given (for now).\n"
"\tgenerate if (IW > AW)\n"
"\tbegin\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\n"
"\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"
"\tend else begin\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\n"
"\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"
"\tend endgenerate\n"
"\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"
"\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"
"\t\tend\n"
"\n"
"\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"
"\n"
"\t//\n"
"\t// Second step: First two 2xN products.\n"
"\t//\n"
"\t// Since we have no tableau of additions (yet), we can do both\n"
"\t// of the first two rows at the same time and add them together.\n"
"\t// For the next round, we'll then have a previous sum to accumulate\n"
"\t// with new and subsequent product, and so only do one product at\n"
"\t// a time can follow this--but the first clock can do two at a time.\n"
"\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a);\n"
"\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce) r_b[0] <= u_b;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"
"\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"
"\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"
"\t\t\t +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"
"\n"
"\tgenerate // Keep track of intermediate values, before multiplying them\n"
"\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"
"\tbegin : gencopies\n"
"\t\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"
"\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"
"\t\t\tr_b[k+1] <= r_b[k];\n"
"\t\tend\n"
"\tend endgenerate\n"
"\n"
"\tgenerate // The actual multiply and accumulate stage\n"
"\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"
"\tbegin : genstages\n"
"\t\t// First, the multiply: 2-bits times BW bits\n"
"\t\twire\t[(BW+LUTB-1):0] genp;\n"
"\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"
"\n"
"\t\t// Then the accumulate step -- on the next clock\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\n"
"\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"
"\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"
"\tend endgenerate\n"
"\n"
"\twire [(IW+BW-1):0] w_r;\n"
"\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\t\to_r <= w_r[(AW+BW-1):0];\n"
"\n"
"endmodule\n");

	fclose(fp);
}
|
|
|
void build_dblreverse(const char *fname) {
|
void build_dblreverse(const char *fname) {
|
FILE *fp = fopen(fname, "w");
|
FILE *fp = fopen(fname, "w");
|
if (NULL == fp) {
|
if (NULL == fp) {
|
fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
|
fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
|
perror("O/S Err was:");
|
perror("O/S Err was:");
|
Line 1012... |
Line 1268... |
"//\n"
|
"//\n"
|
"//\n");
|
"//\n");
|
fprintf(fp,
|
fprintf(fp,
|
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
|
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
|
"\t\to_out_0, o_out_1, o_sync);\n"
|
"\t\to_out_0, o_out_1, o_sync);\n"
|
"\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"
|
"\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"
|
"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
|
"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
|
"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
|
"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
|
"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
|
"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
|
"\toutput\treg\t\t\to_sync;\n"
|
"\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);
|
|
|
|
fprintf(fp,
|
"\n"
|
"\n"
|
"\treg\t\t\tin_reset;\n"
|
"\treg\t\t\tin_reset;\n"
|
"\treg\t[(LGSIZE-1):0]\tiaddr;\n"
|
"\treg\t[(LGSIZE-1):0]\tiaddr;\n"
|
"\twire\t[(LGSIZE-3):0]\tbraddr;\n"
|
"\twire\t[(LGSIZE-3):0]\tbraddr;\n"
|
"\n"
|
"\n"
|
Line 1181... |
Line 1439... |
|
|
fprintf(fp,
|
fprintf(fp,
|
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
|
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
|
"\t\to_left, o_right, o_aux);\n"
|
"\t\to_left, o_right, o_aux);\n"
|
"\t// Public changeable parameters ...\n"
|
"\t// Public changeable parameters ...\n"
|
"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
|
"\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
|
|
#ifdef TST_BUTTERFLY_CWIDTH
|
|
fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
|
|
#else
|
|
fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
|
|
#endif
|
|
#ifdef TST_BUTTERFLY_OWIDTH
|
|
fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
|
|
#else
|
|
fprintf(fp, "OWIDTH=IWIDTH+1;\n");
|
|
#endif
|
|
fprintf(fp,
|
"\t// Parameters specific to the core that should not be changed.\n"
|
"\t// Parameters specific to the core that should not be changed.\n"
|
"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
|
"\tparameter MPYDELAY=%d'd%d,\n"
|
"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
|
"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
|
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"
|
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"
|
"\t// this value is fractional, then round up to the nearest\n"
|
"\t// this value is fractional, then round up to the nearest\n"
|
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
|
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
|
"\tparameter\tLGDELAY=%d;\n"
|
"\tparameter\tLGDELAY=%d;\n"
|
Line 1195... |
Line 1464... |
"\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
|
"\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
|
"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
|
"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
|
"\tinput\t\ti_aux;\n"
|
"\tinput\t\ti_aux;\n"
|
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
|
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
|
"\toutput\treg\to_aux;\n"
|
"\toutput\treg\to_aux;\n"
|
"\n", 16, xtracbits, lgdelay(16,xtracbits),
|
"\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),
|
bflydelay(16, xtracbits), lgdelay(16,xtracbits));
|
lgdelay(16,xtracbits));
|
fprintf(fp,
|
fprintf(fp,
|
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
|
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
|
"\n"
|
"\n"
|
"\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
|
"\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
|
"\treg\t\t\t\tr_aux, r_aux_2;\n"
|
"\treg\t\t\t\tr_aux, r_aux_2;\n"
|
Line 1292... |
Line 1561... |
"\n");
|
"\n");
|
fprintf(fp,
|
fprintf(fp,
|
"\t// This should really be based upon an IF, such as in\n"
|
"\t// This should really be based upon an IF, such as in\n"
|
"\t// if (IWIDTH < CWIDTH) then ...\n"
|
"\t// if (IWIDTH < CWIDTH) then ...\n"
|
"\t// However, this is the only (other) way I know to do it.\n"
|
"\t// However, this is the only (other) way I know to do it.\n"
|
"\tgenerate\n"
|
"\tgenerate if (CWIDTH < IWIDTH+1)\n"
|
"\tif (CWIDTH < IWIDTH+1)\n"
|
|
"\tbegin\n"
|
"\tbegin\n"
|
"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
|
"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
|
"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
|
"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
|
"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
|
"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
|
"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
|
"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
|
"\n"
|
"\n"
|
"\t\t// We need to pad these first two multiplies by an extra\n"
|
"\t\t// We need to pad these first two multiplies by an extra\n"
|
"\t\t// bit just to keep them aligned with the third,\n"
|
"\t\t// bit just to keep them aligned with the third,\n"
|
"\t\t// simpler, multiply.\n"
|
"\t\t// simpler, multiply.\n"
|
"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
|
"\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
|
"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
|
"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
|
"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
|
"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
|
"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
|
"\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
|
"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
|
"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
|
"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
|
"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
|
"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
|
"\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
|
"\t\t\t\tp3c_in, p3d_in, p_three);\n"
|
"\t\t\t\tp3c_in, p3d_in, p_three);\n"
|
"\tend else begin\n"
|
"\tend else begin\n"
|
"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
|
"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
|
"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
|
"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
|
"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
|
"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
|
"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
|
"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
|
"\n"
|
"\n"
|
"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
|
"\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
|
"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
|
"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
|
"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
|
"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
|
"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
|
"\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
|
"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
|
"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
|
"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
|
"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
|
"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
|
"\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
|
"\t\t\t\tp3d_in, p3c_in, p_three);\n"
|
"\t\t\t\tp3d_in, p3c_in, p_three);\n"
|
"\tend\n"
|
"\tend\n"
|
"\tendgenerate\n"
|
"\tendgenerate\n"
|
"\n");
|
"\n",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
|
|
(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");
|
fprintf(fp,
|
fprintf(fp,
|
"\t// These values are held in memory and delayed during the\n"
|
"\t// These values are held in memory and delayed during the\n"
|
"\t// multiply. Here, we recover them. During the multiply,\n"
|
"\t// multiply. Here, we recover them. During the multiply,\n"
|
"\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
|
"\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
|
"\t// therefore, the left_x values need to be right shifted by\n"
|
"\t// therefore, the left_x values need to be right shifted by\n"
|
Line 2268... |
Line 2542... |
(inverse)?"I":"");
|
(inverse)?"I":"");
|
if (real_fft)
|
if (real_fft)
|
fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
|
fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
|
if (!single_clock)
|
if (!single_clock)
|
fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
|
fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
|
|
if (USE_OLD_MULTIPLY)
|
|
fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
|
|
|
|
fprintf(hdr, "// Parameters for testing the longbimpy\n");
|
|
fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
|
|
#ifdef TST_LONGBIMPY_BW
|
|
fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
|
|
#else
|
|
fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
|
|
#endif
|
|
|
|
fprintf(hdr, "// Parameters for testing the shift add multiply\n");
|
|
fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
|
|
#ifdef TST_SHIFTADDMPY_BW
|
|
fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
|
|
#else
|
|
fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
|
|
#endif
|
|
|
|
#define TST_SHIFTADDMPY_AW 16
|
|
#define TST_SHIFTADDMPY_BW 20 // Leave undefined to match AW
|
|
fprintf(hdr, "// Parameters for testing the butterfly\n");
|
|
fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
|
|
fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
|
|
fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
|
|
fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
|
|
bflydelay(TST_BUTTERFLY_IWIDTH,
|
|
TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
|
|
|
|
fprintf(hdr, "// Parameters for testing the quarter stage\n");
|
|
fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
|
|
fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
|
|
|
|
fprintf(hdr, "// Parameters for testing the double stage\n");
|
|
fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
|
|
fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
|
|
|
|
fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
|
|
fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
|
fprintf(hdr, "\n" "#endif\n\n");
|
fprintf(hdr, "\n" "#endif\n\n");
|
fclose(hdr);
|
fclose(hdr);
|
}
|
}
|
|
|
{
|
{
|
Line 2614... |
Line 2927... |
if (nummpy > 0) {
|
if (nummpy > 0) {
|
fname = coredir + "/hwbfly.v";
|
fname = coredir + "/hwbfly.v";
|
build_hwbfly(fname.c_str(), xtracbits, rounding);
|
build_hwbfly(fname.c_str(), xtracbits, rounding);
|
}
|
}
|
|
|
|
{
|
|
// To make debugging easier, we build both of these
|
fname = coredir + "/shiftaddmpy.v";
|
fname = coredir + "/shiftaddmpy.v";
|
build_multiply(fname.c_str());
|
build_multiply(fname.c_str());
|
|
|
|
fname = coredir + "/longbimpy.v";
|
|
build_longbimpy(fname.c_str());
|
|
fname = coredir + "/bimpy.v";
|
|
build_bimpy(fname.c_str());
|
|
}
|
|
|
if ((dbg)&&(dbgstage == 4)) {
|
if ((dbg)&&(dbgstage == 4)) {
|
fname = coredir + "/qtrstage_dbg.v";
|
fname = coredir + "/qtrstage_dbg.v";
|
build_quarters(fname.c_str(), rounding, true);
|
build_quarters(fname.c_str(), rounding, true);
|
}
|
}
|
fname = coredir + "/qtrstage.v";
|
fname = coredir + "/qtrstage.v";
|