OpenCores

Rev 28	Rev 29
Line 1...	Line 1...
`/////////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Filename: fftgen.cpp`	`// Filename: fftgen.cpp`
`//`	`//`
`// Project: A Doubletime Pipelined FFT`	`// Project: A Doubletime Pipelined FFT`
`//`	`//`
Line 21...	Line 21...
`// make in the documents directory to build it.`	`// make in the documents directory to build it.`
`//`	`//`
`// Creator: Dan Gisselquist, Ph.D.`	`// Creator: Dan Gisselquist, Ph.D.`
`// Gisselquist Tecnology, LLC`	`// Gisselquist Tecnology, LLC`
`//`	`//`
`///////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Copyright (C) 2015, Gisselquist Technology, LLC`	`// Copyright (C) 2015, Gisselquist Technology, LLC`
`//`	`//`
`// This program is free software (firmware): you can redistribute it and/or`	`// This program is free software (firmware): you can redistribute it and/or`
`// modify it under the terms of the GNU General Public License as published`	`// modify it under the terms of the GNU General Public License as published`
Line 44...	Line 44...
`//`	`//`
`// License: GPL, v3, as defined and found on www.gnu.org,`	`// License: GPL, v3, as defined and found on www.gnu.org,`
`// http://www.gnu.org/licenses/gpl.html`	`// http://www.gnu.org/licenses/gpl.html`
`//`	`//`
`//`	`//`
`///////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`//`	`//`
`#include <stdio.h>`	`#include <stdio.h>`
`#include <stdlib.h>`	`#include <stdlib.h>`
`#include <unistd.h>`	`#include <unistd.h>`
Line 62...	Line 62...
`#define DEF_NBITSIN 16`	`#define DEF_NBITSIN 16`
`#define DEF_COREDIR "fft-core"`	`#define DEF_COREDIR "fft-core"`
`#define DEF_XTRACBITS 4`	`#define DEF_XTRACBITS 4`
`#define DEF_NMPY 0`	`#define DEF_NMPY 0`
`#define DEF_XTRAPBITS 0`	`#define DEF_XTRAPBITS 0`
	`#define USE_OLD_MULTIPLY false`

	`// To coordinate testing, it helps to have some defines in our header file that`
	`// are common with the default parameters found within the various subroutines.`
	`// We'll define those common parameters here. These values, however, have no`
	`// effect on anything other than bench testing. They do, though, allow us to`
	`// bench test exact copies of what is going on within the FFT when necessary`
	`// in order to find problems.`
	`// First, parameters for the new multiply based upon the bi-multiply structure`
	`// (2-bits/2-tableau rows at a time).`
	`#define TST_LONGBIMPY_AW 16`
	`#define TST_LONGBIMPY_BW 20 // Leave undefined to match AW`

	`// We also include parameters for the shift add multiply`
	`#define TST_SHIFTADDMPY_AW 16`
	`#define TST_SHIFTADDMPY_BW 20 // Leave undefined to match AW`

	`// Now for parameters matching the butterfly`
	`#define TST_BUTTERFLY_IWIDTH 16`
	`#define TST_BUTTERFLY_CWIDTH 20`
	`#define TST_BUTTERFLY_OWIDTH 17`

	`// Now for parameters matching the qtrstage`
	`#define TST_QTRSTAGE_IWIDTH 16`
	`#define TST_QTRSTAGE_LGWIDTH 8`

	`// Parameters for the dblstage`
	`#define TST_DBLSTAGE_IWIDTH 16`
	`#define TST_DBLSTAGE_SHIFT 0`

	`// Now for parameters matching the dblreverse stage`
	`#define TST_DBLREVERSE_LGSIZE 5`

`typedef enum {`	`typedef enum {`
`RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT`	`RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT`
`} ROUND_T;`	`} ROUND_T;`

`const char cpyleft[] =`	`const char cpyleft[] =`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"////////////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Copyright (C) 2015, Gisselquist Technology, LLC\n"`	`"// Copyright (C) 2015, Gisselquist Technology, LLC\n"`
`"//\n"`	`"//\n"`
`"// This program is free software (firmware): you can redistribute it and/or\n"`	`"// This program is free software (firmware): you can redistribute it and/or\n"`
`"// modify it under the terms of the GNU General Public License as published\n"`	`"// modify it under the terms of the GNU General Public License as published\n"`
Line 91...	Line 123...
`"//\n"`	`"//\n"`
`"// License: GPL, v3, as defined and found on www.gnu.org,\n"`	`"// License: GPL, v3, as defined and found on www.gnu.org,\n"`
`"// http://www.gnu.org/licenses/gpl.html\n"`	`"// http://www.gnu.org/licenses/gpl.html\n"`
`"//\n"`	`"//\n"`
`"//\n"`	`"//\n"`
`"///////////////////////////////////////////////////////////////////////////\n";`	`"////////////////////////////////////////////////////////////////////////////////\n";`
`const char prjname[] = "A Doubletime Pipelined FFT";`	`const char prjname[] = "A Doubletime Pipelined FFT";`
`const char creator[] = "// Creator: Dan Gisselquist, Ph.D.\n"`	`const char creator[] = "// Creator: Dan Gisselquist, Ph.D.\n"`
`"// Gisselquist Tecnology, LLC\n";`	`"// Gisselquist Tecnology, LLC\n";`

`int lgval(int vl) {`	`int lgval(int vl) {`
Line 115...	Line 147...
`}`	`}`

`int bflydelay(int nbits, int xtra) {`	`int bflydelay(int nbits, int xtra) {`
`int cbits = nbits + xtra;`	`int cbits = nbits + xtra;`
`int delay;`	`int delay;`

	`if (USE_OLD_MULTIPLY) {`
`if (nbits+1<cbits)`	`if (nbits+1<cbits)`
`delay = nbits+4;`	`delay = nbits+4;`
`else`	`else`
`delay = cbits+3;`	`delay = cbits+3;`
	`} else {`
	`int na=nbits+2, nb=cbits+1;`
	`if (nb<na) {`
	`int tmp = nb;`
	`nb = na; na = tmp;`
	`} delay = ((na)/2+(na&1)+2);`
	`}`
`return delay;`	`return delay;`
`}`	`}`

`int lgdelay(int nbits, int xtra) {`	`int lgdelay(int nbits, int xtra) {`
`// The butterfly code needs to compare a valid address, of this`	`// The butterfly code needs to compare a valid address, of this`
Line 513...	Line 554...
`(dbg)?"_dbg":"", prjname, creator);`	`(dbg)?"_dbg":"", prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`

`fprintf(fp,`	`fprintf(fp,`
`"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`	`"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`
`"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"`
`"\t// Parameters specific to the core that should be changed when this\n"`	`"\t// Parameters specific to the core that should be changed when this\n"`
`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`	`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`
`"\t// spans must use the fftdoubles stage.\n"`	`"\t// spans must use the fftdoubles stage.\n"`
`"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"`	`"\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"`
`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`	`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`
`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`	`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`
`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`	`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`
`"\toutput\treg o_sync;\n"`	`"\toutput\treg o_sync;\n"`
`"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");`	`"\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,`
	`TST_QTRSTAGE_LGWIDTH);`
`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`	`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`
`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"`	`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"`
`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`	`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`
`"\n");`	`"\n");`
`}`	`}`
Line 726...	Line 768...
`"//\n", (dbg)?"_dbg":"", prjname, creator);`	`"//\n", (dbg)?"_dbg":"", prjname, creator);`

`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
`fprintf(fp,`	`fprintf(fp,`
`"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"`	`"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"`
`"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"`	`"\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"`
`"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"`	`"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"`
`"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"`	`"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"`
`"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"`	`"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"`
`"\toutput\treg\t\t\to_sync;\n"`	`"\toutput\treg\t\t\to_sync;\n"`
`"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");`	`"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",`
	`TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);`

`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`	`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`
`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"`	`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"`
`"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"`	`"\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"`
`"\n");`	`"\n");`
Line 871...	Line 914...
`"//\n", prjname, creator);`	`"//\n", prjname, creator);`

`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
`fprintf(fp,`	`fprintf(fp,`
`"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"`	`"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"`
`"\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"`	`"\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);`
	`#ifdef TST_SHIFTADDMPY_BW`
	`fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);`
	`#else`
	`fprintf(fp, "AWIDTH;\n");`
	`#endif`
	`fprintf(fp,`
`"\tinput\t\t\t\t\ti_clk, i_ce;\n"`	`"\tinput\t\t\t\t\ti_clk, i_ce;\n"`
`"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"`	`"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"`
`"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"`	`"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"`
`"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"`	`"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"`
`"\n"`	`"\n"`
Line 932...	Line 981...
`"endmodule\n");`	`"endmodule\n");`

`fclose(fp);`	`fclose(fp);`
`}`	`}`

	`void build_bimpy(const char *fname) {`
	`FILE *fp = fopen(fname, "w");`
	`if (NULL == fp) {`
	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
	`perror("O/S Err was:");`
	`return;`
	`}`

	`fprintf(fp,`
	`"////////////////////////////////////////////////////////////////////////////////\n"`
	`"//\n"`
	`"// Filename: %s\n"`
	`"//\n"`
	`"// Project: %s\n"`
	`"//\n"`
	`"// Purpose: A simple 2-bit multiply based upon the fact that LUT's allow\n"`
	`"// 6-bits of input. In other words, I could build a 3-bit\n"`
	`"// multiply from 6 LUTs (5 actually, since the first could have\n"`
	`"// two outputs). This would allow multiplication of three bit\n"`
	`"// digits, save only for the fact that you would need two bits\n"`
	`"// of carry. The bimpy approach throttles back a bit and does\n"`
	`"// a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"`
	`"// carry more than one bit. While this multiply is hardware\n"`
	`"// independent (and can still run under Verilator therefore),\n"`
	`"// it is really motivated by trying to optimize for a specific\n"`
	`"// piece of hardware (Xilinx-7 series ...) that has at least\n"`
	`"// 4-input LUT's with carry chains.\n"`
	`"//\n"`
	`"//\n"`
	`"//\n%s"`
	`"//\n", fname, prjname, creator);`

	`fprintf(fp, "%s", cpyleft);`
	`fprintf(fp,`
	`"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"`
	`"\tparameter\tBW=18, // Number of bits in i_b\n"`
	`"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"`
	`"\tinput\t\t\t\ti_clk, i_ce;\n"`
	`"\tinput\t\t[(LUTB-1):0]\ti_a;\n"`
	`"\tinput\t\t[(BW-1):0]\ti_b;\n"`
	`"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"`
	`"\n"`
	`"\twire [(BW+LUTB-2):0] w_r;\n"`
	`"\twire [(BW+LUTB-3):1] c;\n"`
	`"\n"`
	`"\tassign\tw_r = { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"`
	`"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"`
	`"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"`
	`"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"`
	`"\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce)\n"`
	`"\t\t\to_r <= w_r + { c, 2'b0 };\n"`
	`"\n"`
	`"endmodule\n");`

	`fclose(fp);`
	`}`

	`void build_longbimpy(const char *fname) {`
	`FILE *fp = fopen(fname, "w");`
	`if (NULL == fp) {`
	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
	`perror("O/S Err was:");`
	`return;`
	`}`

	`fprintf(fp,`
	`"////////////////////////////////////////////////////////////////////////////////\n"`
	`"//\n"`
	`"// Filename: %s\n"`
	`"//\n"`
	`"// Project: %s\n"`
	`"//\n"`
	`"// Purpose: A portable shift and add multiply, built with the knowledge\n"`
	`"// of the existence of a six bit LUT and carry chain. That\n"`
	`"// knowledge allows us to multiply two bits from one value\n"`
	`"// at a time against all of the bits of the other value. This\n"`
	`"// sub multiply is called the bimpy.\n"`
	`"//\n"`
	`"// For minimal processing delay, make the first parameter\n"`
	`"// the one with the least bits, so that AWIDTH <= BWIDTH.\n"`
	`"//\n"`
	`"//\n"`
	`"//\n%s"`
	`"//\n", fname, prjname, creator);`

	`fprintf(fp, "%s", cpyleft);`
	`fprintf(fp,`
	`"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"`
	`"\tparameter AW=%d, // The width of i_a, min width is 5\n"`
	`"\t\t\tBW=", TST_LONGBIMPY_AW);`
	`#ifdef TST_LONGBIMPY_BW`
	`fprintf(fp, "%d", TST_LONGBIMPY_BW);`
	`#else`
	`fprintf(fp, "AW");`
	`#endif`

	`fprintf(fp, ", // The width of i_b, can be anything\n"`
	`"\t\t\t// The following three parameters should not be changed\n"`
	`"\t\t\t// by any implementation, but are based upon hardware\n"`
	`"\t\t\t// and the above values:\n"`
	`"\t\t\tOW=AW+BW, // The output width\n"`
	`"\t\t\tIW=(AW+1)&(-2), // Internal width of A\n"`
	`"\t\t\tLUTB=2, // How many bits we can multiply by at once\n"`
	`"\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"`
	`"\tinput\t\t\t\ti_clk, i_ce;\n"`
	`"\tinput\t\t[(AW-1):0]\ti_a;\n"`
	`"\tinput\t\t[(BW-1):0]\ti_b;\n"`
	`"\toutput\treg\t[(AW+BW-1):0]\to_r;\n"`
	`"\n"`
	`"\treg\t[(IW-1):0]\tu_a;\n"`
	`"\treg\t[(BW-1):0]\tu_b;\n"`
	`"\treg\t\t\tsgn;\n"`
	`"\n"`
	`"\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"`
	`"\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"`
	`"\treg\t[(TLEN-1):0]\t\tr_s;\n"`
	`"\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"`
	`"\tgenvar k;\n"`
	`"\n"`
	`"\t// First step:\n"`
	`"\t// Switch to unsigned arithmetic for our multiply, keeping track\n"`
	`"\t// of the along the way. We'll then add the sign again later at\n"`
	`"\t// the end.\n"`
	`"\t//\n"`
	`"\t// If we were forced to stay within two's complement arithmetic,\n"`
	`"\t// taking the absolute value here would require an additional bit.\n"`
	`"\t// However, because our results are now unsigned, we can stay\n"`
	`"\t// within the number of bits given (for now).\n"`
	`"\tgenerate if (IW > AW)\n"`
	`"\tbegin\n"`
	`"\t\talways @(posedge i_clk)\n"`
	`"\t\t\tif (i_ce)\n"`
	`"\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"`
	`"\tend else begin\n"`
	`"\t\talways @(posedge i_clk)\n"`
	`"\t\t\tif (i_ce)\n"`
	`"\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"`
	`"\tend endgenerate\n"`
	`"\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce)\n"`
	`"\t\tbegin\n"`
	`"\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"`
	`"\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"`
	`"\t\tend\n"`
	`"\n"`
	`"\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"`
	`"\n"`
	`"\t//\n"`
	`"\t// Second step: First two 2xN products.\n"`
	`"\t//\n"`
	`"\t// Since we have no tableau of additions (yet), we can do both\n"`
	`"\t// of the first two rows at the same time and add them together.\n"`
	`"\t// For the next round, we'll then have a previous sum to accumulate\n"`
	`"\t// with new and subsequent product, and so only do one product at\n"`
	`"\t// a time can follow this--but the first clock can do two at a time.\n"`
	`"\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a);\n"`
	`"\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce) r_b[0] <= u_b;\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"`
	`"\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"`
	`"\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"`
	`"\t\t\t +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"`
	`"\n"`
	`"\tgenerate // Keep track of intermediate values, before multiplying them\n"`
	`"\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"`
	`"\tbegin : gencopies\n"`
	`"\t\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce)\n"`
	`"\t\tbegin\n"`
	`"\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"`
	`"\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"`
	`"\t\t\tr_b[k+1] <= r_b[k];\n"`
	`"\t\tend\n"`
	`"\tend endgenerate\n"`
	`"\n"`
	`"\tgenerate // The actual multiply and accumulate stage\n"`
	`"\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"`
	`"\tbegin : genstages\n"`
	`"\t\t// First, the multiply: 2-bits times BW bits\n"`
	`"\t\twire\t[(BW+LUTB-1):0] genp;\n"`
	`"\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"`
	`"\n"`
	`"\t\t// Then the accumulate step -- on the next clock\n"`
	`"\t\talways @(posedge i_clk)\n"`
	`"\t\t\tif (i_ce)\n"`
	`"\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"`
	`"\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"`
	`"\tend endgenerate\n"`
	`"\n"`
	`"\twire [(IW+BW-1):0] w_r;\n"`
	`"\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce)\n"`
	`"\t\t\to_r <= w_r[(AW+BW-1):0];\n"`
	`"\n"`
	`"endmodule\n");`

	`fclose(fp);`
	`}`

`void build_dblreverse(const char *fname) {`	`void build_dblreverse(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
Line 1012...	Line 1268...
`"//\n"`	`"//\n"`
`"//\n");`	`"//\n");`
`fprintf(fp,`	`fprintf(fp,`
`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`	`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`
`"\t\to_out_0, o_out_1, o_sync);\n"`	`"\t\to_out_0, o_out_1, o_sync);\n"`
`"\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"`	`"\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"`
`"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"`	`"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"`
`"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"`	`"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"`
`"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"`	`"\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"`
`"\toutput\treg\t\t\to_sync;\n"`	`"\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);`

	`fprintf(fp,`
`"\n"`	`"\n"`
`"\treg\t\t\tin_reset;\n"`	`"\treg\t\t\tin_reset;\n"`
`"\treg\t[(LGSIZE-1):0]\tiaddr;\n"`	`"\treg\t[(LGSIZE-1):0]\tiaddr;\n"`
`"\twire\t[(LGSIZE-3):0]\tbraddr;\n"`	`"\twire\t[(LGSIZE-3):0]\tbraddr;\n"`
`"\n"`	`"\n"`
Line 1181...	Line 1439...

`fprintf(fp,`	`fprintf(fp,`
`"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"`	`"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"`
`"\t\to_left, o_right, o_aux);\n"`	`"\t\to_left, o_right, o_aux);\n"`
`"\t// Public changeable parameters ...\n"`	`"\t// Public changeable parameters ...\n"`
`"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);`
	`#ifdef TST_BUTTERFLY_CWIDTH`
	`fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);`
	`#else`
	`fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);`
	`#endif`
	`#ifdef TST_BUTTERFLY_OWIDTH`
	`fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);`
	`#else`
	`fprintf(fp, "OWIDTH=IWIDTH+1;\n");`
	`#endif`
	`fprintf(fp,`
`"\t// Parameters specific to the core that should not be changed.\n"`	`"\t// Parameters specific to the core that should not be changed.\n"`
`"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"`	`"\tparameter MPYDELAY=%d'd%d,\n"`
`"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"`	`"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"`
`"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"`	`"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"`
`"\t// this value is fractional, then round up to the nearest\n"`	`"\t// this value is fractional, then round up to the nearest\n"`
`"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"`	`"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"`
`"\tparameter\tLGDELAY=%d;\n"`	`"\tparameter\tLGDELAY=%d;\n"`
Line 1195...	Line 1464...
`"\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"`	`"\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"`
`"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"`	`"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"`
`"\tinput\t\ti_aux;\n"`	`"\tinput\t\ti_aux;\n"`
`"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"`	`"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"`
`"\toutput\treg\to_aux;\n"`	`"\toutput\treg\to_aux;\n"`
`"\n", 16, xtracbits, lgdelay(16,xtracbits),`	`"\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),`
`bflydelay(16, xtracbits), lgdelay(16,xtracbits));`	`lgdelay(16,xtracbits));`
`fprintf(fp,`	`fprintf(fp,`
`"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"`	`"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"`
`"\n"`	`"\n"`
`"\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"`	`"\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"`
`"\treg\t\t\t\tr_aux, r_aux_2;\n"`	`"\treg\t\t\t\tr_aux, r_aux_2;\n"`
Line 1292...	Line 1561...
`"\n");`	`"\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// This should really be based upon an IF, such as in\n"`	`"\t// This should really be based upon an IF, such as in\n"`
`"\t// if (IWIDTH < CWIDTH) then ...\n"`	`"\t// if (IWIDTH < CWIDTH) then ...\n"`
`"\t// However, this is the only (other) way I know to do it.\n"`	`"\t// However, this is the only (other) way I know to do it.\n"`
`"\tgenerate\n"`	`"\tgenerate if (CWIDTH < IWIDTH+1)\n"`
`"\tif (CWIDTH < IWIDTH+1)\n"`
`"\tbegin\n"`	`"\tbegin\n"`
`"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"`	`"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"`
`"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"`	`"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"`
`"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"`	`"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"`
`"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"`	`"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"`
`"\n"`	`"\n"`
`"\t\t// We need to pad these first two multiplies by an extra\n"`	`"\t\t// We need to pad these first two multiplies by an extra\n"`
`"\t\t// bit just to keep them aligned with the third,\n"`	`"\t\t// bit just to keep them aligned with the third,\n"`
`"\t\t// simpler, multiply.\n"`	`"\t\t// simpler, multiply.\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"`	`"\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"`	`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"`
`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"`	`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"`	`"\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"`
`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"`	`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"`
`"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"`	`"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"`	`"\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"`
`"\t\t\t\tp3c_in, p3d_in, p_three);\n"`	`"\t\t\t\tp3c_in, p3d_in, p_three);\n"`
`"\tend else begin\n"`	`"\tend else begin\n"`
`"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"`	`"\t\twire\t[(CWIDTH):0]\tp3c_in;\n"`
`"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"`	`"\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"`
`"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"`	`"\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"`
`"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"`	`"\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"`
`"\n"`	`"\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"`	`"\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"`
`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"`	`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"`	`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"`	`"\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"`
`"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"`	`"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"`
`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"`	`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"`	`"\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"`
`"\t\t\t\tp3d_in, p3c_in, p_three);\n"`	`"\t\t\t\tp3d_in, p3c_in, p_three);\n"`
`"\tend\n"`	`"\tend\n"`
`"\tendgenerate\n"`	`"\tendgenerate\n"`
`"\n");`	`"\n",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",`
	`(USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// These values are held in memory and delayed during the\n"`	`"\t// These values are held in memory and delayed during the\n"`
`"\t// multiply. Here, we recover them. During the multiply,\n"`	`"\t// multiply. Here, we recover them. During the multiply,\n"`

Line 1...

/////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

// Filename:    fftgen.cpp

// Filename:    fftgen.cpp

//

//

// Project:     A Doubletime Pipelined FFT

// Project:     A Doubletime Pipelined FFT

//

//

Line 21...

//              make in the documents directory to build it.

//              make in the documents directory to build it.

//

//

// Creator:     Dan Gisselquist, Ph.D.

// Creator:     Dan Gisselquist, Ph.D.

//              Gisselquist Tecnology, LLC

//              Gisselquist Tecnology, LLC

//

//

///////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

// Copyright (C) 2015, Gisselquist Technology, LLC

// Copyright (C) 2015, Gisselquist Technology, LLC

//

//

// This program is free software (firmware): you can redistribute it and/or

// This program is free software (firmware): you can redistribute it and/or

// modify it under the terms of  the GNU General Public License as published

// modify it under the terms of  the GNU General Public License as published

Line 44...

//

//

// License:     GPL, v3, as defined and found on www.gnu.org,

// License:     GPL, v3, as defined and found on www.gnu.org,

//              http://www.gnu.org/licenses/gpl.html

//              http://www.gnu.org/licenses/gpl.html

//

//

//

//

///////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

//

//

#include <stdio.h>

#include <stdio.h>

#include <stdlib.h>

#include <stdlib.h>

#include <unistd.h>

#include <unistd.h>

Line 62...

#define DEF_NBITSIN     16

#define DEF_NBITSIN     16

#define DEF_COREDIR     "fft-core"

#define DEF_COREDIR     "fft-core"

#define DEF_XTRACBITS   4

#define DEF_XTRACBITS   4

#define DEF_NMPY        0

#define DEF_NMPY        0

#define DEF_XTRAPBITS   0

#define DEF_XTRAPBITS   0

#define USE_OLD_MULTIPLY        false

// To coordinate testing, it helps to have some defines in our header file that

// are common with the default parameters found within the various subroutines.

// We'll define those common parameters here.  These values, however, have no

// effect on anything other than bench testing.  They do, though, allow us to

// bench test exact copies of what is going on within the FFT when necessary

// in order to find problems.

// First, parameters for the new multiply based upon the bi-multiply structure

// (2-bits/2-tableau rows at a time).

#define TST_LONGBIMPY_AW        16

#define TST_LONGBIMPY_BW        20      // Leave undefined to match AW

//  We also include parameters for the shift add multiply

#define TST_SHIFTADDMPY_AW      16

#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

// Now for parameters matching the butterfly

#define TST_BUTTERFLY_IWIDTH    16

#define TST_BUTTERFLY_CWIDTH    20

#define TST_BUTTERFLY_OWIDTH    17

// Now for parameters matching the qtrstage

#define TST_QTRSTAGE_IWIDTH     16

#define TST_QTRSTAGE_LGWIDTH    8

// Parameters for the dblstage

#define TST_DBLSTAGE_IWIDTH     16

#define TST_DBLSTAGE_SHIFT      0

// Now for parameters matching the dblreverse stage

#define TST_DBLREVERSE_LGSIZE   5

typedef enum {

typedef enum {

        RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT

        RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT

} ROUND_T;

} ROUND_T;

const char      cpyleft[] =

const char      cpyleft[] =

"///////////////////////////////////////////////////////////////////////////\n"

"////////////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Copyright (C) 2015, Gisselquist Technology, LLC\n"

"// Copyright (C) 2015, Gisselquist Technology, LLC\n"

"//\n"

"//\n"

"// This program is free software (firmware): you can redistribute it and/or\n"

"// This program is free software (firmware): you can redistribute it and/or\n"

"// modify it under the terms of  the GNU General Public License as published\n"

"// modify it under the terms of  the GNU General Public License as published\n"

Line 91...

Line 123...

"//\n"

"//\n"

"// License:    GPL, v3, as defined and found on www.gnu.org,\n"

"// License:    GPL, v3, as defined and found on www.gnu.org,\n"

"//             http://www.gnu.org/licenses/gpl.html\n"

"//             http://www.gnu.org/licenses/gpl.html\n"

"//\n"

"//\n"

"//\n"

"//\n"

"///////////////////////////////////////////////////////////////////////////\n";

"////////////////////////////////////////////////////////////////////////////////\n";

const char      prjname[] = "A Doubletime Pipelined FFT";

const char      prjname[] = "A Doubletime Pipelined FFT";

const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"

const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"

                                "//             Gisselquist Tecnology, LLC\n";

                                "//             Gisselquist Tecnology, LLC\n";

int     lgval(int vl) {

int     lgval(int vl) {

Line 115...

Line 147...

}

}

int     bflydelay(int nbits, int xtra) {

int     bflydelay(int nbits, int xtra) {

        int     cbits = nbits + xtra;

        int     cbits = nbits + xtra;

        int     delay;

        int     delay;

        if (USE_OLD_MULTIPLY) {

        if (nbits+1<cbits)

        if (nbits+1<cbits)

                delay = nbits+4;

                delay = nbits+4;

        else

        else

                delay = cbits+3;

                delay = cbits+3;

        } else {

                int     na=nbits+2, nb=cbits+1;

                if (nb<na) {

                        int tmp = nb;

                        nb = na; na = tmp;

                } delay = ((na)/2+(na&1)+2);

        }

        return delay;

        return delay;

}

}

int     lgdelay(int nbits, int xtra) {

int     lgdelay(int nbits, int xtra) {

        // The butterfly code needs to compare a valid address, of this

        // The butterfly code needs to compare a valid address, of this

Line 513...

Line 554...

                (dbg)?"_dbg":"", prjname, creator);

                (dbg)?"_dbg":"", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"

        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// spans must use the fftdoubles stage.\n"

        "\t// spans must use the fftdoubles stage.\n"

        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"

        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg                          o_sync;\n"

        "\toutput\treg                          o_sync;\n"

        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");

        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,

        TST_QTRSTAGE_LGWIDTH);

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

"\n");

"\n");

Line 726...

Line 768...

"//\n", (dbg)?"_dbg":"", prjname, creator);

"//\n", (dbg)?"_dbg":"", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"

"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"

        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

        "\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");

        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",

        TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"

                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"

"\n");

"\n");

Line 871...

Line 914...

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"

"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"

        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"

        "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);

#ifdef  TST_SHIFTADDMPY_BW

        fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);

#else

        fprintf(fp, "AWIDTH;\n");

#endif

        fprintf(fp,

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"

        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"

        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"

        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"

        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"

        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"

"\n"

"\n"

Line 932...

Line 981...

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

// build_bimpy
//
// Writes the Verilog text of the "bimpy" module to the file named by fname.
//
//   fname  Path of the file to create; it is opened with mode "w".  The same
//          string is also printed into the "Filename:" line of the emitted
//          header comment.
//
// Returns nothing.  If the file cannot be opened, a message is printed to
// stderr, perror() is called, and the function returns without writing.
//
// Output order: a comment banner (filled in with fname, prjname and creator),
// then the cpyleft text, then the module body, after which the file is closed.
void    build_bimpy(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                // NOTE(review): perror() appends ": " after its argument on
                // its own, so the trailing colon here is doubled in the output.
                perror("O/S Err was:");

                return;

        // Banner: the three %s fields are fname, prjname and creator, in
        // that order.
        fprintf(fp,

"////////////////////////////////////////////////////////////////////////////////\n"

"//\n"

"// Filename:   %s\n"

"//\n"

"// Project:    %s\n"

"//\n"

"// Purpose:    A simple 2-bit multiply based upon the fact that LUT's allow\n"

"//             6-bits of input.  In other words, I could build a 3-bit\n"

"//             multiply from 6 LUTs (5 actually, since the first could have\n"

"//             two outputs).  This would allow multiplication of three bit\n"

"//             digits, save only for the fact that you would need two bits\n"

"//             of carry.  The bimpy approach throttles back a bit and does\n"

"//             a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"

"//             carry more than one bit.  While this multiply is hardware\n"

"//             independent (and can still run under Verilator therefore),\n"

"//             it is really motivated by trying to optimize for a specific\n"

"//             piece of hardware (Xilinx-7 series ...) that has at least\n"

"//             4-input LUT's with carry chains.\n"

"//\n"

"//\n"

"//\n%s"

"//\n", fname, prjname, creator);

        // The copyright/license text is passed through "%s" rather than used
        // as the format string itself.
        fprintf(fp, "%s", cpyleft);

        // Module body: a fixed string with no conversion specifiers, so the
        // parameter defaults (BW=18, LUTB=2) are always emitted as written.
        fprintf(fp,

"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"

"\tparameter\tBW=18, // Number of bits in i_b\n"

"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"

"\tinput\t\t\t\ti_clk, i_ce;\n"

"\tinput\t\t[(LUTB-1):0]\ti_a;\n"

"\tinput\t\t[(BW-1):0]\ti_b;\n"

"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"

"\n"

"\twire [(BW+LUTB-2):0] w_r;\n"

"\twire [(BW+LUTB-3):1] c;\n"

"\n"

"\tassign\tw_r =  { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"

"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"

"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"

"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"

"\n"

"\talways @(posedge i_clk)\n"

"\t\tif (i_ce)\n"

"\t\t\to_r <= w_r + { c, 2'b0 };\n"

"\n"

"endmodule\n");

        fclose(fp);

// build_longbimpy
//
// Writes the Verilog text of the "longbimpy" module to the file named by
// fname.  The emitted module instantiates "bimpy" sub-multipliers.
//
//   fname  Path of the file to create; it is opened with mode "w".  The same
//          string is also printed into the "Filename:" line of the emitted
//          header comment.
//
// Returns nothing.  If the file cannot be opened, a message is printed to
// stderr, perror() is called, and the function returns without writing.
//
// The default of the emitted AW parameter is TST_LONGBIMPY_AW.  The default
// of BW is TST_LONGBIMPY_BW when that macro is defined at compile time, and
// the literal text "AW" otherwise.  Everything else is fixed text.
void    build_longbimpy(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                // NOTE(review): perror() appends ": " after its argument on
                // its own, so the trailing colon here is doubled in the output.
                perror("O/S Err was:");

                return;

        // Banner: the three %s fields are fname, prjname and creator, in
        // that order.
        fprintf(fp,

"////////////////////////////////////////////////////////////////////////////////\n"

"//\n"

"// Filename:   %s\n"

"//\n"

"// Project:    %s\n"

"//\n"

"// Purpose:    A portable shift and add multiply, built with the knowledge\n"

"//             of the existence of a six bit LUT and carry chain.  That\n"

"//             knowledge allows us to multiply two bits from one value\n"

"//             at a time against all of the bits of the other value.  This\n"

"//             sub multiply is called the bimpy.\n"

"//\n"

"//             For minimal processing delay, make the first parameter\n"

"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"

"//\n"

"//\n"

"//\n%s"

"//\n", fname, prjname, creator);

        // The copyright/license text is passed through "%s" rather than used
        // as the format string itself.
        fprintf(fp, "%s", cpyleft);

        // Module declaration up to and including "BW=".  The value after
        // "BW=" is supplied by the preprocessor-selected fprintf below.
        fprintf(fp,

"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"

        "\tparameter    AW=%d,  // The width of i_a, min width is 5\n"

                        "\t\t\tBW=", TST_LONGBIMPY_AW);

        // Choose the BW default at generator compile time: an explicit
        // number if TST_LONGBIMPY_BW is defined, else the text "AW".
#ifdef  TST_LONGBIMPY_BW

        fprintf(fp, "%d", TST_LONGBIMPY_BW);

#else

        fprintf(fp, "AW");

#endif

        // Remainder of the module.  This format string contains no
        // conversion specifiers and is called with no further arguments.
        fprintf(fp, ",  // The width of i_b, can be anything\n"

                        "\t\t\t// The following three parameters should not be changed\n"

                        "\t\t\t// by any implementation, but are based upon hardware\n"

                        "\t\t\t// and the above values:\n"

                        "\t\t\tOW=AW+BW,        // The output width\n"

                        "\t\t\tIW=(AW+1)&(-2),  // Internal width of A\n"

                        "\t\t\tLUTB=2,  // How many bits we can multiply by at once\n"

                        "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"

        "\tinput\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\t[(AW-1):0]\ti_a;\n"

        "\tinput\t\t[(BW-1):0]\ti_b;\n"

        "\toutput\treg\t[(AW+BW-1):0]\to_r;\n"

"\n"

        "\treg\t[(IW-1):0]\tu_a;\n"

        "\treg\t[(BW-1):0]\tu_b;\n"

        "\treg\t\t\tsgn;\n"

"\n"

        "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"

        "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"

        "\treg\t[(TLEN-1):0]\t\tr_s;\n"

        "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"

        "\tgenvar k;\n"

"\n"

        "\t// First step:\n"

        "\t// Switch to unsigned arithmetic for our multiply, keeping track\n"

        "\t// of the along the way.  We'll then add the sign again later at\n"

        "\t// the end.\n"

        "\t//\n"

        "\t// If we were forced to stay within two's complement arithmetic,\n"

        "\t// taking the absolute value here would require an additional bit.\n"

        "\t// However, because our results are now unsigned, we can stay\n"

        "\t// within the number of bits given (for now).\n"

        "\tgenerate if (IW > AW)\n"

        "\tbegin\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\n"

                        "\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"

        "\tend else begin\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\n"

                        "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"

        "\tend endgenerate\n"

"\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                        "\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"

                        "\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"

                "\t\tend\n"

"\n"

        "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"

"\n"

        "\t//\n"

        "\t// Second step: First two 2xN products.\n"

        "\t//\n"

        "\t// Since we have no tableau of additions (yet), we can do both\n"

        "\t// of the first two rows at the same time and add them together.\n"

        "\t// For the next round, we'll then have a previous sum to accumulate\n"

        "\t// with new and subsequent product, and so only do one product at\n"

        "\t// a time can follow this--but the first clock can do two at a time.\n"

        "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);\n"

        "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce) r_b[0] <= u_b;\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"

        "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"

                "\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"

                        "\t\t\t  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"

"\n"

        "\tgenerate // Keep track of intermediate values, before multiplying them\n"

        "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"

        "\tbegin : gencopies\n"

                "\t\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                        "\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"

                                "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"

                        "\t\t\tr_b[k+1] <= r_b[k];\n"

                        "\t\tend\n"

        "\tend endgenerate\n"

"\n"

        "\tgenerate // The actual multiply and accumulate stage\n"

        "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"

        "\tbegin : genstages\n"

                "\t\t// First, the multiply: 2-bits times BW bits\n"

                "\t\twire\t[(BW+LUTB-1):0] genp;\n"

                "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"

"\n"

                "\t\t// Then the accumulate step -- on the next clock\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\n"

                                "\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"

                                        "\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"

        "\tend endgenerate\n"

"\n"

        "\twire [(IW+BW-1):0]   w_r;\n"

        "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                        "\t\t\to_r <= w_r[(AW+BW-1):0];\n"

"\n"

"endmodule\n");

        fclose(fp);

void    build_dblreverse(const char *fname) {

void    build_dblreverse(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

Line 1012...

Line 1268...

"//\n"

"//\n"

"//\n");

"//\n");

        fprintf(fp,

        fprintf(fp,

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

        "\t\to_out_0, o_out_1, o_sync);\n"

        "\t\to_out_0, o_out_1, o_sync);\n"

        "\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"

        "\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"

        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"

        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"

        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"

        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);

        fprintf(fp,

"\n"

"\n"

        "\treg\t\t\tin_reset;\n"

        "\treg\t\t\tin_reset;\n"

        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"

        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"

        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"

        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"

"\n"

"\n"

Line 1181...

Line 1439...

        fprintf(fp,

        fprintf(fp,

"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

                "\t\to_left, o_right, o_aux);\n"

                "\t\to_left, o_right, o_aux);\n"

        "\t// Public changeable parameters ...\n"

        "\t// Public changeable parameters ...\n"

        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);

#ifdef  TST_BUTTERFLY_CWIDTH

        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);

#else

        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);

#endif

#ifdef  TST_BUTTERFLY_OWIDTH

        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);

#else

        fprintf(fp, "OWIDTH=IWIDTH+1;\n");

#endif

        fprintf(fp,

        "\t// Parameters specific to the core that should not be changed.\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

        "\tparameter    MPYDELAY=%d'd%d,\n"

                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"

                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\tparameter\tLGDELAY=%d;\n"

        "\tparameter\tLGDELAY=%d;\n"

Line 1195...

Line 1464...

        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"

        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"

        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"

        "\tinput\t\ti_aux;\n"

        "\tinput\t\ti_aux;\n"

        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"

        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"

        "\toutput\treg\to_aux;\n"

        "\toutput\treg\to_aux;\n"

        "\n", 16, xtracbits, lgdelay(16,xtracbits),

        "\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),

        bflydelay(16, xtracbits), lgdelay(16,xtracbits));

                lgdelay(16,xtracbits));

        fprintf(fp,

        fprintf(fp,

        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"

        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"

"\n"

"\n"

        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"

        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"

        "\treg\t\t\t\tr_aux, r_aux_2;\n"

        "\treg\t\t\t\tr_aux, r_aux_2;\n"

Line 1292...

Line 1561...

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// This should really be based upon an IF, such as in\n"

        "\t// This should really be based upon an IF, such as in\n"

        "\t// if (IWIDTH < CWIDTH) then ...\n"

        "\t// if (IWIDTH < CWIDTH) then ...\n"

        "\t// However, this is the only (other) way I know to do it.\n"

        "\t// However, this is the only (other) way I know to do it.\n"

        "\tgenerate\n"

        "\tgenerate if (CWIDTH < IWIDTH+1)\n"

        "\tif (CWIDTH < IWIDTH+1)\n"

        "\tbegin\n"

        "\tbegin\n"

                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"

                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"

                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"

                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"

                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"

                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"

                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"

                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"

                "\n"

                "\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// bit just to keep them aligned with the third,\n"

                "\t\t// bit just to keep them aligned with the third,\n"

                "\t\t// simpler, multiply.\n"

                "\t\t// simpler, multiply.\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                "\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                "\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                "\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"

                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"

        "\tend else begin\n"

        "\tend else begin\n"

                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"

                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"

                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"

                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"

                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"

                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"

                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"

                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"

                "\n"

                "\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                "\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                "\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                "\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"

                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"

        "\tend\n"

        "\tend\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n");

"\n",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",

                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");

        fprintf(fp,

        fprintf(fp,

        "\t// These values are held in memory and delayed during the\n"

        "\t// These values are held in memory and delayed during the\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

Line 2268...

Line 2542...

                                (inverse)?"I":"");

                                (inverse)?"I":"");

                if (real_fft)

                if (real_fft)

                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");

                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");

                if (!single_clock)

                if (!single_clock)

                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");

                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");

                if (USE_OLD_MULTIPLY)

                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");

                fprintf(hdr, "// Parameters for testing the longbimpy\n");

                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);

#ifdef  TST_LONGBIMPY_BW

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);

#else

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");

#endif

                fprintf(hdr, "// Parameters for testing the shift add multiply\n");

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);

#ifdef  TST_SHIFTADDMPY_BW

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);

#else

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");

#endif

#define TST_SHIFTADDMPY_AW      16

#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

                fprintf(hdr, "// Parameters for testing the butterfly\n");

                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",

                                bflydelay(TST_BUTTERFLY_IWIDTH,

                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));

                fprintf(hdr, "// Parameters for testing the quarter stage\n");

                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);

                fprintf(hdr, "// Parameters for testing the double stage\n");

                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);

                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");

                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);

                fprintf(hdr, "\n" "#endif\n\n");

                fprintf(hdr, "\n" "#endif\n\n");

                fclose(hdr);

                fclose(hdr);

Line 2614...

Line 2927...

                if (nummpy > 0) {

                if (nummpy > 0) {

                        fname = coredir + "/hwbfly.v";

                        fname = coredir + "/hwbfly.v";

                        build_hwbfly(fname.c_str(), xtracbits, rounding);

                        build_hwbfly(fname.c_str(), xtracbits, rounding);

                        // To make debugging easier, we build both of these

                fname = coredir + "/shiftaddmpy.v";

                fname = coredir + "/shiftaddmpy.v";

                build_multiply(fname.c_str());

                build_multiply(fname.c_str());

                        fname = coredir + "/longbimpy.v";

                        build_longbimpy(fname.c_str());

                        fname = coredir + "/bimpy.v";

                        build_bimpy(fname.c_str());

                if ((dbg)&&(dbgstage == 4)) {

                if ((dbg)&&(dbgstage == 4)) {

                        fname = coredir + "/qtrstage_dbg.v";

                        fname = coredir + "/qtrstage_dbg.v";

                        build_quarters(fname.c_str(), rounding, true);

                        build_quarters(fname.c_str(), rounding, true);

                fname = coredir + "/qtrstage.v";

                fname = coredir + "/qtrstage.v";

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 28 and 29