OpenCores

Rev 2	Rev 5
Line 23...	Line 23...
`"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"`	`"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"`
`"// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n"`	`"// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n"`
`"// for more details.\n"`	`"// for more details.\n"`
`"//\n"`	`"//\n"`
`"// You should have received a copy of the GNU General Public License along\n"`	`"// You should have received a copy of the GNU General Public License along\n"`
`"// with this program. If not, see <http://www.gnu.org/licenses/>.\n"`	`"// with this program. (It's in the $(ROOT)/doc directory, run make with no\n"`
	`"// target there if the PDF file isn\'t present.) If not, see\n"`
	`"// <http://www.gnu.org/licenses/> for a copy.\n"`
	`"//\n"`
`"// License: GPL, v3, as defined and found on www.gnu.org,\n"`	`"// License: GPL, v3, as defined and found on www.gnu.org,\n"`
`"// http://www.gnu.org/licenses/gpl.html\n"`	`"// http://www.gnu.org/licenses/gpl.html\n"`
`"//\n"`	`"//\n"`
`"//\n"`	`"//\n"`
`"///////////////////////////////////////////////////////////////////////////\n";`	`"///////////////////////////////////////////////////////////////////////////\n";`
Line 53...	Line 56...

`int lgdelay(int nbits, int xtra) {`	`int lgdelay(int nbits, int xtra) {`
`int cbits = nbits + xtra;`	`int cbits = nbits + xtra;`
`int delay = nbits + 2;`	`int delay = nbits + 2;`
`if (nbits+1<cbits)`	`if (nbits+1<cbits)`
`delay = nbits+2;`	`delay = nbits+4;`
`else`	`else`
`delay = cbits+1;`	`delay = cbits+3;`
`return lgval(delay);`	`return lgval(delay);`
`}`	`}`

`void build_quarters(const char *fname) {`	`void build_quarters(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
Line 74...	Line 77...
`"//\n"`	`"//\n"`
`"// Filename: qtrstage.v\n"`	`"// Filename: qtrstage.v\n"`
`"// \n"`	`"// \n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This file is (almost) a Verilog source file. It is meant to\n"`	`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`
`"// be used by a FFT core compiler to generate FFTs which may be\n"`	`"// frequency FFT. This particular implementation is optimized\n"`
`"// used as part of an FFT core. Specifically, this file \n"`	`"// so that all of the multiplies are accomplished by additions\n"`
`"// encapsulates the options of a 4 point, decimation in\n"`	`"// and multiplexers only.\n"`
`"// frequency FFT-stage. This particular stage is optimized so\n"`	`"//\n"`
`"// that all of the multiplies are accomplished by additions and\n"`
`"// mux'es.\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n",`	`"//\n",`
`prjname, creator);`	`prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`

Line 155...	Line 156...
`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"`	`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"`
`"\n"`	`"\n"`
`"\t\t\t// In sequence, clock = 1\n"`	`"\t\t\t// In sequence, clock = 1\n"`
`"\t\t\tif (pipeline[1])\n"`	`"\t\t\tif (pipeline[1])\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t ob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"`	`"\t\t\t\tob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"`
`"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"`	`"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"`
`"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`	`"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`
`"\t\t\t\tif (~ODD)\n"`	`"\t\t\t\tif (~ODD)\n"`
`"\t\t\t\tbegin\n"`	`"\t\t\t\tbegin\n"`
`"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
Line 201...	Line 202...
`"// Filename: dblstage.v\n"`	`"// Filename: dblstage.v\n"`
`"//\n"`	`"//\n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This is part of an FPGA implementation that will process\n"`	`"// Purpose: This is part of an FPGA implementation that will process\n"`
`"// data at two samples per clock. If you notice from the\n"`	`"// the final stage of a decimate-in-frequency FFT, running\n"`
`"// derivation of an FFT, the only time both even and odd\n"`	`"// through the data at two samples per clock. If you notice\n"`
`"// samples are used at the same time is the first stage.\n"`	`"// from the derivation of an FFT, the only time both even and\n"`
`"// Therefore, after this stage and these twiddles, all of the\n"`	`"// odd samples are used at the same time is in this stage.\n"`
`"// other stages can run two stages at a time at one sample per\n"`	`"// Therefore, other than this stage and these twiddles, all of\n"`
`"// clock.\n"`	`"// the other stages can run two stages at a time at one sample\n"`
	`"// per clock.\n"`
`"//\n"`	`"//\n"`
`"// In this implementation, the output is valid one clock after\n"`	`"// In this implementation, the output is valid one clock after\n"`
`"// the input is valid. The output also accumulates one bit\n"`	`"// the input is valid. The output also accumulates one bit\n"`
`"// above and beyond the number of bits in the input.\n"`	`"// above and beyond the number of bits in the input.\n"`
`"// \n"`	`"// \n"`
Line 240...	Line 242...
`"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"`	`"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"`
`"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"`	`"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"`
`"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"`	`"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"`
`"\t\t\t\t\to_out_1r, o_out_1i;\n"`	`"\t\t\t\t\to_out_1r, o_out_1i;\n"`
`"\n"`	`"\n"`
`"\t// Don't forget that we accumulate a bit by adding two values together.\n"`	`"\t// Don't forget that we accumulate a bit by adding two values\n"`
`"\t// Therefore our intermediate value must have one more bit than the\n"`	`"\t// together. Therefore our intermediate value must have one more\n"`
`"\t// two originals.\n"`	`"\t// bit than the two originals.\n"`
`"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"`	`"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"`
`"\n"`	`"\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
Line 255...	Line 257...
`"\t\t\t//\n"`	`"\t\t\t//\n"`
`"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"`	`"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"`
`"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"`	`"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n"`
`"\t// Now, if the master control program doesn't want to keep all of our\n"`	`"\t// Now, if the master control program doesn't want to keep all of\n"`
`"\t// bits, we can shift down to OWIDTH bits here.\n"`	`"\t// our bits, we can shift down to OWIDTH bits here.\n"`
`"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\n"`	`"\n"`
Line 324...	Line 326...
`"\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"`	`"\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"`
`"\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"`	`"\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"`
`"\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"`	`"\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"`
`"\tgenvar k;\n"`	`"\tgenvar k;\n"`
`"\n"`	`"\n"`
	`"\t// If we were forced to stay within two\'s complement arithmetic,\n"`
	`"\t// taking the absolute value here would require an additional bit.\n"`
	`"\t// However, because our results are now unsigned, we can stay\n"`
	`"\t// within the number of bits given (for now).\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"`	`"\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"`
`"\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"`	`"\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"`
Line 429...	Line 435...
`"// mem[11xxx1] = s_1[m]\n"`	`"// mem[11xxx1] = s_1[m]\n"`
`"// o_0[m] = mem[00xxx1]\n"`	`"// o_0[m] = mem[00xxx1]\n"`
`"// o_1[m] = mem[01xxx1]\n"`	`"// o_1[m] = mem[01xxx1]\n"`
`"// ...\n"`	`"// ...\n"`
`"//\n"`	`"//\n"`
	`"// The answer is that, yes we can but: we need to use four memory banks\n"`
	`"// to do it properly. These four banks are defined by the two bits\n"`
	`"// that determine the top and bottom of the correct address. Larger\n"`
	`"// FFT\'s would require more memories.\n"`
	`"//\n"`
`"//\n");`	`"//\n");`
`fprintf(fp,`	`fprintf(fp,`
`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`	`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`
`"\t\to_out_0, o_out_1, o_sync);\n"`	`"\t\to_out_0, o_out_1, o_sync);\n"`
`"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"`	`"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"`
Line 578...	Line 589...

`fprintf(fp,`	`fprintf(fp,`
`"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"`	`"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"`
`"\t\to_left, o_right, o_aux);\n"`	`"\t\to_left, o_right, o_aux);\n"`
`"\t// Public changeable parameters ...\n"`	`"\t// Public changeable parameters ...\n"`
`"\tparameter IWIDTH=16,CWIDTH=IWIDTH,OWIDTH=IWIDTH;\n"`	`"\tparameter IWIDTH=16,CWIDTH=IWIDTH+4,OWIDTH=IWIDTH+1;\n"`
`"\t// Parameters specific to the core that should not be changed.\n"`	`"\t// Parameters specific to the core that should not be changed.\n"`
`"\tparameter MPYDELAY=(IWIDTH+1 < CWIDTH)?(IWIDTH+2):(CWIDTH+1),\n"`	`"\tparameter MPYDELAY=5'd20, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"`
`"\t\t\tSHIFT=0, ROUND=1;\n"`	`"\t\t\tSHIFT=0, ROUND=0;\n"`
`"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"`	`"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"`
`"\t// this value is fractional, then round up to the nearest\n"`	`"\t// this value is fractional, then round up to the nearest\n"`
`"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"`	`"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"`
`"\tparameter LGDELAY=5;\n"`	`"\tparameter\tLGDELAY=5;\n"`
`"\tinput i_clk, i_ce;\n"`	`"\tinput\t\ti_clk, i_ce;\n"`
`"\tinput [(2*CWIDTH-1):0] i_coef;\n"`	`"\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"`
`"\tinput [(2*IWIDTH-1):0] i_left, i_right;\n"`	`"\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"`
`"\tinput i_aux;\n"`	`"\tinput\t\ti_aux;\n"`
`"\toutput wire [(2*OWIDTH-1):0] o_left, o_right;\n"`	`"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"`
`"\toutput wire o_aux;\n"`	`"\toutput\twire o_aux;\n"`
`"\n"`	`"\n"`
`"\twire [(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"`	`"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"`
`"\n"`	`"\n"`
`"\treg [(2*IWIDTH-1):0] r_left, r_right;\n"`	`"\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"`
`"\treg r_aux, r_aux_2;\n"`	`"\treg\t\t\t\tr_aux, r_aux_2;\n"`
`"\treg [(2*CWIDTH-1):0] r_coef, r_coef_2;\n"`	`"\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"`
`"\twire [(CWIDTH-1):0] r_coef_r, r_coef_i;\n"`	`"\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"`
`"\tassign r_coef_r = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"`	`"\tassign\tr_coef_r = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"`
`"\tassign r_coef_i = r_coef_2[ (CWIDTH-1):0];\n"`	`"\tassign\tr_coef_i = r_coef_2[ ( CWIDTH-1):0];\n"`
`"\twire [(IWIDTH-1):0] r_left_r, r_left_i, r_right_r, r_right_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"`
`"\tassign r_left_r = i_left[ (2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\tr_left_r = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign r_left_i = i_left[ (IWIDTH-1):0];\n"`	`"\tassign\tr_left_i = r_left[ (IWIDTH-1):0];\n"`
`"\tassign r_right_r = i_right[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign r_right_i = i_right[(IWIDTH-1):0];\n"`	`"\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\treg [(IWIDTH):0] r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"`	`"\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"`
`"\n"`	`"\n"`
`"\treg [(LGDELAY-1):0] fifo_addr;\n"`	`"\treg [(LGDELAY-1):0] fifo_addr;\n"`
`"\twire [(LGDELAY-1):0] fifo_read_addr;\n"`	`"\twire [(LGDELAY-1):0] fifo_read_addr;\n"`
`"\t/* verilator lint_off WIDTH */\n"`
`"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"`	`"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"`
`"\t/* verilator lint_on WIDTH */\n"`
`"\treg [(2*IWIDTH+2):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n"`	`"\treg [(2*IWIDTH+2):0] fifo_left [ 0:((1<<LGDELAY)-1)];\n"`
`"\n"`	`"\n");`
	`fprintf(fp,`
	`"\t// Set up the input to the multiply\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\t// One clock just latches the inputs\n"`	`"\t\t\t// One clock just latches the inputs\n"`
`"\t\t\tr_left <= i_left; // No change in # of bits\n"`	`"\t\t\tr_left <= i_left; // No change in # of bits\n"`
Line 633...	Line 644...
`"\t\t\tr_dif_i <= r_left_i - r_right_i;\n"`	`"\t\t\tr_dif_i <= r_left_i - r_right_i;\n"`
`"\t\t\t// Other inputs are simply delayed on second clock\n"`	`"\t\t\t// Other inputs are simply delayed on second clock\n"`
`"\t\t\tr_aux_2 <= r_aux;\n"`	`"\t\t\tr_aux_2 <= r_aux;\n"`
`"\t\t\tr_coef_2<= r_coef;\n"`	`"\t\t\tr_coef_2<= r_coef;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n");`
	`fprintf(fp,`
	`"\t// Don\'t forget to record the even side, since it doesn\'t need\n"`
	`"\t// to be multiplied, but yet we still need the results in sync\n"`
	`"\t// with the answer when it is ready.\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\t// Need to delay the sum side--nothing else happens\n"`	`"\t\t\t// Need to delay the sum side--nothing else happens\n"`
`"\t\t\t// to it, but it needs to stay synchronized with the\n"`	`"\t\t\t// to it, but it needs to stay synchronized with the\n"`
`"\t\t\t// right side.\n"`	`"\t\t\t// right side.\n"`
`"\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"`	`"\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"`
`"\t\t\tfifo_addr <= fifo_addr + 1;\n"`	`"\t\t\tfifo_addr <= fifo_addr + 1;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n"`
`"\twire [(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"`	`"\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"`
`"\tassign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"`	`"\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"`
`"\tassign ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"`	`"\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"`
`"\twire [(IWIDTH+CWIDTH+1+2-1):0] p_one, p_two, p_three;\n"`	`"\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"`
`"\n"`	`"\n"`
`"\t// Multiply output is always a width of IWIDTH+CWIDTH-1. ALWAYS.\n"`	`"\n");`
`"\t// We take care of dropping the width to OWIDTH in our routine\n"`	`fprintf(fp,`
`"\t// below, but this is the definition of a multiply.\n"`	`"\t// Multiply output is always a width of the sum of the widths of\n"`
`"\n"`	`"\t// the two inputs. ALWAYS. This is independent of the number of\n"`
`"\n"`	`"\t// bits in p_one, p_two, or p_three. These values needed to \n"`
`"\n"`	`"\t// accumulate a bit (or two) each. However, this approach to a\n"`
`"// This should really be based upon an IF\n"`	`"\t// three multiply complex multiply cannot increase the total\n"`
`"// if (IWIDTH < CWIDTH) then ...\n"`	`"\t// number of bits in our final output. We\'ll take care of\n"`
	`"\t// dropping back down to the proper width, OWIDTH, in our routine\n"`
	`"\t// below.\n"`
	`"\n"`
	`"\n");`
	`fprintf(fp,`
	`"\t// We accomplish here \"Karatsuba\" multiplication. That is,\n"`
	`"\t// by doing three multiplies we accomplish the work of four.\n"`
	`"\t// Let\'s prove to ourselves that this works ... We wish to\n"`
	`"\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"`
	`"\t//\ta + jb = r_dif_r + j r_dif_i, and\n"`
	`"\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"`
	`"\t// We do this by calculating the intermediate products P1, P2,\n"`
	`"\t// and P3 as\n"`
	`"\t//\tP1 = ac\n"`
	`"\t//\tP2 = bd\n"`
	`"\t//\tP3 = (a + b) * (c + d)\n"`
	`"\t// and then complete our final answer with\n"`
	`"\t//\tac - bd = P1 - P2 (this checks)\n"`
	`"\t//\tad + bc = P3 - P2 - P1\n"`
	`"\t//\t = (ac + bc + ad + bd) - bd - ac\n"`
	`"\t//\t = bc + ad (this checks)\n"`
	`"\n"`
	`"\n");`
	`fprintf(fp,`
	`"\t// This should really be based upon an IF, such as in\n"`
	`"\t// if (IWIDTH < CWIDTH) then ...\n"`
	`"\t// However, this is the only (other) way I know to do it.\n"`
`"\tgenerate\n"`	`"\tgenerate\n"`
`"\tif (CWIDTH < IWIDTH+1)\n"`	`"\tif (CWIDTH < IWIDTH+1)\n"`
`"\tbegin\n"`	`"\tbegin\n"`
`"\t\t// We need to pad these first two multiplies by an extra\n"`	`"\t\t// We need to pad these first two multiplies by an extra\n"`
`"\t\t// just to keep them aligned with the third, simpler,\n"`	`"\t\t// bit just to keep them aligned with the third,\n"`
`"\t\t// multiply.\n"`	`"\t\t// simpler, multiply.\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"`	`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"`
`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"`	`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"`	`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"`
`"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"`	`"\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"`
`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"`
`"\t\t\t\tir_coef_i+ir_coef_r, r_dif_r + r_dif_i, p_three);\n"`	`"\t\t\t\tir_coef_i+ir_coef_r,\n"`
	`"\t\t\t\tr_dif_r + r_dif_i,\n"`
	`"\t\t\t\tp_three);\n"`
`"\tend else begin\n"`	`"\tend else begin\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"`
`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"`	`"\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"`	`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"`
`"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"`	`"\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"`
`"\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_two);\n"`	`"\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"`
`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"`	`"\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"`
`"\t\t\t\tr_dif_r+r_dif_i,\n"`	`"\t\t\t\tr_dif_r+r_dif_i,\n"`
`"\t\t\t\tir_coef_i+ir_coef_r,\n"`	`"\t\t\t\tir_coef_i+ir_coef_r,\n"`
`"\t\t\t\tp_three);\n"`	`"\t\t\t\tp_three);\n"`
`"\tend\n"`	`"\tend\n"`
`"\tendgenerate\n"`	`"\tendgenerate\n"`
`"\n"`	`"\n");`
	`fprintf(fp,`
	`"\t// These values are held in memory and delayed during the\n"`
	`"\t// multiply. Here, we recover them. During the multiply,\n"`
	`"\t// values were multiplied by 2^(CWIDTH-2)exp{-j2pi...},\n"`
	`"\t// therefore, the left_x values need to be right shifted by\n"`
	`"\t// CWIDTH-2 as well. The additional bits come from a sign\n"`
	`"\t// extension.\n"`
`"\twire aux;\n"`	`"\twire aux;\n"`
`"\twire [(IWIDTH+CWIDTH):0] left_i, left_r;\n"`	`"\twire\tsigned\t[(IWIDTH+CWIDTH):0] fifo_i, fifo_r;\n"`
`"\treg [(2*IWIDTH+2):0] fifo_read;\n"`	`"\treg\t\t[(2*IWIDTH+2):0] fifo_read;\n"`
`"\tassign left_r = { fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH){1'b0}} };\n"`	`"\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"`
`"\tassign left_i = { fifo_read[((IWIDTH+1)-1):0], {(CWIDTH){1'b0}} };\n"`	`"\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"`
`"\tassign aux = fifo_read[2*IWIDTH+2];\n"`	`"\tassign\taux = fifo_read[2*IWIDTH+2];\n"`
`"\n"`	`"\n"`
`"\n"`	`"\n"`
`"\treg [(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i, b_right_r, b_right_i, mpy_r, mpy_i;\n"`	`"\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"`
`"\treg [(CWIDTH+IWIDTH+3-1):0] rnd;\n"`	`"\t\t\t\t\t\tb_right_r, b_right_i;\n"`
`"\tassign rnd = ((~ROUND)||(SHIFT==0))?\n"`	`"\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"`
`"\t\t\t({(CWIDTH+IWIDTH+3){1'b0}})\n"`	`"\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] rnd;\n"`
`"\t\t\t: ({ {(OWIDTH+1+SHIFT){1'b0}},1'b1,{(CWIDTH+IWIDTH+3-2-OWIDTH-SHIFT){1'b0}} });\n"`	`"\tgenerate\n"`
	`"\tif ((~ROUND)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<1))\n"`
	`"\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"`
	`"\telse\n"`
	`"\t\tassign rnd = ({ {(OWIDTH+3+SHIFT){1'b0}},1'b1,\n"`
	`"\t\t\t\t{(CWIDTH+IWIDTH-OWIDTH-SHIFT-1){1'b0}} });\n"`
	`"\tendgenerate\n"`
	`"\n");`
	`fprintf(fp,`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\t// First clock, recover all values\n"`	`"\t\t\t// First clock, recover all values\n"`
`"\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"`	`"\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"`
`"\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"`	`"\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"`
	`"\t\t\t// although they only need to be (IWIDTH+1)\n"`
	`"\t\t\t// + (CWIDTH) bits wide. (We\'ve got two\n"`
	`"\t\t\t// extra bits we need to get rid of.)\n"`
`"\t\t\tmpy_r <= p_one - p_two;\n"`	`"\t\t\tmpy_r <= p_one - p_two;\n"`
`"\t\t\tmpy_i <= p_three - p_one - p_two;\n"`	`"\t\t\tmpy_i <= p_three - p_one - p_two;\n"`
`"\n"`	`"\n"`
`"\t\t\t// Second clock, round and latch for final clock\n"`	`"\t\t\t// Second clock, round and latch for final clock\n"`
`"\t\t\tb_right_r <= mpy_r + rnd;\n"`	`"\t\t\tb_right_r <= mpy_r + rnd;\n"`
`"\t\t\tb_right_i <= mpy_i + rnd;\n"`	`"\t\t\tb_right_i <= mpy_i + rnd;\n"`
`"\t\t\tb_left_r <= { {2{left_r[(IWIDTH+CWIDTH)]}},left_r } + rnd;\n"`	`"\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"`
`"\t\t\tb_left_i <= { {2{left_i[(IWIDTH+CWIDTH)]}},left_i } + rnd;\n"`	`"\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"`
`"\t\t\to_aux <= aux;\n"`	`"\t\t\to_aux <= aux;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n");`
	`fprintf(fp,`
`"\t// Final clock--clock and remove unnecessary bits.\n"`	`"\t// Final clock--clock and remove unnecessary bits.\n"`
`"\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to OWIDTH,\n"`	`"\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"`
`"\t// and SHIFT by SHIFT bits in the process.\n"`	`"\t// OWIDTH, and SHIFT by SHIFT bits in the process. The trick is\n"`
`"\tassign o_left_r = b_left_r[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"`	`"\t// that we don\'t need (IWIDTH+CWIDTH+3) bits. We\'ve accumulated\n"`
`"\tassign o_left_i = b_left_i[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"`	`"\t// them, but the actual values will never fill all these bits.\n"`
`"\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"`	`"\t// In particular, we only need:\n"`
`"\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"`	`"\t//\t IWIDTH bits for the input\n"`
`"\n"`	`"\t//\t +1 bit for the add/subtract\n"`
	`"\t//\t+CWIDTH bits for the coefficient multiply\n"`
	`"\t//\t +1 bit for the add/subtract in the complex multiply\n"`
	`"\t//\t ------\n"`
	`"\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"`
	`"\t//\n"`
	`"\t// However, the coefficient multiply multiplied by a maximum value\n"`
	`"\t// of 2^(CWIDTH-2). Thus, we only have\n"`
	`"\t//\t IWIDTH bits for the input\n"`
	`"\t//\t +1 bit for the add/subtract\n"`
	`"\t//\t+CWIDTH-2 bits for the coefficient multiply\n"`
	`"\t//\t +1 (optional) bit for the add/subtract in the cpx mpy.\n"`
	`"\t//\t -------- ... multiply. (This last bit may be shifted out.)\n"`
	`"\t//\t (IWIDTH+CWIDTH) valid output bits. \n"`
	`"\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"`
	`"\t// or if he wishes to arbitrarily shift some of these off (via\n"`
	`"\t// SHIFT) we accomplish that here.\n"`
	`"\tassign o_left_r = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"`
	`"\tassign o_left_i = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"`
	`"\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"`
	`"\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"`
	`"\n"`
	`"\t// As a final step, we pack our outputs into two packed two\'s\n"`
	`"\t// complement numbers per output word, so that each output word\n"`
	`"\t// has (2*OWIDTH) bits in it, with the top half being the real\n"`
	`"\t// portion and the bottom half being the imaginary portion.\n"`
`"\tassign o_left = { o_left_r, o_left_i };\n"`	`"\tassign o_left = { o_left_r, o_left_i };\n"`
`"\tassign o_right= { o_right_r,o_right_i};\n"`	`"\tassign o_right= { o_right_r,o_right_i};\n"`
`"\n"`	`"\n"`
`"endmodule\n");`	`"endmodule\n");`
`fclose(fp);`	`fclose(fp);`
Line 905...	Line 993...
`"\t\t\t\toB <= oB + 1;\n"`	`"\t\t\t\toB <= oB + 1;\n"`
`"\t\t\t\to_sync <= 1'b0;\n"`	`"\t\t\t\to_sync <= 1'b0;\n"`
`"\t\t\tend else\n"`	`"\t\t\tend else\n"`
`"\t\t\t\to_sync <= 1'b0;\n"`	`"\t\t\t\to_sync <= 1'b0;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n", (inv)?"i":"");`
	`fprintf(fstage,`
`"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"`	`"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"`
`"\t\t\t.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"`	`"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"`
`"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"`	`"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"`
`"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"`	`"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"`
`"endmodule;\n",`	`"endmodule;\n",`
`(inv)?"i":"");`	`lgdelay(nbits, xtra), (1<xtra)?(nbits+4):(nbits+xtra+3));`
`}`	`}`

`void usage(void) {`	`void usage(void) {`
`fprintf(stderr,`	`fprintf(stderr,`
`"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"`	`"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"`

Line 23...

"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"

"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"

"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"

"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"

"// for more details.\n"

"// for more details.\n"

"//\n"

"//\n"

"// You should have received a copy of the GNU General Public License along\n"

"// You should have received a copy of the GNU General Public License along\n"

"// with this program.  If not, see <http://www.gnu.org/licenses/>.\n"

"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"

"// target there if the PDF file isn\'t present.)  If not, see\n"

"// <http://www.gnu.org/licenses/> for a copy.\n"

"//\n"

"// License:    GPL, v3, as defined and found on www.gnu.org,\n"

"// License:    GPL, v3, as defined and found on www.gnu.org,\n"

"//             http://www.gnu.org/licenses/gpl.html\n"

"//             http://www.gnu.org/licenses/gpl.html\n"

"//\n"

"//\n"

"//\n"

"//\n"

"///////////////////////////////////////////////////////////////////////////\n";

"///////////////////////////////////////////////////////////////////////////\n";

Line 53...

Line 56...

int     lgdelay(int nbits, int xtra) {

int     lgdelay(int nbits, int xtra) {

        int     cbits = nbits + xtra;

        int     cbits = nbits + xtra;

        int     delay = nbits + 2;

        int     delay = nbits + 2;

        if (nbits+1<cbits)

        if (nbits+1<cbits)

                delay = nbits+2;

                delay = nbits+4;

        else

        else

                delay = cbits+1;

                delay = cbits+3;

        return lgval(delay);

        return lgval(delay);

void    build_quarters(const char *fname) {

void    build_quarters(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

Line 74...

Line 77...

"//\n"

"//\n"

"// Filename:   qtrstage.v\n"

"// Filename:   qtrstage.v\n"

"//             \n"

"//             \n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"//             be used by a FFT core compiler to generate FFTs which may be\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//             used as part of an FFT core.  Specifically, this file \n"

"//             so that all of the multiplies are accomplished by additions\n"

"//             encapsulates the options of a 4 point, decimation in\n"

"//             and multiplexers only.\n"

"//             frequency FFT-stage.  This particular stage is optimized so\n"

"//\n"

"//             that all of the multiplies are accomplished by additions and\n"

"//             mux'es.\n"

"//\n%s"

"//\n%s"

"//\n",

"//\n",

                prjname, creator);

                prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

Line 155...

Line 156...

"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"

"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"

"\n"

"\n"

"\t\t\t// In sequence, clock = 1\n"

"\t\t\t// In sequence, clock = 1\n"

"\t\t\tif (pipeline[1])\n"

"\t\t\tif (pipeline[1])\n"

"\t\t\tbegin\n"

"\t\t\tbegin\n"

"\t\t\t ob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"

                        "\t\t\t\tob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"

"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"

"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"

"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

"\t\t\t\tif (~ODD)\n"

"\t\t\t\tif (~ODD)\n"

"\t\t\t\tbegin\n"

"\t\t\t\tbegin\n"

"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

Line 201...

Line 202...

"// Filename:   dblstage.v\n"

"// Filename:   dblstage.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"//             data at two samples per clock.  If you notice from the\n"

"//             the final stage of a decimate-in-frequency FFT, running\n"

"//             derivation of an FFT, the only time both even and odd\n"

"//             through the data at two samples per clock.  If you notice\n"

"//             samples are used at the same time is the first stage.\n"

"//             from the derivation of an FFT, the only time both even and\n"

"//             Therefore, after this stage and these twiddles, all of the\n"

"//             odd samples are used at the same time is in this stage.\n"

"//             other stages can run two stages at a time at one sample per\n"

"//             Therefore, other than this stage and these twiddles, all of\n"

"//             clock.\n"

"//             the other stages can run two stages at a time at one sample\n"

"//             per clock.\n"

"//\n"

"//\n"

"//             In this implementation, the output is valid one clock after\n"

"//             In this implementation, the output is valid one clock after\n"

"//             the input is valid.  The output also accumulates one bit\n"

"//             the input is valid.  The output also accumulates one bit\n"

"//             above and beyond the number of bits in the input.\n"

"//             above and beyond the number of bits in the input.\n"

"//             \n"

"//             \n"

Line 240...

Line 242...

"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"

"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"

"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"

"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"

"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"

"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"

"\t\t\t\t\to_out_1r, o_out_1i;\n"

"\t\t\t\t\to_out_1r, o_out_1i;\n"

"\n"

"\n"

"\t// Don't forget that we accumulate a bit by adding two values together.\n"

        "\t// Don't forget that we accumulate a bit by adding two values\n"

"\t// Therefore our intermediate value must have one more bit than the\n"

        "\t// together. Therefore our intermediate value must have one more\n"

"\t// two originals.\n"

        "\t// bit than the two originals.\n"

"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"

"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"

"\n"

"\n"

"\talways @(posedge i_clk)\n"

"\talways @(posedge i_clk)\n"

"\t\tif (i_ce)\n"

"\t\tif (i_ce)\n"

"\t\tbegin\n"

"\t\tbegin\n"

Line 255...

Line 257...

"\t\t\t//\n"

"\t\t\t//\n"

"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"

"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"

"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"

"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"

"\t\tend\n"

"\t\tend\n"

"\n"

"\n"

"\t// Now, if the master control program doesn't want to keep all of our\n"

        "\t// Now, if the master control program doesn't want to keep all of\n"

"\t// bits, we can shift down to OWIDTH bits here.\n"

        "\t// our bits, we can shift down to OWIDTH bits here.\n"

"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\n"

"\n"

Line 324...

Line 326...

        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"

        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"

        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"

        "\tgenvar k;\n"

        "\tgenvar k;\n"

"\n"

"\n"

        "\t// If we were forced to stay within two\'s complement arithmetic,\n"

        "\t// taking the absolute value here would require an additional bit.\n"

        "\t// However, because our results are now unsigned, we can stay\n"

        "\t// within the number of bits given (for now).\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"

                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"

                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"

                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"

Line 429...

Line 435...

"//     mem[11xxx1] = s_1[m]\n"

"//     mem[11xxx1] = s_1[m]\n"

"//     o_0[m] = mem[00xxx1]\n"

"//     o_0[m] = mem[00xxx1]\n"

"//     o_1[m] = mem[01xxx1]\n"

"//     o_1[m] = mem[01xxx1]\n"

"//     ...\n"

"//     ...\n"

"//\n"

"//\n"

"//     The answer is that, yes we can but: we need to use four memory banks\n"

"//     to do it properly.  These four banks are defined by the two bits\n"

"//     that determine the top and bottom of the correct address.  Larger\n"

"//     FFT\'s would require more memories.\n"

"//\n"

"//\n");

"//\n");

        fprintf(fp,

        fprintf(fp,

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

"\t\to_out_0, o_out_1, o_sync);\n"

"\t\to_out_0, o_out_1, o_sync);\n"

"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"

"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"

Line 578...

Line 589...

        fprintf(fp,

        fprintf(fp,

"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"

"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"

"\t\to_left, o_right, o_aux);\n"

"\t\to_left, o_right, o_aux);\n"

"\t// Public changeable parameters ...\n"

"\t// Public changeable parameters ...\n"

"\tparameter IWIDTH=16,CWIDTH=IWIDTH,OWIDTH=IWIDTH;\n"

        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+4,OWIDTH=IWIDTH+1;\n"

"\t// Parameters specific to the core that should not be changed.\n"

"\t// Parameters specific to the core that should not be changed.\n"

"\tparameter    MPYDELAY=(IWIDTH+1 < CWIDTH)?(IWIDTH+2):(CWIDTH+1),\n"

        "\tparameter    MPYDELAY=5'd20, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

"\t\t\tSHIFT=0, ROUND=1;\n"

                        "\t\t\tSHIFT=0, ROUND=0;\n"

"\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

"\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

"\t// this value is fractional, then round up to the nearest\n"

"\t// this value is fractional, then round up to the nearest\n"

"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

"\tparameter    LGDELAY=5;\n"

        "\tparameter\tLGDELAY=5;\n"

"\tinput                i_clk, i_ce;\n"

        "\tinput\t\ti_clk, i_ce;\n"

"\tinput                [(2*CWIDTH-1):0] i_coef;\n"

        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"

"\tinput                [(2*IWIDTH-1):0] i_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"

"\tinput                i_aux;\n"

        "\tinput\t\ti_aux;\n"

"\toutput       wire    [(2*OWIDTH-1):0] o_left, o_right;\n"

        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"

"\toutput       wire    o_aux;\n"

        "\toutput\twire o_aux;\n"

"\n"

"\n"

"\twire [(OWIDTH-1):0]  o_left_r, o_left_i, o_right_r, o_right_i;\n"

        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"

"\n"

"\n"

"\treg  [(2*IWIDTH-1):0]        r_left, r_right;\n"

        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"

"\treg                          r_aux, r_aux_2;\n"

        "\treg\t\t\t\tr_aux, r_aux_2;\n"

"\treg  [(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"

        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"

"\twire [(CWIDTH-1):0]          r_coef_r, r_coef_i;\n"

        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"

"\tassign       r_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"

        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"

"\tassign       r_coef_i  = r_coef_2[ (CWIDTH-1):0];\n"

        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"

"\twire [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"

"\tassign       r_left_r  = i_left[ (2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"

"\tassign       r_left_i  = i_left[ (IWIDTH-1):0];\n"

        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"

"\tassign       r_right_r = i_right[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"

"\tassign       r_right_i = i_right[(IWIDTH-1):0];\n"

        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"

"\n"

"\n"

"\treg  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"

        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"

"\n"

"\n"

"\treg  [(LGDELAY-1):0] fifo_addr;\n"

"\treg  [(LGDELAY-1):0] fifo_addr;\n"

"\twire [(LGDELAY-1):0] fifo_read_addr;\n"

"\twire [(LGDELAY-1):0] fifo_read_addr;\n"

"\t/* verilator lint_off WIDTH */\n"

"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"

"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"

"\t/* verilator lint_on WIDTH */\n"

"\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"

"\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"

"\n"

"\n");

        fprintf(fp,

        "\t// Set up the input to the multiply\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// One clock just latches the inputs\n"

                        "\t\t\t// One clock just latches the inputs\n"

                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"

                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"

Line 633...

Line 644...

                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"

                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"

                        "\t\t\t// Other inputs are simply delayed on second clock\n"

                        "\t\t\t// Other inputs are simply delayed on second clock\n"

                        "\t\t\tr_aux_2 <= r_aux;\n"

                        "\t\t\tr_aux_2 <= r_aux;\n"

                        "\t\t\tr_coef_2<= r_coef;\n"

                        "\t\t\tr_coef_2<= r_coef;\n"

        "\t\tend\n"

        "\t\tend\n"

"\n"

"\n");

        fprintf(fp,

        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"

        "\t// to be multiplied, but yet we still need the results in sync\n"

        "\t// with the answer when it is ready.\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// Need to delay the sum side--nothing else happens\n"

                        "\t\t\t// Need to delay the sum side--nothing else happens\n"

                        "\t\t\t// to it, but it needs to stay synchronized with the\n"

                        "\t\t\t// to it, but it needs to stay synchronized with the\n"

                        "\t\t\t// right side.\n"

                        "\t\t\t// right side.\n"

                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"

                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"

                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"

                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\twire [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"

        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"

        "\tassign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"

        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"

        "\tassign ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"

        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"

        "\twire [(IWIDTH+CWIDTH+1+2-1):0]       p_one, p_two, p_three;\n"

        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"

"\n"

"\n"

        "\t// Multiply output is always a width of IWIDTH+CWIDTH-1.  ALWAYS.\n"

"\n");

        "\t// We take care of dropping the width to OWIDTH in our routine\n"

        fprintf(fp,

        "\t// below, but this is the definition of a multiply.\n"

        "\t// Multiply output is always a width of the sum of the widths of\n"

"\n"

        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"

"\n"

        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"

"\n"

        "\t// accumulate a bit (or two) each.  However, this approach to a\n"

"// This should really be based upon an IF\n"

        "\t// three multiply complex multiply cannot increase the total\n"

"// if (IWIDTH < CWIDTH) then ...\n"

        "\t// number of bits in our final output.  We\'ll take care of\n"

        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"

        "\t// below.\n"

"\n"

"\n");

        fprintf(fp,

        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"

        "\t// by doing three multiplies we accomplish the work of four.\n"

        "\t// Let\'s prove to ourselves that this works ... We wish to\n"

        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"

        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"

        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"

        "\t// We do this by calculating the intermediate products P1, P2,\n"

        "\t// and P3 as\n"

        "\t//\tP1 = ac\n"

        "\t//\tP2 = bd\n"

        "\t//\tP3 = (a + b) * (c + d)\n"

        "\t// and then complete our final answer with\n"

        "\t//\tac - bd = P1 - P2 (this checks)\n"

        "\t//\tad + bc = P3 - P2 - P1\n"

        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"

        "\t//\t        = bc + ad (this checks)\n"

"\n"

"\n");

        fprintf(fp,

        "\t// This should really be based upon an IF, such as in\n"

        "\t// if (IWIDTH < CWIDTH) then ...\n"

        "\t// However, this is the only (other) way I know to do it.\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\tif (CWIDTH < IWIDTH+1)\n"

        "\tif (CWIDTH < IWIDTH+1)\n"

        "\tbegin\n"

        "\tbegin\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// just to keep them aligned with the third, simpler,\n"

                "\t\t// bit just to keep them aligned with the third,\n"

                "\t\t// multiply.\n"

                "\t\t// simpler, multiply.\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                        "\t\t\t\tir_coef_i+ir_coef_r, r_dif_r + r_dif_i, p_three);\n"

                        "\t\t\t\tir_coef_i+ir_coef_r,\n"

                        "\t\t\t\tr_dif_r + r_dif_i,\n"

                        "\t\t\t\tp_three);\n"

        "\tend else begin\n"

        "\tend else begin\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_two);\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                                "\t\t\t\tr_dif_r+r_dif_i,\n"

                                "\t\t\t\tr_dif_r+r_dif_i,\n"

                                "\t\t\t\tir_coef_i+ir_coef_r,\n"

                                "\t\t\t\tir_coef_i+ir_coef_r,\n"

                                "\t\t\t\tp_three);\n"

                                "\t\t\t\tp_three);\n"

        "\tend\n"

        "\tend\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n"

"\n");

        fprintf(fp,

        "\t// These values are held in memory and delayed during the\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"

        "\t// extension.\n"

        "\twire aux;\n"

        "\twire aux;\n"

        "\twire [(IWIDTH+CWIDTH):0]     left_i, left_r;\n"

        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"

        "\treg  [(2*IWIDTH+2):0]        fifo_read;\n"

        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"

        "\tassign       left_r = { fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH){1'b0}} };\n"

        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign       left_i = { fifo_read[((IWIDTH+1)-1):0], {(CWIDTH){1'b0}} };\n"

        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign       aux = fifo_read[2*IWIDTH+2];\n"

        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"

"\n"

"\n"

"\n"

"\n"

        "\treg [(CWIDTH+IWIDTH+3-1):0]  b_left_r, b_left_i, b_right_r, b_right_i, mpy_r, mpy_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"

        "\treg  [(CWIDTH+IWIDTH+3-1):0] rnd;\n"

                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

        "\tassign rnd = ((~ROUND)||(SHIFT==0))?\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\t\t\t({(CWIDTH+IWIDTH+3){1'b0}})\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] rnd;\n"

        "\t\t\t: ({ {(OWIDTH+1+SHIFT){1'b0}},1'b1,{(CWIDTH+IWIDTH+3-2-OWIDTH-SHIFT){1'b0}} });\n"

        "\tgenerate\n"

        "\tif ((~ROUND)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<1))\n"

                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"

        "\telse\n"

                "\t\tassign rnd = ({ {(OWIDTH+3+SHIFT){1'b0}},1'b1,\n"

                "\t\t\t\t{(CWIDTH+IWIDTH-OWIDTH-SHIFT-1){1'b0}} });\n"

        "\tendgenerate\n"

"\n");

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// although they only need to be (IWIDTH+1)\n"

                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"

                        "\t\t\t// extra bits we need to get rid of.)\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

"\n"

"\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\tb_right_r <= mpy_r + rnd;\n"

                        "\t\t\tb_right_r <= mpy_r + rnd;\n"

                        "\t\t\tb_right_i <= mpy_i + rnd;\n"

                        "\t\t\tb_right_i <= mpy_i + rnd;\n"

                        "\t\t\tb_left_r <= { {2{left_r[(IWIDTH+CWIDTH)]}},left_r } + rnd;\n"

                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"

                        "\t\t\tb_left_i <= { {2{left_i[(IWIDTH+CWIDTH)]}},left_i } + rnd;\n"

                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"

                        "\t\t\to_aux <= aux;\n"

                        "\t\t\to_aux <= aux;\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n");

        fprintf(fp,

        "\t// Final clock--clock and remove unnecessary bits.\n"

        "\t// Final clock--clock and remove unnecessary bits.\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to OWIDTH,\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"

        "\t// and SHIFT by SHIFT bits in the process.\n"

        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"

        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"

        "\t// them, but the actual values will never fill all these bits.\n"

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"

        "\t// In particular, we only need:\n"

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"

        "\t//\t IWIDTH bits for the input\n"

"\n"

        "\t//\t     +1 bit for the add/subtract\n"

        "\t//\t+CWIDTH bits for the coefficient multiply\n"

        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"

        "\t//\t ------\n"

        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"

        "\t//\n"

        "\t// However, the coefficient multiply multiplied by a maximum value\n"

        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"

        "\t//\t   IWIDTH bits for the input\n"

        "\t//\t       +1 bit for the add/subtract\n"

        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"

        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"

        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"

        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"

        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"

        "\t// or if he wishes to arbitrarily shift some of these off (via\n"

        "\t// SHIFT) we accomplish that here.\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

"\n"

        "\t// As a final step, we pack our outputs into two packed two\'s\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\tassign       o_left = { o_left_r, o_left_i };\n"

        "\tassign       o_left = { o_left_r, o_left_i };\n"

        "\tassign       o_right= { o_right_r,o_right_i};\n"

        "\tassign       o_right= { o_right_r,o_right_i};\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

Line 905...

Line 993...

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                "\t\t\tend else\n"

                "\t\t\tend else\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

        "\t\tend\n"

        "\t\tend\n"

"\n"

"\n", (inv)?"i":"");

        fprintf(fstage,

"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

"\t\t\t.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"

"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"

"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"

"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"

"endmodule;\n",

"endmodule;\n",

        (inv)?"i":"");

        lgdelay(nbits, xtra), (1<xtra)?(nbits+4):(nbits+xtra+3));

void    usage(void) {

void    usage(void) {

        fprintf(stderr,

        fprintf(stderr,

"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"

"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 2 and 5