OpenCores

Rev 9	Rev 14
`#include <stdio.h>`	`#include <stdio.h>`
`#include <stdlib.h>`	`#include <stdlib.h>`
`#include <unistd.h>`	`#include <unistd.h>`
`#include <sys/stat.h>`	`#include <sys/stat.h>`
`#include <string.h>`	`#include <string.h>`
	`#include <string>`
`#include <math.h>`	`#include <math.h>`
`#include <ctype.h>`	`#include <ctype.h>`
`#include <assert.h>`	`#include <assert.h>`

`#define COREDIR "fft-core"`	`#define COREDIR "fft-core"`

// License banner written verbatim near the top of every generated Verilog
// file (each builder emits it with fprintf(fp, "%s", cpyleft)).
const char	cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program. (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.) If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License: GPL, v3, as defined and found on www.gnu.org,\n"
"// http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";

// Project title substituted into the "// Project: %s\n" header line of each
// generated file.  It carries no newline of its own because the header
// format string already supplies one.
const char	prjname[] = "A Doubletime Pipelined FFT";

// Author credit substituted into each generated file header.  Every line is
// already a complete Verilog comment line.
const char	creator[] = "// Creator: Dan Gisselquist, Ph.D.\n"
"// Gisselquist Technology, LLC\n";

// Returns the smallest lg >= 1 for which 2^lg >= vl.
//
// Any vl of two or less (including zero and negative values) yields 1,
// since the search starts at lg = 1.
int	lgval(int vl) {
	int	lg = 1;

	// The shift is done in long long: with a plain int, any vl above
	// 2^30 would need (1<<31), which overflows a 32-bit signed int.
	while ((1LL << lg) < vl)
		lg++;
	return lg;
}

// Returns the smallest power of two that is >= vl, and never less than 1
// (so zero and negative inputs yield 1).
//
// vl must not exceed the largest power of two an int can hold (2^30 for a
// 32-bit int).  The original loop kept doubling past that point and
// overflowed; here the precondition is asserted and the loop is bounded so
// that it terminates even when assertions are compiled out.
int	nextlg(int vl) {
	// Largest power of two representable in a signed int
	const int	top = 1 << ((int)sizeof(int) * 8 - 2);
	int	r = 1;

	assert(vl <= top);
	while ((r < vl) && (r < top))
		r <<= 1;
	return r;
}

`int lgdelay(int nbits, int xtra) {`	`int bflydelay(int nbits, int xtra) {`
`int cbits = nbits + xtra;`	`int cbits = nbits + xtra;`
`int delay = nbits + 2;`	`int delay;`
`if (nbits+1<cbits)`	`if (nbits+1<cbits)`
`delay = nbits+4;`	`delay = nbits+4;`
`else`	`else`
`delay = cbits+3;`	`delay = cbits+3;`
`return lgval(delay);`	`return delay;`
	`}`

	`int lgdelay(int nbits, int xtra) {`
	`// The butterfly code needs to compare a valid address, of this`
	`// many bits, with an address two greater. This guarantees we`
	`// have enough bits for that comparison. We'll also end up with`
	`// more storage space to look for these values, but without a`
	`// redesign that's just what we'll deal with.`
	`return lgval(bflydelay(nbits, xtra)+3);`
`}`	`}`

`void build_quarters(const char *fname) {`	`void build_quarters(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`

`fprintf(fp,`	`fprintf(fp,`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Filename: qtrstage.v\n"`	`"// Filename: qtrstage.v\n"`
`"// \n"`	`"// \n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`	`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`
`"// frequency FFT. This particular implementation is optimized\n"`	`"// frequency FFT. This particular implementation is optimized\n"`
`"// so that all of the multiplies are accomplished by additions\n"`	`"// so that all of the multiplies are accomplished by additions\n"`
`"// and multiplexers only.\n"`	`"// and multiplexers only.\n"`
`"//\n"`	`"//\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n",`	`"//\n",`
`prjname, creator);`	`prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`

`fprintf(fp,`	`fprintf(fp,`
`"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"`	`"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"`
`"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"`
`"\t// Parameters specific to the core that should be changed when this\n"`	`"\t// Parameters specific to the core that should be changed when this\n"`
`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`	`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`
`"\t// spans must use the fftdoubles stage.\n"`	`"\t// spans must use the fftdoubles stage.\n"`
`"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=0;\n"`	`"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=0;\n"`
`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`	`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`
`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`	`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`
`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`	`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`
`"\toutput\treg o_sync;\n"`	`"\toutput\treg o_sync;\n"`
`"\t\n"`	`"\t\n");`
	`fprintf(fp,`
`"\treg\t wait_for_sync;\n"`	`"\treg\t wait_for_sync;\n"`
`"\treg\t[2:0] pipeline;\n"`	`"\treg\t[2:0] pipeline;\n"`
`"\n"`	`"\n"`
`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`	`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`
`"\twire\t[(IWIDTH):0] n_diff_i;\n"`	`"\twire\t[(IWIDTH):0] n_diff_r, n_diff_i;\n"`
	`"\tassign n_diff_r = -diff_r;\n"`
`"\tassign n_diff_i = -diff_i;\n"`	`"\tassign n_diff_i = -diff_i;\n"`
`"\n"`	`"\n"`
`"\treg\t[(2*OWIDTH-1):0] ob_a;\n"`	`"\treg\t[(2*OWIDTH-1):0] ob_a;\n"`
`"\twire\t[(2*OWIDTH-1):0] ob_b;\n"`	`"\twire\t[(2*OWIDTH-1):0] ob_b;\n"`
`"\treg\t[(OWIDTH-1):0] ob_b_r, ob_b_i;\n"`	`"\treg\t[(OWIDTH-1):0] ob_b_r, ob_b_i;\n"`
`"\tassign ob_b = { ob_b_r, ob_b_i };\n"`	`"\tassign ob_b = { ob_b_r, ob_b_i };\n"`
`"\n"`	`"\n"`
`"\treg\t[(LGWIDTH-1):0] iaddr;\n"`	`"\treg\t[(LGWIDTH-1):0] iaddr;\n"`
`"\treg\t[(2*IWIDTH-1):0] imem;\n"`	`"\treg\t[(2*IWIDTH-1):0] imem;\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`
`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`	`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`
`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`	`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\treg [(2*OWIDTH-1):0] omem;\n"`	`"\treg [(2*OWIDTH-1):0] omem;\n"`
`"\n"`	`"\n");`
	`fprintf(fp,`
`"\twire [(IWIDTH-1):0] rnd;\n"`	`"\twire [(IWIDTH-1):0] rnd;\n"`
`"\tgenerate\n"`	`"\tgenerate\n"`
`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`	`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`
`"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"`	`"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"`
`"\telse\n"`	`"\telse\n"`
`"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"`	`"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"`
`"\tendgenerate\n"`	`"\tendgenerate\n"`
`"\n"`	`"\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_rst)\n"`	`"\t\tif (i_rst)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\twait_for_sync <= 1'b1;\n"`	`"\t\t\twait_for_sync <= 1'b1;\n"`
`"\t\t\tiaddr <= 0;\n"`	`"\t\t\tiaddr <= 0;\n"`
`"\t\t\tpipeline <= 3'b000;\n"`	`"\t\t\tpipeline <= 3'b000;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\t\telse if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`	`"\t\telse if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\t// Always\n"`	`"\t\t\t// Always\n"`
`"\t\t\timem <= i_data;\n"`	`"\t\t\timem <= i_data;\n"`
`"\t\t\tiaddr <= iaddr + 1;\n"`	`"\t\t\tiaddr <= iaddr + 1;\n"`
`"\t\t\twait_for_sync <= 1'b0;\n"`	`"\t\t\twait_for_sync <= 1'b0;\n"`
`"\n"`	`"\n"`
`"\t\t\t// In sequence, clock = 0\n"`	`"\t\t\t// In sequence, clock = 0\n"`
`"\t\t\tif (iaddr[0])\n"`	`"\t\t\tif (iaddr[0])\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tsum_r <= imem_r + i_data_r + rnd;\n"`	`"\t\t\t\tsum_r <= imem_r + i_data_r + rnd;\n"`
`"\t\t\t\tsum_i <= imem_i + i_data_i + rnd;\n"`	`"\t\t\t\tsum_i <= imem_i + i_data_i + rnd;\n"`
`"\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"`	`"\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"`
`"\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"`	`"\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"`
`"\n"`	`"\n"`
`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"`	`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"`
`"\t\t\tend else\n"`	`"\t\t\tend else\n"`
`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"`	`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"`
`"\n"`	`"\n"`
`"\t\t\t// In sequence, clock = 1\n"`	`"\t\t\t// In sequence, clock = 1\n"`
`"\t\t\tif (pipeline[1])\n"`	`"\t\t\tif (pipeline[1])\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"`	`"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"`
`"\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"`	`"\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"`
`"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`	`"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`
`"\t\t\t\tif (ODD == 0)\n"`	`"\t\t\t\tif (ODD == 0)\n"`
`"\t\t\t\tbegin\n"`	`"\t\t\t\tbegin\n"`
`"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`
`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`
`"\t\t\t\tend else if (~INVERSE) begin\n"`	`"\t\t\t\tend else if (INVERSE==0) begin\n"`
`"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`	`"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`
`"\t\t\t\t\tob_b_r <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_r <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\t\t\t\t\tob_b_i <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`
`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`
`"\t\t\t\tend else begin\n"`	`"\t\t\t\tend else begin\n"`
`"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"`	`"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"`
`"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\t\t\t\t\tob_b_i <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\t\tob_b_i <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`
`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`	`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`

`"\t\t\t\tend\n"`	`"\t\t\t\tend\n"`
`"\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"`	`"\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"`
`"\t\t\tend\n"`	`"\t\t\tend\n"`
`"\t\t\t// In sequence, clock = 2\n"`	`"\t\t\t// In sequence, clock = 2\n"`
`"\t\t\tif (pipeline[2])\n"`	`"\t\t\tif (pipeline[2])\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tomem <= ob_b;\n"`	`"\t\t\t\tomem <= ob_b;\n"`
`"\t\t\t\to_data <= ob_a;\n"`	`"\t\t\t\to_data <= ob_a;\n"`
`"\t\t\tend else\n"`	`"\t\t\tend else\n"`
`"\t\t\t\to_data <= omem;\n"`	`"\t\t\t\to_data <= omem;\n"`
`"\t\t\t// Don\'t forget in the sync check that we are running\n"`	`"\t\t\t// Don\'t forget in the sync check that we are running\n"`
`"\t\t\t// at two clocks per sample. Thus we need to\n"`	`"\t\t\t// at two clocks per sample. Thus we need to\n"`
`"\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"`	`"\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"`
`"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"`	`"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"endmodule\n");`	`"endmodule\n");`
`}`	`}`

`void build_dblstage(const char *fname) {`	`void build_dblstage(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`

`fprintf(fp,`	`fprintf(fp,`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Filename: dblstage.v\n"`	`"// Filename: dblstage.v\n"`
`"//\n"`	`"//\n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This is part of an FPGA implementation that will process\n"`	`"// Purpose: This is part of an FPGA implementation that will process\n"`
`"// the final stage of a decimate-in-frequency FFT, running\n"`	`"// the final stage of a decimate-in-frequency FFT, running\n"`
`"// through the data at two samples per clock. If you notice\n"`	`"// through the data at two samples per clock. If you notice\n"`
`"// from the derivation of an FFT, the only time both even and\n"`	`"// from the derivation of an FFT, the only time both even and\n"`
`"// odd samples are used at the same time is in this stage.\n"`	`"// odd samples are used at the same time is in this stage.\n"`
`"// Therefore, other than this stage and these twiddles, all of\n"`	`"// Therefore, other than this stage and these twiddles, all of\n"`
`"// the other stages can run two stages at a time at one sample\n"`	`"// the other stages can run two stages at a time at one sample\n"`
`"// per clock.\n"`	`"// per clock.\n"`
`"//\n"`	`"//\n"`
`"// In this implementation, the output is valid one clock after\n"`	`"// In this implementation, the output is valid one clock after\n"`
`"// the input is valid. The output also accumulates one bit\n"`	`"// the input is valid. The output also accumulates one bit\n"`
`"// above and beyond the number of bits in the input.\n"`	`"// above and beyond the number of bits in the input.\n"`
`"// \n"`	`"// \n"`
`"// i_clk A system clock\n"`	`"// i_clk A system clock\n"`
`"// i_rst A synchronous reset\n"`	`"// i_rst A synchronous reset\n"`
`"// i_ce Circuit enable--nothing happens unless this line is high\n"`	`"// i_ce Circuit enable--nothing happens unless this line is high\n"`
`"// i_sync A synchronization signal, high once per FFT at the start\n"`	`"// i_sync A synchronization signal, high once per FFT at the start\n"`
`"// i_left The first (even) complex sample input. The higher order\n"`	`"// i_left The first (even) complex sample input. The higher order\n"`
`"// bits contain the real portion, low order bits the\n"`	`"// bits contain the real portion, low order bits the\n"`
`"// imaginary portion, all in two\'s complement.\n"`	`"// imaginary portion, all in two\'s complement.\n"`
`"// i_right The next (odd) complex sample input, same format as\n"`	`"// i_right The next (odd) complex sample input, same format as\n"`
`"// i_left.\n"`	`"// i_left.\n"`
`"// o_left The first (even) complex output.\n"`	`"// o_left The first (even) complex output.\n"`
`"// o_right The next (odd) complex output.\n"`	`"// o_right The next (odd) complex output.\n"`
`"// o_sync Output synchronization signal.\n"`	`"// o_sync Output synchronization signal.\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n", prjname, creator);`	`"//\n", prjname, creator);`

`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
`fprintf(fp,`	`fprintf(fp,`
`"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"`	`"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"`
`"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"`	`"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"`
`"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"`	`"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"`
`"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"`	`"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"`
`"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"`	`"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"`
`"\toutput\treg\t\t\to_sync;\n"`	`"\toutput\treg\t\t\to_sync;\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"`
`"\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"`	`"\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"`
`"\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"`	`"\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"`
`"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"`	`"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"`
`"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"`	`"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"`
`"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"`	`"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"`
`"\t\t\t\t\to_out_1r, o_out_1i;\n"`	`"\t\t\t\t\to_out_1r, o_out_1i;\n"`
`"\n"`	`"\n"`
`"\t// Don't forget that we accumulate a bit by adding two values\n"`	`"\t// Don't forget that we accumulate a bit by adding two values\n"`
`"\t// together. Therefore our intermediate value must have one more\n"`	`"\t// together. Therefore our intermediate value must have one more\n"`
`"\t// bit than the two originals.\n"`	`"\t// bit than the two originals.\n"`
`"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"`	`"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"`
`"\n"`	`"\n"`
`"\treg\twait_for_sync;\n"`	`"\treg\twait_for_sync;\n"`
`"\n"`	`"\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_rst)\n"`	`"\t\tif (i_rst)\n"`
`"\t\t\twait_for_sync <= 1'b1;\n"`	`"\t\t\twait_for_sync <= 1'b1;\n"`
`"\t\telse if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`	`"\t\telse if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\twait_for_sync <= 1'b0;\n"`	`"\t\t\twait_for_sync <= 1'b0;\n"`
`"\t\t\t//\n"`	`"\t\t\t//\n"`
`"\t\t\tout_0r <= i_in_0r + i_in_1r;\n"`	`"\t\t\tout_0r <= i_in_0r + i_in_1r;\n"`
`"\t\t\tout_0i <= i_in_0i + i_in_1i;\n"`	`"\t\t\tout_0i <= i_in_0i + i_in_1i;\n"`
`"\t\t\t//\n"`	`"\t\t\t//\n"`
`"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"`	`"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"`
`"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"`	`"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"`
`"\t\t\t//\n"`	`"\t\t\t//\n"`
`"\t\t\to_sync <= i_sync;\n"`	`"\t\t\to_sync <= i_sync;\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n"`
`"\t// Now, if the master control program doesn't want to keep all of\n"`	`"\t// Now, if the master control program doesn't want to keep all of\n"`
`"\t// our bits, we can shift down to OWIDTH bits here.\n"`	`"\t// our bits, we can shift down to OWIDTH bits here.\n"`
`"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\n"`	`"\n"`
`"\tassign\to_left = { o_out_0r, o_out_0i };\n"`	`"\tassign\to_left = { o_out_0r, o_out_0i };\n"`
`"\tassign\to_right = { o_out_1r, o_out_1i };\n"`	`"\tassign\to_right = { o_out_1r, o_out_1i };\n"`
`"\n"`	`"\n"`
`"endmodule\n");`	`"endmodule\n");`
`fclose(fp);`	`fclose(fp);`
`}`	`}`

`void build_multiply(const char *fname) {`	`void build_multiply(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`

`fprintf(fp,`	`fprintf(fp,`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Filename: shiftaddmpy.v\n"`	`"// Filename: shiftaddmpy.v\n"`
`"//\n"`	`"//\n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: A portable shift and add multiply.\n"`	`"// Purpose: A portable shift and add multiply.\n"`
`"//\n"`	`"//\n"`
`"// While both Xilinx and Altera will offer single clock \n"`	`"// While both Xilinx and Altera will offer single clock \n"`
`"// multiplies, this simple approach will multiply two numbers\n"`	`"// multiplies, this simple approach will multiply two numbers\n"`
`"// on any architecture. The result maintains the full width\n"`	`"// on any architecture. The result maintains the full width\n"`
`"// of the multiply, there are no extra stuff bits, no rounding,\n"`	`"// of the multiply, there are no extra stuff bits, no rounding,\n"`
`"// no shifted bits, etc.\n"`	`"// no shifted bits, etc.\n"`
`"//\n"`	`"//\n"`
`"// Further, for those applications that can support it, this\n"`	`"// Further, for those applications that can support it, this\n"`
`"// multiply is pipelined and will produce one answer per clock.\n"`	`"// multiply is pipelined and will produce one answer per clock.\n"`
`"//\n"`	`"//\n"`
`"// For minimal processing delay, make the first parameter\n"`	`"// For minimal processing delay, make the first parameter\n"`
`"// the one with the least bits, so that AWIDTH <= BWIDTH.\n"`	`"// the one with the least bits, so that AWIDTH <= BWIDTH.\n"`
`"//\n"`	`"//\n"`
`"// The processing delay in this multiply is (AWIDTH+1) cycles.\n"`	`"// The processing delay in this multiply is (AWIDTH+1) cycles.\n"`
`"// That is, if the data is present on the input at clock t=0,\n"`	`"// That is, if the data is present on the input at clock t=0,\n"`
`"// the result will be present on the output at time t=AWIDTH+1;\n"`	`"// the result will be present on the output at time t=AWIDTH+1;\n"`
`"//\n"`	`"//\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n", prjname, creator);`	`"//\n", prjname, creator);`

`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
`fprintf(fp,`	`fprintf(fp,`
`"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"`	`"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"`
`"\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"`	`"\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"`
`"\tinput\t\t\t\t\ti_clk, i_ce;\n"`	`"\tinput\t\t\t\t\ti_clk, i_ce;\n"`
`"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"`	`"\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"`
`"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"`	`"\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"`
`"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"`	`"\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"`
`"\n"`	`"\n"`
`"\treg\t[(AWIDTH-1):0]\tu_a;\n"`	`"\treg\t[(AWIDTH-1):0]\tu_a;\n"`
`"\treg\t[(BWIDTH-1):0]\tu_b;\n"`	`"\treg\t[(BWIDTH-1):0]\tu_b;\n"`
`"\treg\t\t\tsgn;\n"`	`"\treg\t\t\tsgn;\n"`
`"\n"`	`"\n"`
`"\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"`	`"\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"`
`"\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"`	`"\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"`
`"\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"`	`"\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"`
`"\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"`	`"\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"`
`"\tgenvar k;\n"`	`"\tgenvar k;\n"`
`"\n"`	`"\n"`
`"\t// If we were forced to stay within two\'s complement arithmetic,\n"`	`"\t// If we were forced to stay within two\'s complement arithmetic,\n"`
`"\t// taking the absolute value here would require an additional bit.\n"`	`"\t// taking the absolute value here would require an additional bit.\n"`
`"\t// However, because our results are now unsigned, we can stay\n"`	`"\t// However, because our results are now unsigned, we can stay\n"`
`"\t// within the number of bits given (for now).\n"`	`"\t// within the number of bits given (for now).\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"`	`"\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"`
`"\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"`	`"\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"`
`"\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"`	`"\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"`	`"\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"`
`"\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"`	`"\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"`
`"\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"`	`"\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"`
`"\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"`	`"\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"`
`"\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"`	`"\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n"`	`"\n"`
`"\tgenerate\n"`	`"\tgenerate\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\tif (i_ce)\n"`	`"\tif (i_ce)\n"`
`"\tbegin\n"`	`"\tbegin\n"`
`"\t\tfor(k=0; k<AWIDTH-1; k++)\n"`	`"\t\tfor(k=0; k<AWIDTH-1; k++)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"`	`"\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"`
`"\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"`	`"\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"`
`"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"`	`"\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"`
`"\t\t\tr_s[k+1] <= r_s[k];\n"`	`"\t\t\tr_s[k+1] <= r_s[k];\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\tend\n"`	`"\tend\n"`
`"\tendgenerate\n"`	`"\tendgenerate\n"`
`"\n"`	`"\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"`	`"\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"`
`"\n"`	`"\n"`
`"endmodule\n");`	`"endmodule\n");`

`fclose(fp);`	`fclose(fp);`
`}`	`}`

`void build_dblreverse(const char *fname) {`	`void build_dblreverse(const char *fname) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`

`fprintf(fp,`	`fprintf(fp,`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Filename: dblreverse.v\n"`	`"// Filename: dblreverse.v\n"`
`"//\n"`	`"//\n"`
`"// Project: %s\n"`	`"// Project: %s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This module bitreverses a pipelined FFT input. Operation is\n"`	`"// Purpose: This module bitreverses a pipelined FFT input. Operation is\n"`
`"// expected as follows:\n"`	`"// expected as follows:\n"`
`"//\n"`	`"//\n"`
`"// i_clk A running clock at whatever system speed is offered.\n"`	`"// i_clk A running clock at whatever system speed is offered.\n"`
`"// i_rst A synchronous reset signal, that resets all internals\n"`	`"// i_rst A synchronous reset signal, that resets all internals\n"`
`"// i_ce If this is one, one input is consumed and an output\n"`	`"// i_ce If this is one, one input is consumed and an output\n"`
`"// is produced.\n"`	`"// is produced.\n"`
`"// i_in_0, i_in_1\n"`	`"// i_in_0, i_in_1\n"`
`"// Two inputs to be consumed, each of width WIDTH.\n"`	`"// Two inputs to be consumed, each of width WIDTH.\n"`
`"// o_out_0, o_out_1\n"`	`"// o_out_0, o_out_1\n"`
`"// Two of the bitreversed outputs, also of the same\n"`	`"// Two of the bitreversed outputs, also of the same\n"`
`"// width, WIDTH. Of course, there is a delay from the\n"`	`"// width, WIDTH. Of course, there is a delay from the\n"`
`"// first input to the first output. For this purpose,\n"`	`"// first input to the first output. For this purpose,\n"`
`"// o_sync is present.\n"`	`"// o_sync is present.\n"`
`"// o_sync This will be a 1'b1 for the first value in any block.\n"`	`"// o_sync This will be a 1'b1 for the first value in any block.\n"`
`"// Following a reset, this will only become 1'b1 once\n"`	`"// Following a reset, this will only become 1'b1 once\n"`
`"// the data has been loaded and is now valid. After that,\n"`	`"// the data has been loaded and is now valid. After that,\n"`
`"// all outputs will be valid.\n"`	`"// all outputs will be valid.\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n", prjname, creator);`	`"//\n", prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
`fprintf(fp,`	`fprintf(fp,`
`"\n\n"`	`"\n\n"`
`"//\n"`	`"//\n"`
`"// How do we do bit reversing at two smples per clock? Can we separate out\n"`	`"// How do we do bit reversing at two smples per clock? Can we separate out\n"`
`"// our work into eight memory banks, writing two banks at once and reading\n"`	`"// our work into eight memory banks, writing two banks at once and reading\n"`
`"// another two banks in the same clock?\n"`	`"// another two banks in the same clock?\n"`
`"//\n"`	`"//\n"`
`"// mem[00xxx0] = s_0[n]\n"`	`"// mem[00xxx0] = s_0[n]\n"`
`"// mem[00xxx1] = s_1[n]\n"`	`"// mem[00xxx1] = s_1[n]\n"`
`"// o_0[n] = mem[10xxx0]\n"`	`"// o_0[n] = mem[10xxx0]\n"`
`"// o_1[n] = mem[11xxx0]\n"`	`"// o_1[n] = mem[11xxx0]\n"`
`"// ...\n"`	`"// ...\n"`
`"// mem[01xxx0] = s_0[m]\n"`	`"// mem[01xxx0] = s_0[m]\n"`
`"// mem[01xxx1] = s_1[m]\n"`	`"// mem[01xxx1] = s_1[m]\n"`
`"// o_0[m] = mem[10xxx1]\n"`	`"// o_0[m] = mem[10xxx1]\n"`
`"// o_1[m] = mem[11xxx1]\n"`	`"// o_1[m] = mem[11xxx1]\n"`
`"// ...\n"`	`"// ...\n"`
`"// mem[10xxx0] = s_0[n]\n"`	`"// mem[10xxx0] = s_0[n]\n"`
`"// mem[10xxx1] = s_1[n]\n"`	`"// mem[10xxx1] = s_1[n]\n"`
`"// o_0[n] = mem[00xxx0]\n"`	`"// o_0[n] = mem[00xxx0]\n"`
`"// o_1[n] = mem[01xxx0]\n"`	`"// o_1[n] = mem[01xxx0]\n"`
`"// ...\n"`	`"// ...\n"`
`"// mem[11xxx0] = s_0[m]\n"`	`"// mem[11xxx0] = s_0[m]\n"`
`"// mem[11xxx1] = s_1[m]\n"`	`"// mem[11xxx1] = s_1[m]\n"`
`"// o_0[m] = mem[00xxx1]\n"`	`"// o_0[m] = mem[00xxx1]\n"`
`"// o_1[m] = mem[01xxx1]\n"`	`"// o_1[m] = mem[01xxx1]\n"`
`"// ...\n"`	`"// ...\n"`
`"//\n"`	`"//\n"`
`"// The answer is that, yes we can but: we need to use four memory banks\n"`	`"// The answer is that, yes we can but: we need to use four memory banks\n"`
`"// to do it properly. These four banks are defined by the two bits\n"`	`"// to do it properly. These four banks are defined by the two bits\n"`
`"// that determine the top and bottom of the correct address. Larger\n"`	`"// that determine the top and bottom of the correct address. Larger\n"`
`"// FFT\'s would require more memories.\n"`	`"// FFT\'s would require more memories.\n"`
`"//\n"`	`"//\n"`
`"//\n");`	`"//\n");`
`fprintf(fp,`	`fprintf(fp,`
`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`	`"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"`
`"\t\to_out_0, o_out_1, o_sync);\n"`	`"\t\to_out_0, o_out_1, o_sync);\n"`
`"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"`	`"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"`
`"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"`	`"\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"`
`"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"`	`"\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"`
`"\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"`	`"\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"`
`"\toutput\treg\t\t\to_sync;\n"`	`"\toutput\treg\t\t\to_sync;\n"`
`"\n"`	`"\n"`
`"\treg\tin_reset;\n"`	`"\treg\tin_reset;\n"`
`"\treg\t[(LGSIZE):0]\tiaddr;\n"`	`"\treg\t[(LGSIZE):0]\tiaddr;\n"`
`"\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"`	`"\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"`
`"\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"`	`"\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"`
`"\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"`	`"\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"`
`"\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"`	`"\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"`

#include <stdio.h>

#include <stdio.h>

#include <stdlib.h>

#include <stdlib.h>

#include <unistd.h>

#include <unistd.h>

#include <sys/stat.h>

#include <sys/stat.h>

#include <string.h>

#include <string.h>

#include <string>

#include <math.h>

#include <math.h>

#include <ctype.h>

#include <ctype.h>

#include <assert.h>

#include <assert.h>

#define COREDIR "fft-core"

#define COREDIR "fft-core"

// cpyleft
//
// The GPL v3 notice that is written, verbatim, into the header of every
// generated Verilog file.  Each line is itself a Verilog comment.
const char	cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  (It's in the $(ROOT)/doc directory, run make with no\n"
"// target there if the PDF file isn\'t present.)  If not, see\n"
"// <http://www.gnu.org/licenses/> for a copy.\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";

// prjname
//
// The project title.  It is substituted into a "// Project:    %s\n" line
// by each of the build_* routines, so it must not carry a newline of its own.
const char	prjname[] = "A Doubletime Pipelined FFT";

// creator
//
// The author credit, already formatted as two complete Verilog comment
// lines, substituted into the "//\n%s" slot of each generated file header.
const char	creator[] =	"// Creator:    Dan Gisselquist, Ph.D.\n"
				"//             Gisselquist Technology, LLC\n";

// lgval
//
// Returns the smallest lg, with lg >= 1, such that (1<<lg) >= vl.  In other
// words, the number of bits needed to count vl items--but never fewer than
// one.  Any vl of two or less (including zero and negative values) yields 1.
//
// Shifting a one into, or past, the sign bit of an int is undefined, so the
// search stops at the largest shift that still yields a positive int.  For
// a vl larger than that power of two the result saturates at that shift.
int	lgval(int vl) {
	// The largest shift for which (1<<lg) remains a positive int
	const int	maxlg = (int)(sizeof(int)*8) - 2;
	int		lg;

	for(lg=1; (lg < maxlg)&&((1<<lg) < vl); lg++)
		;
	return lg;
}

// nextlg
//
// Returns the smallest power of two that is greater than or equal to vl.
// Any vl of one or less (including zero and negative values) yields 1.
//
// Doubling the largest positive power of two an int can hold would overflow,
// which is undefined for a signed int.  The search therefore stops there,
// and the result saturates at that power of two for any larger vl.
int	nextlg(int vl) {
	// The largest power of two representable as a positive int
	const int	maxpow = (int)((~0u >> 2) + 1u);
	int		r;

	for(r=1; (r < vl)&&(r < maxpow); r<<=1)
		;
	return r;
}

// bflydelay
//
// Returns a delay count computed from a data width of nbits and a
// coefficient width of (nbits + xtra) bits.
//
//	nbits	The number of bits in the data
//	xtra	How many more bits the coefficients have than the data
//
// If the coefficients are more than one bit wider than the data, the
// result is nbits+4.  Otherwise it is cbits+3, where cbits = nbits+xtra.
int	bflydelay(int nbits, int xtra) {
	int	cbits = nbits + xtra;
	int	delay;

	if (nbits+1<cbits)
		delay = nbits+4;
	else
		delay = cbits+3;
	return delay;
}

// lgdelay
//
// Returns the number of address bits required to index a memory deep enough
// to cover the delay reported by bflydelay(nbits, xtra), with some margin.
//
//	nbits	The number of bits in the data
//	xtra	How many more bits the coefficients have than the data
int	lgdelay(int nbits, int xtra) {
	// The butterfly code needs to compare a valid address, of this
	// many bits, with an address two greater.  This guarantees we
	// have enough bits for that comparison.  We'll also end up with
	// more storage space to look for these values, but without a
	// redesign that's just what we'll deal with.
	return lgval(bflydelay(nbits, xtra)+3);
}

void    build_quarters(const char *fname) {

void    build_quarters(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   qtrstage.v\n"

"// Filename:   qtrstage.v\n"

"//             \n"

"//             \n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//             so that all of the multiplies are accomplished by additions\n"

"//             so that all of the multiplies are accomplished by additions\n"

"//             and multiplexers only.\n"

"//             and multiplexers only.\n"

"//\n"

"//\n"

"//\n%s"

"//\n%s"

"//\n",

"//\n",

                prjname, creator);

                prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"

"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"

        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"

        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// spans must use the fftdoubles stage.\n"

        "\t// spans must use the fftdoubles stage.\n"

        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=0;\n"

        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=0;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg                          o_sync;\n"

        "\toutput\treg                          o_sync;\n"

        "\t\n"

        "\t\n");

        fprintf(fp,

        "\treg\t        wait_for_sync;\n"

        "\treg\t        wait_for_sync;\n"

        "\treg\t[2:0]   pipeline;\n"

        "\treg\t[2:0]   pipeline;\n"

"\n"

"\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

        "\twire\t[(IWIDTH):0]   n_diff_i;\n"

        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"

        "\tassign n_diff_r = -diff_r;\n"

        "\tassign n_diff_i = -diff_i;\n"

        "\tassign n_diff_i = -diff_i;\n"

"\n"

"\n"

        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"

        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"

        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"

        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"

        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"

        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"

        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"

        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"

"\n"

"\n"

        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"

        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"

        "\treg\t[(2*IWIDTH-1):0]        imem;\n"

        "\treg\t[(2*IWIDTH-1):0]        imem;\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

"\n"

"\n");

        fprintf(fp,

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"

                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"

        "\telse\n"

        "\telse\n"

                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"

                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tpipeline <= 3'b000;\n"

                        "\t\t\tpipeline <= 3'b000;\n"

                "\t\tend\n"

                "\t\tend\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// Always\n"

                        "\t\t\t// Always\n"

                        "\t\t\timem <= i_data;\n"

                        "\t\t\timem <= i_data;\n"

                        "\t\t\tiaddr <= iaddr + 1;\n"

                        "\t\t\tiaddr <= iaddr + 1;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

"\n"

"\n"

                        "\t\t\t// In sequence, clock = 0\n"

                        "\t\t\t// In sequence, clock = 0\n"

                        "\t\t\tif (iaddr[0])\n"

                        "\t\t\tif (iaddr[0])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"

                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"

                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"

                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"

                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"

                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"

                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"

                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"

"\n"

"\n"

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"

                        "\t\t\tend else\n"

                        "\t\t\tend else\n"

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"

"\n"

"\n"

                        "\t\t\t// In sequence, clock = 1\n"

                        "\t\t\t// In sequence, clock = 1\n"

                        "\t\t\tif (pipeline[1])\n"

                        "\t\t\tif (pipeline[1])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"

"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"

        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"

        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"

                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                                "\t\t\t\tif (ODD == 0)\n"

                                "\t\t\t\tif (ODD == 0)\n"

                                "\t\t\t\tbegin\n"

                                "\t\t\t\tbegin\n"

"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                                "\t\t\t\tend else if (~INVERSE) begin\n"

                                "\t\t\t\tend else if (INVERSE==0) begin\n"

"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                                "\t\t\t\tend else begin\n"

                                "\t\t\t\tend else begin\n"

"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                                "\t\t\t\tend\n"

                                "\t\t\t\tend\n"

                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"

                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                        "\t\t\t// In sequence, clock = 2\n"

                        "\t\t\t// In sequence, clock = 2\n"

                        "\t\t\tif (pipeline[2])\n"

                        "\t\t\tif (pipeline[2])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                        "\t\t\tend else\n"

                        "\t\t\tend else\n"

                                "\t\t\t\to_data <= omem;\n"

                                "\t\t\t\to_data <= omem;\n"

                        "\t\t\t// Don\'t forget in the sync check that we are running\n"

                        "\t\t\t// Don\'t forget in the sync check that we are running\n"

                        "\t\t\t// at two clocks per sample.  Thus we need to\n"

                        "\t\t\t// at two clocks per sample.  Thus we need to\n"

                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"

                "\t\tend\n"

                "\t\tend\n"

"endmodule\n");

"endmodule\n");

void    build_dblstage(const char *fname) {

void    build_dblstage(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   dblstage.v\n"

"// Filename:   dblstage.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"//             the final stage of a decimate-in-frequency FFT, running\n"

"//             the final stage of a decimate-in-frequency FFT, running\n"

"//             through the data at two samples per clock.  If you notice\n"

"//             through the data at two samples per clock.  If you notice\n"

"//             from the derivation of an FFT, the only time both even and\n"

"//             from the derivation of an FFT, the only time both even and\n"

"//             odd samples are used at the same time is in this stage.\n"

"//             odd samples are used at the same time is in this stage.\n"

"//             Therefore, other than this stage and these twiddles, all of\n"

"//             Therefore, other than this stage and these twiddles, all of\n"

"//             the other stages can run two stages at a time at one sample\n"

"//             the other stages can run two stages at a time at one sample\n"

"//             per clock.\n"

"//             per clock.\n"

"//\n"

"//\n"

"//             In this implementation, the output is valid one clock after\n"

"//             In this implementation, the output is valid one clock after\n"

"//             the input is valid.  The output also accumulates one bit\n"

"//             the input is valid.  The output also accumulates one bit\n"

"//             above and beyond the number of bits in the input.\n"

"//             above and beyond the number of bits in the input.\n"

"//             \n"

"//             \n"

"//             i_clk   A system clock\n"

"//             i_clk   A system clock\n"

"//             i_rst   A synchronous reset\n"

"//             i_rst   A synchronous reset\n"

"//             i_ce    Circuit enable--nothing happens unless this line is high\n"

"//             i_ce    Circuit enable--nothing happens unless this line is high\n"

"//             i_sync  A synchronization signal, high once per FFT at the start\n"

"//             i_sync  A synchronization signal, high once per FFT at the start\n"

"//             i_left  The first (even) complex sample input.  The higher order\n"

"//             i_left  The first (even) complex sample input.  The higher order\n"

"//                     bits contain the real portion, low order bits the\n"

"//                     bits contain the real portion, low order bits the\n"

"//                     imaginary portion, all in two\'s complement.\n"

"//                     imaginary portion, all in two\'s complement.\n"

"//             i_right The next (odd) complex sample input, same format as\n"

"//             i_right The next (odd) complex sample input, same format as\n"

"//                     i_left.\n"

"//                     i_left.\n"

"//             o_left  The first (even) complex output.\n"

"//             o_left  The first (even) complex output.\n"

"//             o_right The next (odd) complex output.\n"

"//             o_right The next (odd) complex output.\n"

"//             o_sync  Output synchronization signal.\n"

"//             o_sync  Output synchronization signal.\n"

"//\n%s"

"//\n%s"

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"

"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"

        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\toutput\treg\t\t\to_sync;\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_in_0r, i_in_0i, i_in_1r, i_in_1i;\n"

        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"

        "\tassign\ti_in_0r = i_left[(2*IWIDTH-1):(IWIDTH)]; \n"

        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"

        "\tassign\ti_in_0i = i_left[(IWIDTH-1):0]; \n"

        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"

        "\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"

        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"

        "\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"

        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"

        "\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"

                                "\t\t\t\t\to_out_1r, o_out_1i;\n"

                                "\t\t\t\t\to_out_1r, o_out_1i;\n"

"\n"

"\n"

        "\t// Don't forget that we accumulate a bit by adding two values\n"

        "\t// Don't forget that we accumulate a bit by adding two values\n"

        "\t// together. Therefore our intermediate value must have one more\n"

        "\t// together. Therefore our intermediate value must have one more\n"

        "\t// bit than the two originals.\n"

        "\t// bit than the two originals.\n"

        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"

        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"

"\n"

"\n"

        "\treg\twait_for_sync;\n"

        "\treg\twait_for_sync;\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\tout_0r <= i_in_0r + i_in_1r;\n"

                        "\t\t\tout_0r <= i_in_0r + i_in_1r;\n"

                        "\t\t\tout_0i <= i_in_0i + i_in_1i;\n"

                        "\t\t\tout_0i <= i_in_0i + i_in_1i;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\tout_1r <= i_in_0r - i_in_1r;\n"

                        "\t\t\tout_1r <= i_in_0r - i_in_1r;\n"

                        "\t\t\tout_1i <= i_in_0i - i_in_1i;\n"

                        "\t\t\tout_1i <= i_in_0i - i_in_1i;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\to_sync <= i_sync;\n"

                        "\t\t\to_sync <= i_sync;\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\t// Now, if the master control program doesn't want to keep all of\n"

        "\t// Now, if the master control program doesn't want to keep all of\n"

        "\t// our bits, we can shift down to OWIDTH bits here.\n"

        "\t// our bits, we can shift down to OWIDTH bits here.\n"

        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\n"

"\n"

        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"

        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"

        "\tassign\to_right = { o_out_1r, o_out_1i };\n"

        "\tassign\to_right = { o_out_1r, o_out_1i };\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    build_multiply(const char *fname) {

void    build_multiply(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   shiftaddmpy.v\n"

"// Filename:   shiftaddmpy.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    A portable shift and add multiply.\n"

"// Purpose:    A portable shift and add multiply.\n"

"//\n"

"//\n"

"//             While both Xilinx and Altera will offer single clock \n"

"//             While both Xilinx and Altera will offer single clock \n"

"//             multiplies, this simple approach will multiply two numbers\n"

"//             multiplies, this simple approach will multiply two numbers\n"

"//             on any architecture.  The result maintains the full width\n"

"//             on any architecture.  The result maintains the full width\n"

"//             of the multiply, there are no extra stuff bits, no rounding,\n"

"//             of the multiply, there are no extra stuff bits, no rounding,\n"

"//             no shifted bits, etc.\n"

"//             no shifted bits, etc.\n"

"//\n"

"//\n"

"//             Further, for those applications that can support it, this\n"

"//             Further, for those applications that can support it, this\n"

"//             multiply is pipelined and will produce one answer per clock.\n"

"//             multiply is pipelined and will produce one answer per clock.\n"

"//\n"

"//\n"

"//             For minimal processing delay, make the first parameter\n"

"//             For minimal processing delay, make the first parameter\n"

"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"

"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"

"//\n"

"//\n"

"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"

"//             The processing delay in this multiply is (AWIDTH+1) cycles.\n"

"//             That is, if the data is present on the input at clock t=0,\n"

"//             That is, if the data is present on the input at clock t=0,\n"

"//             the result will be present on the output at time t=AWIDTH+1;\n"

"//             the result will be present on the output at time t=AWIDTH+1;\n"

"//\n"

"//\n"

"//\n%s"

"//\n%s"

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"

"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"

        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"

        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"

        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"

        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"

        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"

        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"

        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"

"\n"

"\n"

        "\treg\t[(AWIDTH-1):0]\tu_a;\n"

        "\treg\t[(AWIDTH-1):0]\tu_a;\n"

        "\treg\t[(BWIDTH-1):0]\tu_b;\n"

        "\treg\t[(BWIDTH-1):0]\tu_b;\n"

        "\treg\t\t\tsgn;\n"

        "\treg\t\t\tsgn;\n"

"\n"

"\n"

        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"

        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"

        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"

        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"

        "\tgenvar k;\n"

        "\tgenvar k;\n"

"\n"

"\n"

        "\t// If we were forced to stay within two\'s complement arithmetic,\n"

        "\t// If we were forced to stay within two\'s complement arithmetic,\n"

        "\t// taking the absolute value here would require an additional bit.\n"

        "\t// taking the absolute value here would require an additional bit.\n"

        "\t// However, because our results are now unsigned, we can stay\n"

        "\t// However, because our results are now unsigned, we can stay\n"

        "\t// within the number of bits given (for now).\n"

        "\t// within the number of bits given (for now).\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"

                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"

                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"

                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"

                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"

                        "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"

                        "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1'b0}}, u_b }\n"

                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"

                        "\t\t\t\t\t: {(AWIDTH+BWIDTH){1'b0}};\n"

                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"

                        "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n"

                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"

                        "\t\t\tr_b[0] <= { {(AWIDTH-1){1'b0}}, u_b };\n"

                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"

                        "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif (i_ce)\n"

        "\tif (i_ce)\n"

        "\tbegin\n"

        "\tbegin\n"

                "\t\tfor(k=0; k<AWIDTH-1; k++)\n"

                "\t\tfor(k=0; k<AWIDTH-1; k++)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"

                        "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1'b0}:0);\n"

                        "\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"

                        "\t\t\tr_a[k+1] <= { 1'b0, r_a[k][(AWIDTH-2):1] };\n"

                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"

                        "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1'b0};\n"

                        "\t\t\tr_s[k+1] <= r_s[k];\n"

                        "\t\t\tr_s[k+1] <= r_s[k];\n"

                "\t\tend\n"

                "\t\tend\n"

        "\tend\n"

        "\tend\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"

                        "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    build_dblreverse(const char *fname) {

void    build_dblreverse(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   dblreverse.v\n"

"// Filename:   dblreverse.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"

"// Purpose:    This module bitreverses a pipelined FFT input.  Operation is\n"

"//             expected as follows:\n"

"//             expected as follows:\n"

"//\n"

"//\n"

"//             i_clk   A running clock at whatever system speed is offered.\n"

"//             i_clk   A running clock at whatever system speed is offered.\n"

"//             i_rst   A synchronous reset signal, that resets all internals\n"

"//             i_rst   A synchronous reset signal, that resets all internals\n"

"//             i_ce    If this is one, one input is consumed and an output\n"

"//             i_ce    If this is one, one input is consumed and an output\n"

"//                     is produced.\n"

"//                     is produced.\n"

"//             i_in_0, i_in_1\n"

"//             i_in_0, i_in_1\n"

"//                     Two inputs to be consumed, each of width WIDTH.\n"

"//                     Two inputs to be consumed, each of width WIDTH.\n"

"//             o_out_0, o_out_1\n"

"//             o_out_0, o_out_1\n"

"//                     Two of the bitreversed outputs, also of the same\n"

"//                     Two of the bitreversed outputs, also of the same\n"

"//                     width, WIDTH.  Of course, there is a delay from the\n"

"//                     width, WIDTH.  Of course, there is a delay from the\n"

"//                     first input to the first output.  For this purpose,\n"

"//                     first input to the first output.  For this purpose,\n"

"//                     o_sync is present.\n"

"//                     o_sync is present.\n"

"//             o_sync  This will be a 1'b1 for the first value in any block.\n"

"//             o_sync  This will be a 1'b1 for the first value in any block.\n"

"//                     Following a reset, this will only become 1'b1 once\n"

"//                     Following a reset, this will only become 1'b1 once\n"

"//                     the data has been loaded and is now valid.  After that,\n"

"//                     the data has been loaded and is now valid.  After that,\n"

"//                     all outputs will be valid.\n"

"//                     all outputs will be valid.\n"

"//\n%s"

"//\n%s"

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"\n\n"

"\n\n"

"//\n"

"//\n"

"// How do we do bit reversing at two smples per clock?  Can we separate out\n"

"// How do we do bit reversing at two smples per clock?  Can we separate out\n"

"// our work into eight memory banks, writing two banks at once and reading\n"

"// our work into eight memory banks, writing two banks at once and reading\n"

"// another two banks in the same clock?\n"

"// another two banks in the same clock?\n"

"//\n"

"//\n"

"//     mem[00xxx0] = s_0[n]\n"

"//     mem[00xxx0] = s_0[n]\n"

"//     mem[00xxx1] = s_1[n]\n"

"//     mem[00xxx1] = s_1[n]\n"

"//     o_0[n] = mem[10xxx0]\n"

"//     o_0[n] = mem[10xxx0]\n"

"//     o_1[n] = mem[11xxx0]\n"

"//     o_1[n] = mem[11xxx0]\n"

"//     ...\n"

"//     ...\n"

"//     mem[01xxx0] = s_0[m]\n"

"//     mem[01xxx0] = s_0[m]\n"

"//     mem[01xxx1] = s_1[m]\n"

"//     mem[01xxx1] = s_1[m]\n"

"//     o_0[m] = mem[10xxx1]\n"

"//     o_0[m] = mem[10xxx1]\n"

"//     o_1[m] = mem[11xxx1]\n"

"//     o_1[m] = mem[11xxx1]\n"

"//     ...\n"

"//     ...\n"

"//     mem[10xxx0] = s_0[n]\n"

"//     mem[10xxx0] = s_0[n]\n"

"//     mem[10xxx1] = s_1[n]\n"

"//     mem[10xxx1] = s_1[n]\n"

"//     o_0[n] = mem[00xxx0]\n"

"//     o_0[n] = mem[00xxx0]\n"

"//     o_1[n] = mem[01xxx0]\n"

"//     o_1[n] = mem[01xxx0]\n"

"//     ...\n"

"//     ...\n"

"//     mem[11xxx0] = s_0[m]\n"

"//     mem[11xxx0] = s_0[m]\n"

"//     mem[11xxx1] = s_1[m]\n"

"//     mem[11xxx1] = s_1[m]\n"

"//     o_0[m] = mem[00xxx1]\n"

"//     o_0[m] = mem[00xxx1]\n"

"//     o_1[m] = mem[01xxx1]\n"

"//     o_1[m] = mem[01xxx1]\n"

"//     ...\n"

"//     ...\n"

"//\n"

"//\n"

"//     The answer is that, yes we can but: we need to use four memory banks\n"

"//     The answer is that, yes we can but: we need to use four memory banks\n"

"//     to do it properly.  These four banks are defined by the two bits\n"

"//     to do it properly.  These four banks are defined by the two bits\n"

"//     that determine the top and bottom of the correct address.  Larger\n"

"//     that determine the top and bottom of the correct address.  Larger\n"

"//     FFT\'s would require more memories.\n"

"//     FFT\'s would require more memories.\n"

"//\n"

"//\n"

"//\n");

"//\n");

        fprintf(fp,

        fprintf(fp,

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"

        "\t\to_out_0, o_out_1, o_sync);\n"

        "\t\to_out_0, o_out_1, o_sync);\n"

        "\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"

        "\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"

        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"

        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"

        "\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"

        "\toutput\treg\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\toutput\treg\t\t\to_sync;\n"

"\n"

"\n"

        "\treg\tin_reset;\n"

        "\treg\tin_reset;\n"

        "\treg\t[(LGSIZE):0]\tiaddr;\n"

        "\treg\t[(LGSIZE):0]\tiaddr;\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_0e [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_0o [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_1e [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"

        "\treg\t[(2*WIDTH-1):0]\tmem_1o [0:((1<<(LGSIZE-1))-1)];\n"

"\n"

"\n"

        "\twire\t[(2*LGSIZE-1):0]       braddr;\n"

        "\twire\t[(2*LGSIZE-1):0]       braddr;\n"

        "\tgenvar\tk;\n"

        "\tgenvar\tk;\n"

        "\tgenerate for(k=0; k<LGSIZE; k++)\n"

        "\tgenerate for(k=0; k<LGSIZE; k++)\n"

                "\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"

                "\t\tassign braddr[k] = iaddr[LGSIZE-1-k];\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tin_reset <= 1'b1;\n"

                        "\t\t\tin_reset <= 1'b1;\n"

                "\t\tend else if (i_ce)\n"

                "\t\tend else if (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tif (iaddr[(LGSIZE-1)])\n"

                        "\t\t\tif (iaddr[(LGSIZE-1)])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"

                                "\t\t\t\tmem_1e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"

                                "\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"

                                "\t\t\t\tmem_1o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"

                        "\t\t\tend else begin\n"

                        "\t\t\tend else begin\n"

                                "\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"

                                "\t\t\t\tmem_0e[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_0;\n"

                                "\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"

                                "\t\t\t\tmem_0o[{iaddr[LGSIZE],iaddr[(LGSIZE-2):1]}] <= i_in_1;\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                        "\t\t\tiaddr <= iaddr + 2;\n"

                        "\t\t\tiaddr <= iaddr + 2;\n"

                        "\t\t\tif (&iaddr[(LGSIZE-1):1])\n"

                        "\t\t\tif (&iaddr[(LGSIZE-1):1])\n"

                                "\t\t\t\tin_reset <= 1'b0;\n"

                                "\t\t\t\tin_reset <= 1'b0;\n"

                        "\t\t\tif (in_reset)\n"

                        "\t\t\tif (in_reset)\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"

                                "\t\t\t\to_out_0 <= {(2*WIDTH){1'b0}};\n"

                                "\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"

                                "\t\t\t\to_out_1 <= {(2*WIDTH){1'b0}};\n"

                                "\t\t\t\to_sync <= 1'b0;\n"

                                "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\tend else\n"

                        "\t\t\tend else\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tif (braddr[0])\n"

                                "\t\t\t\tif (braddr[0])\n"

                                "\t\t\t\tbegin\n"

                                "\t\t\t\tbegin\n"

"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_0 <= mem_0o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_1 <= mem_1o[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

                                "\t\t\t\tend else begin\n"

                                "\t\t\t\tend else begin\n"

"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_0 <= mem_0e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

"\t\t\t\t\to_out_1 <= mem_1e[{~iaddr[LGSIZE],braddr[(LGSIZE-2):1]}];\n"

                                "\t\t\t\tend\n"

                                "\t\t\t\tend\n"

                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"

                                "\t\t\t\to_sync <= ~(|iaddr[(LGSIZE-1):0]);\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

"endmodule;\n");

"endmodule;\n");

        fclose(fp);

        fclose(fp);

void    build_butterfly(const char *fname) {

void    build_butterfly(const char *fname, int xtracbits) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   butterfly.v\n"

"// Filename:   butterfly.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This routine caculates a butterfly for a decimation\n"

"// Purpose:    This routine caculates a butterfly for a decimation\n"

"//             in frequency version of an FFT.  Specifically, given\n"

"//             in frequency version of an FFT.  Specifically, given\n"

"//             complex Left and Right values together with a \n"

"//             complex Left and Right values together with a \n"

"//             coefficient, the output of this routine is given\n"

"//             coefficient, the output of this routine is given\n"

"//             by:\n"

"//             by:\n"

"//\n"

"//\n"

"//             L' = L + R\n"

"//             L' = L + R\n"

"//             R' = (L - R)*C\n"

"//             R' = (L - R)*C\n"

"//\n"

"//\n"

"//             The rest of the junk below handles timing (mostly),\n"

"//             The rest of the junk below handles timing (mostly),\n"

"//             to make certain that L' and R' reach the output at\n"

"//             to make certain that L' and R' reach the output at\n"

"//             the same clock.  Further, just to make certain\n"

"//             the same clock.  Further, just to make certain\n"

"//             that is the case, an 'aux' input exists.  This\n"

"//             that is the case, an 'aux' input exists.  This\n"

"//             aux value will come out of this routine synchronized\n"

"//             aux value will come out of this routine synchronized\n"

"//             to the values it came in with.  (i.e., both L', R',\n"

"//             to the values it came in with.  (i.e., both L', R',\n"

"//             and aux all have the same delay.)  Hence, a caller\n"

"//             and aux all have the same delay.)  Hence, a caller\n"

"//             of this routine may set aux on the first input with\n"

"//             of this routine may set aux on the first input with\n"

"//             valid data, and then wait to see aux set on the output\n"

"//             valid data, and then wait to see aux set on the output\n"

"//             to know when to find the first output with valid data.\n"

"//             to know when to find the first output with valid data.\n"

"//\n"

"//\n"

"//             All bits are preserved until the very last clock,\n"

"//             All bits are preserved until the very last clock,\n"

"//             where any more bits than OWIDTH will be quietly\n"

"//             where any more bits than OWIDTH will be quietly\n"

"//             discarded.\n"

"//             discarded.\n"

"//\n"

"//\n"

"//             This design features no overflow checking.\n"

"//             This design features no overflow checking.\n"

"// \n"

"// \n"

"// Notes:\n"

"// Notes:\n"

"//             CORDIC:\n"

"//             CORDIC:\n"

"//             Much as we would like, we can't use a cordic here.\n"

"//             Much as we would like, we can't use a cordic here.\n"

"//             The goal is to accomplish an FFT, as defined, and a\n"

"//             The goal is to accomplish an FFT, as defined, and a\n"

"//             CORDIC places a scale factor onto the data.  Removing\n"

"//             CORDIC places a scale factor onto the data.  Removing\n"

"//             the scale factor would cost a two multiplies, which\n"

"//             the scale factor would cost a two multiplies, which\n"

"//             is precisely what we are trying to avoid.\n"

"//             is precisely what we are trying to avoid.\n"

"//\n"

"//\n"

"//\n"

"//\n"

"//             3-MULTIPLIES:\n"

"//             3-MULTIPLIES:\n"

"//             It should also be possible to do this with three \n"

"//             It should also be possible to do this with three \n"

"//             multiplies and an extra two addition cycles.  \n"

"//             multiplies and an extra two addition cycles.  \n"

"//\n"

"//\n"

"//             We want\n"

"//             We want\n"

"//                     R+I = (a + jb) * (c + jd)\n"

"//                     R+I = (a + jb) * (c + jd)\n"

"//                     R+I = (ac-bd) + j(ad+bc)\n"

"//                     R+I = (ac-bd) + j(ad+bc)\n"

"//             We multiply\n"

"//             We multiply\n"

"//                     P1 = ac\n"

"//                     P1 = ac\n"

"//                     P2 = bd\n"

"//                     P2 = bd\n"

"//                     P3 = (a+b)(c+d)\n"

"//                     P3 = (a+b)(c+d)\n"

"//             Then \n"

"//             Then \n"

"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"

"//                     R+I=(P1-P2)+j(P3-P2-P1)\n"

"//\n"

"//\n"

"//             WIDTHS:\n"

"//             WIDTHS:\n"

"//             On multiplying an X width number by an\n"

"//             On multiplying an X width number by an\n"

"//             Y width number, X>Y, the result should be (X+Y)\n"

"//             Y width number, X>Y, the result should be (X+Y)\n"

"//             bits, right?\n"

"//             bits, right?\n"

"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"

"//             -2^(X-1) <= a <= 2^(X-1) - 1\n"

"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"

"//             -2^(Y-1) <= b <= 2^(Y-1) - 1\n"

"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"

"//             (2^(Y-1)-1)*(-2^(X-1)) <= ab <= 2^(X-1)2^(Y-1)\n"

"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"

"//             -2^(X+Y-2)+2^(X-1) <= ab <= 2^(X+Y-2) <= 2^(X+Y-1) - 1\n"

"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"

"//             -2^(X+Y-1) <= ab <= 2^(X+Y-1)-1\n"

"//             YUP!  But just barely.  Do this and you'll really want\n"

"//             YUP!  But just barely.  Do this and you'll really want\n"

"//             to drop a bit, although you will risk overflow in so\n"

"//             to drop a bit, although you will risk overflow in so\n"

"//             doing.\n"

"//             doing.\n"

"//\n%s"

"//\n%s"

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

                "\t\to_left, o_right, o_aux);\n"

                "\t\to_left, o_right, o_aux);\n"

        "\t// Public changeable parameters ...\n"

        "\t// Public changeable parameters ...\n"

        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+4,OWIDTH=IWIDTH+1;\n"

        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\tparameter    MPYDELAY=5'd20, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

                        "\t\t\tSHIFT=0, ROUND=0;\n"

                        "\t\t\tSHIFT=0, ROUND=0;\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\tparameter\tLGDELAY=5;\n"

        "\tparameter\tLGDELAY=%d;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"

        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"

        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"

        "\tinput\t\ti_aux;\n"

        "\tinput\t\ti_aux;\n"

        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"

        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"

        "\toutput\twire o_aux;\n"

        "\toutput\twire o_aux;\n"

"\n"

        "\n", 16, xtracbits, lgdelay(16,xtracbits),

        bflydelay(16, xtracbits), lgdelay(16,xtracbits));

        fprintf(fp,

        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"

        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"

"\n"

"\n"

        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"

        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"

        "\treg\t\t\t\tr_aux, r_aux_2;\n"

        "\treg\t\t\t\tr_aux, r_aux_2;\n"

        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"

        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"

        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"

        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"

        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"

        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"

        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"

        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"

        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"

        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"

        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"

        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"

        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"

"\n"

"\n"

        "\treg  [(LGDELAY-1):0] fifo_addr;\n"

        "\treg  [(LGDELAY-1):0] fifo_addr;\n"

        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"

        "\twire [(LGDELAY-1):0] fifo_read_addr;\n"

        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"

        "\tassign\tfifo_read_addr = fifo_addr - MPYDELAY;\n"

        "\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"

        "\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"

        "\treg\t\t\t\tovalid;\n"

        "\treg\t\t\t\tovalid;\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Set up the input to the multiply\n"

        "\t// Set up the input to the multiply\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// One clock just latches the inputs\n"

                        "\t\t\t// One clock just latches the inputs\n"

                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"

                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"

                        "\t\t\tr_right <= i_right;\n"

                        "\t\t\tr_right <= i_right;\n"

                        "\t\t\tr_aux <= i_aux;\n"

                        "\t\t\tr_aux <= i_aux;\n"

                        "\t\t\tr_coef  <= i_coef;\n"

                        "\t\t\tr_coef  <= i_coef;\n"

                        "\t\t\t// Next clock adds/subtracts\n"

                        "\t\t\t// Next clock adds/subtracts\n"

                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"

                        "\t\t\tr_sum_r <= r_left_r + r_right_r; // Now IWIDTH+1 bits\n"

                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"

                        "\t\t\tr_sum_i <= r_left_i + r_right_i;\n"

                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"

                        "\t\t\tr_dif_r <= r_left_r - r_right_r;\n"

                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"

                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"

                        "\t\t\t// Other inputs are simply delayed on second clock\n"

                        "\t\t\t// Other inputs are simply delayed on second clock\n"

                        "\t\t\tr_aux_2 <= r_aux;\n"

                        "\t\t\tr_aux_2 <= r_aux;\n"

                        "\t\t\tr_coef_2<= r_coef;\n"

                        "\t\t\tr_coef_2<= r_coef;\n"

        "\t\tend\n"

        "\t\tend\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"

        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"

        "\t// to be multiplied, but yet we still need the results in sync\n"

        "\t// to be multiplied, but yet we still need the results in sync\n"

        "\t// with the answer when it is ready.\n"

        "\t// with the answer when it is ready.\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tfifo_addr <= 0;\n"

                        "\t\t\tfifo_addr <= 0;\n"

                        "\t\t\tovalid <= 1'b0;\n"

                        "\t\t\tovalid <= 1'b0;\n"

                "\t\tend else if (i_ce)\n"

                "\t\tend else if (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// Need to delay the sum side--nothing else happens\n"

                        "\t\t\t// Need to delay the sum side--nothing else happens\n"

                        "\t\t\t// to it, but it needs to stay synchronized with the\n"

                        "\t\t\t// to it, but it needs to stay synchronized with the\n"

                        "\t\t\t// right side.\n"

                        "\t\t\t// right side.\n"

                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"

                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"

                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"

                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"

                        "\t\t\tovalid <= (ovalid) || (fifo_addr > MPYDELAY+1);\n"

"\n"

                        "\t\t\tovalid <= (ovalid) || (fifo_addr > (MPYDELAY+1));\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"

        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"

        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"

        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"

        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"

        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"

        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"

        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"

"\n"

"\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Multiply output is always a width of the sum of the widths of\n"

        "\t// Multiply output is always a width of the sum of the widths of\n"

        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"

        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"

        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"

        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"

        "\t// accumulate a bit (or two) each.  However, this approach to a\n"

        "\t// accumulate a bit (or two) each.  However, this approach to a\n"

        "\t// three multiply complex multiply cannot increase the total\n"

        "\t// three multiply complex multiply cannot increase the total\n"

        "\t// number of bits in our final output.  We\'ll take care of\n"

        "\t// number of bits in our final output.  We\'ll take care of\n"

        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"

        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"

        "\t// below.\n"

        "\t// below.\n"

"\n"

"\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"

        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"

        "\t// by doing three multiplies we accomplish the work of four.\n"

        "\t// by doing three multiplies we accomplish the work of four.\n"

        "\t// Let\'s prove to ourselves that this works ... We wish to\n"

        "\t// Let\'s prove to ourselves that this works ... We wish to\n"

        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"

        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"

        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"

        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"

        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"

        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"

        "\t// We do this by calculating the intermediate products P1, P2,\n"

        "\t// We do this by calculating the intermediate products P1, P2,\n"

        "\t// and P3 as\n"

        "\t// and P3 as\n"

        "\t//\tP1 = ac\n"

        "\t//\tP1 = ac\n"

        "\t//\tP2 = bd\n"

        "\t//\tP2 = bd\n"

        "\t//\tP3 = (a + b) * (c + d)\n"

        "\t//\tP3 = (a + b) * (c + d)\n"

        "\t// and then complete our final answer with\n"

        "\t// and then complete our final answer with\n"

        "\t//\tac - bd = P1 - P2 (this checks)\n"

        "\t//\tac - bd = P1 - P2 (this checks)\n"

        "\t//\tad + bc = P3 - P2 - P1\n"

        "\t//\tad + bc = P3 - P2 - P1\n"

        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"

        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"

        "\t//\t        = bc + ad (this checks)\n"

        "\t//\t        = bc + ad (this checks)\n"

"\n"

"\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// This should really be based upon an IF, such as in\n"

        "\t// This should really be based upon an IF, such as in\n"

        "\t// if (IWIDTH < CWIDTH) then ...\n"

        "\t// if (IWIDTH < CWIDTH) then ...\n"

        "\t// However, this is the only (other) way I know to do it.\n"

        "\t// However, this is the only (other) way I know to do it.\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\tif (CWIDTH < IWIDTH+1)\n"

        "\tif (CWIDTH < IWIDTH+1)\n"

        "\tbegin\n"

        "\tbegin\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// We need to pad these first two multiplies by an extra\n"

                "\t\t// bit just to keep them aligned with the third,\n"

                "\t\t// bit just to keep them aligned with the third,\n"

                "\t\t// simpler, multiply.\n"

                "\t\t// simpler, multiply.\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"

                        "\t\t\t\tir_coef_i+ir_coef_r,\n"

                        "\t\t\t\tir_coef_i+ir_coef_r,\n"

                        "\t\t\t\tr_dif_r + r_dif_i,\n"

                        "\t\t\t\tr_dif_r + r_dif_i,\n"

                        "\t\t\t\tp_three);\n"

                        "\t\t\t\tp_three);\n"

        "\tend else begin\n"

        "\tend else begin\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"

                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"

                                "\t\t\t\tr_dif_r+r_dif_i,\n"

                                "\t\t\t\tr_dif_r+r_dif_i,\n"

                                "\t\t\t\tir_coef_i+ir_coef_r,\n"

                                "\t\t\t\tir_coef_i+ir_coef_r,\n"

                                "\t\t\t\tp_three);\n"

                                "\t\t\t\tp_three);\n"

        "\tend\n"

        "\tend\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// These values are held in memory and delayed during the\n"

        "\t// These values are held in memory and delayed during the\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"

        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"

        "\t// extension.\n"

        "\t// extension.\n"

        "\twire aux;\n"

        "\twire aux;\n"

        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"

        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"

        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"

        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"

        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"

        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"

"\n"

"\n"

"\n"

"\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"

                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] rnd;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] rnd;\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\tif ((~ROUND)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<1))\n"

        "\tif ((~ROUND)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<1))\n"

                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"

                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"

        "\telse\n"

        "\telse\n"

                "\t\tassign rnd = ({ {(OWIDTH+3+SHIFT){1'b0}},1'b1,\n"

                "\t\tassign rnd = ({ {(OWIDTH+3+SHIFT){1'b0}},1'b1,\n"

                "\t\t\t\t{(CWIDTH+IWIDTH-OWIDTH-SHIFT-1){1'b0}} });\n"

                "\t\t\t\t{(CWIDTH+IWIDTH-OWIDTH-SHIFT-1){1'b0}} });\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// although they only need to be (IWIDTH+1)\n"

                        "\t\t\t// although they only need to be (IWIDTH+1)\n"

                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"

                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"

                        "\t\t\t// extra bits we need to get rid of.)\n"

                        "\t\t\t// extra bits we need to get rid of.)\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

"\n"

"\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\tb_right_r <= mpy_r + rnd;\n"

                        "\t\t\tb_right_r <= mpy_r + rnd;\n"

                        "\t\t\tb_right_i <= mpy_i + rnd;\n"

                        "\t\t\tb_right_i <= mpy_i + rnd;\n"

                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"

                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"

                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"

                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"

                        "\t\t\to_aux <= aux & ovalid;\n"

                        "\t\t\to_aux <= aux & ovalid;\n"

                "\t\tend\n"

                "\t\tend\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Final clock--clock and remove unnecessary bits.\n"

        "\t// Final clock--clock and remove unnecessary bits.\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"

        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"

        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"

        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"

        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"

        "\t// them, but the actual values will never fill all these bits.\n"

        "\t// them, but the actual values will never fill all these bits.\n"

        "\t// In particular, we only need:\n"

        "\t// In particular, we only need:\n"

        "\t//\t IWIDTH bits for the input\n"

        "\t//\t IWIDTH bits for the input\n"

        "\t//\t     +1 bit for the add/subtract\n"

        "\t//\t     +1 bit for the add/subtract\n"

        "\t//\t+CWIDTH bits for the coefficient multiply\n"

        "\t//\t+CWIDTH bits for the coefficient multiply\n"

        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"

        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"

        "\t//\t ------\n"

        "\t//\t ------\n"

        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"

        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"

        "\t//\n"

        "\t//\n"

        "\t// However, the coefficient multiply multiplied by a maximum value\n"

        "\t// However, the coefficient multiply multiplied by a maximum value\n"

        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"

        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"

        "\t//\t   IWIDTH bits for the input\n"

        "\t//\t   IWIDTH bits for the input\n"

        "\t//\t       +1 bit for the add/subtract\n"

        "\t//\t       +1 bit for the add/subtract\n"

        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"

        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"

        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"

        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"

        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"

        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"

        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"

        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"

        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"

        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"

        "\t// or if he wishes to arbitrarily shift some of these off (via\n"

        "\t// or if he wishes to arbitrarily shift some of these off (via\n"

        "\t// SHIFT) we accomplish that here.\n"

        "\t// SHIFT) we accomplish that here.\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

"\n"

"\n"

        "\t// As a final step, we pack our outputs into two packed two\'s\n"

        "\t// As a final step, we pack our outputs into two packed two\'s\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\tassign       o_left = { o_left_r, o_left_i };\n"

        "\tassign       o_left = { o_left_r, o_left_i };\n"

        "\tassign       o_right= { o_right_r,o_right_i};\n"

        "\tassign       o_right= { o_right_r,o_right_i};\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra) {

void    build_stage(const char *fname, int stage, bool odd, int nbits, bool inv, int xtra) {

        FILE    *fstage = fopen(fname, "w");

        FILE    *fstage = fopen(fname, "w");

        int     cbits = nbits + xtra;

        int     cbits = nbits + xtra;

        if ((cbits * 2) >= sizeof(long long)*8) {

        if ((cbits * 2) >= sizeof(long long)*8) {

                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");

                fprintf(stderr, "ERROR: CMEM Coefficient precision requested overflows long long data type.\n");

                exit(-1);

                exit(-1);

        if (fstage == NULL) {

        if (fstage == NULL) {

                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);

                fprintf(stderr, "ERROR: Could not open %s for writing!\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");

                fprintf(stderr, "Attempting to continue, but this file will be missing.\n");

                return;

                return;

        fprintf(fstage,

        fprintf(fstage,

"////////////////////////////////////////////////////////////////////////////\n"

"////////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   %sfftstage_%c%d.v\n"

"// Filename:   %sfftstage_%c%d.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"

"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"

"//             be used by a FFT core compiler to generate FFTs which may be\n"

"//             be used by a FFT core compiler to generate FFTs which may be\n"

"//             used as part of an FFT core.  Specifically, this file \n"

"//             used as part of an FFT core.  Specifically, this file \n"

"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"

"//             encapsulates the options of an FFT-stage.  For any 2^N length\n"

"//             FFT, there shall be (N-1) of these stages.  \n"

"//             FFT, there shall be (N-1) of these stages.  \n"

"//\n%s"

"//\n%s"

"//\n",

"//\n",

                (inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);

                (inv)?"i":"", (odd)?'o':'e', stage*2, prjname, creator);

        fprintf(fstage, "%s", cpyleft);

        fprintf(fstage, "%s", cpyleft);

        fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",

        fprintf(fstage, "module\t%sfftstage_%c%d(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n",

                (inv)?"i":"", (odd)?'o':'e', stage*2);

                (inv)?"i":"", (odd)?'o':'e', stage*2);

        // These parameter values are useless at this point--they are to be

        // These parameter values are useless at this point--they are to be

        // replaced by the parameter values in the calling program.  Only

        // replaced by the parameter values in the calling program.  Only

        // problem is, the CWIDTH needs to match exactly!

        // problem is, the CWIDTH needs to match exactly!

        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",

        fprintf(fstage, "\tparameter\tIWIDTH=%d,CWIDTH=%d,OWIDTH=%d;\n",

                nbits, cbits, nbits+1);

                nbits, cbits, nbits+1);

        fprintf(fstage,

        fprintf(fstage,

"\t// Parameters specific to the core that should be changed when this\n"

"\t// Parameters specific to the core that should be changed when this\n"

"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"

"\t// core is built ... Note that the minimum LGSPAN (the base two log\n"

"\t// of the span, or the base two log of the current FFT size) is 3.\n"

"\t// of the span, or the base two log of the current FFT size) is 3.\n"

"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"

"\t// Smaller spans (i.e. the span of 2) must use the dblstage module.\n"

"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");

"\tparameter\tLGWIDTH=11, LGSPAN=9, LGBDLY=5, BFLYSHIFT=0;\n");

        fprintf(fstage,

        fprintf(fstage,

"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"

"\tinput                                        i_clk, i_rst, i_ce, i_sync;\n"

"\tinput                [(2*IWIDTH-1):0]        i_data;\n"

"\tinput                [(2*IWIDTH-1):0]        i_data;\n"

"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"

"\toutput       reg     [(2*OWIDTH-1):0]        o_data;\n"

"\toutput       reg                             o_sync;\n"

"\toutput       reg                             o_sync;\n"

"\n"

"\n"

"\treg  wait_for_sync;\n"

"\treg  wait_for_sync;\n"

"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"

"\treg  [(2*IWIDTH-1):0]        ib_a, ib_b;\n"

"\treg  [(2*CWIDTH-1):0]        ib_c;\n"

"\treg  [(2*CWIDTH-1):0]        ib_c;\n"

"\treg  ib_sync;\n"

"\treg  ib_sync;\n"

"\n"

"\n"

"\treg  b_started;\n"

"\treg  b_started;\n"

"\twire ob_sync;\n"

"\twire ob_sync;\n"

"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");

"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");

        fprintf(fstage,

        fprintf(fstage,

"\n"

"\n"

"\t// %scmem is defined as an array of real and complex values,\n"

"\t// %scmem is defined as an array of real and complex values,\n"

"\t// where the top CWIDTH bits are the real value and the bottom\n"

"\t// where the top CWIDTH bits are the real value and the bottom\n"

"\t// CWIDTH bits are the imaginary value.\n"

"\t// CWIDTH bits are the imaginary value.\n"

"\t//\n"

"\t//\n"

"\t// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"

"\t// cmem[i] = { (2^(CWIDTH-2)) * cos(2*pi*i/(2^LGWIDTH)),\n"

"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"

"\t//           (2^(CWIDTH-2)) * sin(2*pi*i/(2^LGWIDTH)) };\n"

"\t//\n"

"\t//\n"

"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"

"\treg  [(2*CWIDTH-1):0]        %scmem [0:((1<<LGSPAN)-1)];\n"

"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",

"\tinitial\t$readmemh(\"%scmem_%c%d.hex\",%scmem);\n\n",

                (inv)?"i":"", (inv)?"i":"",

                (inv)?"i":"", (inv)?"i":"",

                (inv)?"i":"", (odd)?'o':'e',stage<<1,

                (inv)?"i":"", (odd)?'o':'e',stage<<1,

                (inv)?"i":"");

                (inv)?"i":"");

                FILE    *cmem;

                FILE    *cmem;

                char    memfile[128], *ptr;

                strncpy(memfile, fname, 125);

                        char    *memfile, *ptr;

                        memfile = new char[strlen(fname)+128];

                        strcpy(memfile, fname);

                if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {

                if ((NULL != (ptr = strrchr(memfile, '/')))&&(ptr>memfile)) {

                        ptr++;

                                ptr++;

                        sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);

                                sprintf(ptr, "%scmem_%c%d.hex", (inv)?"i":"", (odd)?'o':'e', stage*2);

                } else {

                        } else {

                        sprintf(memfile, "%s/%scmem_%c%d.hex",

                                sprintf(memfile, "%s/%scmem_%c%d.hex",

                                COREDIR, (inv)?"i":"",

                                        COREDIR, (inv)?"i":"",

                                (odd)?'o':'e', stage*2);

                                        (odd)?'o':'e', stage*2);

                // strcpy(&memfile[strlen(memfile)-2], ".hex");

                        // strcpy(&memfile[strlen(memfile)-2], ".hex");

                cmem = fopen(memfile, "w");

                cmem = fopen(memfile, "w");

                        if (NULL == cmem) {

                                fprintf(stderr, "Could not open/write \'%s\' with FFT coefficients.\n", memfile);

                                perror("Err from O/S:");

                                exit(-2);

                        delete[] memfile;

                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");

                // fprintf(cmem, "// CBITS = %d, inv = %s\n", cbits, (inv)?"true":"false");

                for(int i=0; i<stage/2; i++) {

                for(int i=0; i<stage/2; i++) {

                        int k = 2*i+odd;

                        int k = 2*i+odd;

                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);

                        double  W = ((inv)?1:-1)*2.0*M_PI*k/(double)(2*stage);

                        double  c, s;

                        double  c, s;

                        long long ic, is, vl;

                        long long ic, is, vl;

                        c = cos(W); s = sin(W);

                        c = cos(W); s = sin(W);

                        ic = (long long)((double)((1ll<<(cbits-2)) * c + 0.5));

                        ic = (long long)((double)((1ll<<(cbits-2)) * c + 0.5));

                        is = (long long)((double)((1ll<<(cbits-2)) * s + 0.5));

                        is = (long long)((double)((1ll<<(cbits-2)) * s + 0.5));

                        vl = (ic & (~(-1ll << (cbits))));

                        vl = (ic & (~(-1ll << (cbits))));

                        vl <<= (cbits);

                        vl <<= (cbits);

                        vl |= (is & (~(-1ll << (cbits))));

                        vl |= (is & (~(-1ll << (cbits))));

                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);

                        fprintf(cmem, "%0*llx\n", ((cbits*2+3)/4), vl);

/*

/*

                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",

                        fprintf(cmem, "%0*llx\t\t// %f+j%f -> %llx +j%llx\n",

                                ((cbits*2+3)/4), vl, c, s,

                                ((cbits*2+3)/4), vl, c, s,

                                ic & (~(-1ll<<(((cbits+3)/4)*4))),

                                ic & (~(-1ll<<(((cbits+3)/4)*4))),

                                is & (~(-1ll<<(((cbits+3)/4)*4))));

                                is & (~(-1ll<<(((cbits+3)/4)*4))));

*/

*/

                } fclose(cmem);

                } fclose(cmem);

        fprintf(fstage,

        fprintf(fstage,

"\treg  [(LGWIDTH-2):0]         iaddr;\n"

"\treg  [(LGWIDTH-2):0]         iaddr;\n"

"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"

"\treg  [(2*IWIDTH-1):0]        imem    [0:((1<<LGSPAN)-1)];\n"

"\n"

"\n"

"\treg  [LGSPAN:0]              oB;\n"

"\treg  [LGSPAN:0]              oB;\n"

"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"

"\treg  [(2*OWIDTH-1):0]        omem    [0:((1<<LGSPAN)-1)];\n"

"\n"

"\n"

"\talways @(posedge i_clk)\n"

"\talways @(posedge i_clk)\n"

        "\t\tif (i_rst)\n"

        "\t\tif (i_rst)\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\t\tiaddr <= 0;\n"

                "\t\t\tiaddr <= 0;\n"

                "\t\t\toB <= 0;\n"

                "\t\t\toB <= 0;\n"

                "\t\t\tib_sync   <= 1'b0;\n"

                "\t\t\tib_sync   <= 1'b0;\n"

                "\t\t\to_sync    <= 1'b0;\n"

                "\t\t\to_sync    <= 1'b0;\n"

                "\t\t\tb_started <= 1'b0;\n"

                "\t\t\tb_started <= 1'b0;\n"

        "\t\tend\n"

        "\t\tend\n"

        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\t// First step: Record what we\'re not ready to use yet\n"

                "\t\t\t// First step: Record what we\'re not ready to use yet\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"

                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"

                "\t\t\tiaddr <= iaddr + 1;\n"

                "\t\t\tiaddr <= iaddr + 1;\n"

                "\t\t\twait_for_sync <= 1'b0;\n"

                "\t\t\twait_for_sync <= 1'b0;\n"

"\n"

"\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"

                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"

                "\t\t\t// butterfly\n"

                "\t\t\t// butterfly\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\tif (iaddr[LGSPAN])\n"

                "\t\t\tif (iaddr[LGSPAN])\n"

                "\t\t\tbegin\n"

                "\t\t\tbegin\n"

                        "\t\t\t\t// One input from memory, ...\n"

                        "\t\t\t\t// One input from memory, ...\n"

                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t\t// One input clocked in from the top\n"

                        "\t\t\t\t// One input clocked in from the top\n"

                        "\t\t\t\tib_b <= i_data;\n"

                        "\t\t\t\tib_b <= i_data;\n"

                        "\t\t\t\t// Set the sync to true on the very first\n"

                        "\t\t\t\t// Set the sync to true on the very first\n"

                        "\t\t\t\t// valid input in, and hence on the very\n"

                        "\t\t\t\t// valid input in, and hence on the very\n"

                        "\t\t\t\t// first valid data out per FFT.\n"

                        "\t\t\t\t// first valid data out per FFT.\n"

                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"

                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"

                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"

                "\t\t\tend else begin\n"

                "\t\t\tend else begin\n"

                        "\t\t\t\t// Just to make debugging easier, let\'s\n"

                        "\t\t\t\t// Just to make debugging easier, let\'s\n"

                        "\t\t\t\t// clear these registers.  That\'ll make\n"

                        "\t\t\t\t// clear these registers.  That\'ll make\n"

                        "\t\t\t\t// the transition easier to watch.\n"

                        "\t\t\t\t// the transition easier to watch.\n"

                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"

                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"

                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"

                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"

                        "\t\t\t\tib_sync <= 1'b0;\n"

                        "\t\t\t\tib_sync <= 1'b0;\n"

                "\t\t\tend\n"

                "\t\t\tend\n"

"\n"

"\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\t// Next step: recover the outputs from the butterfly\n"

                "\t\t\t// Next step: recover the outputs from the butterfly\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"

                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"

                "\t\t\tbegin // A butterfly output is available\n"

                "\t\t\tbegin // A butterfly output is available\n"

                        "\t\t\t\tb_started <= 1'b1;\n"

                        "\t\t\t\tb_started <= 1'b1;\n"

                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"

                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"

                        "\t\t\t\toB <= oB+1;\n"

                        "\t\t\t\toB <= oB+1;\n"

"\n"

"\n"

                        "\t\t\t\to_sync <= (ob_sync);\n"

                        "\t\t\t\to_sync <= (ob_sync);\n"

                        "\t\t\t\to_data <= ob_a;\n"

                        "\t\t\t\to_data <= ob_a;\n"

                "\t\t\tend else if (b_started)\n"

                "\t\t\tend else if (b_started)\n"

                "\t\t\tbegin // and keep outputting once you start--at a rate\n"

                "\t\t\tbegin // and keep outputting once you start--at a rate\n"

                "\t\t\t// of one guaranteed output per clock that has i_ce set.\n"

                "\t\t\t// of one guaranteed output per clock that has i_ce set.\n"

                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"

                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                "\t\t\tend else\n"

                "\t\t\tend else\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

        "\t\tend\n"

        "\t\tend\n"

"\n", (inv)?"i":"");

"\n", (inv)?"i":"");

        fprintf(fstage,

        fprintf(fstage,

"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

"\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

"\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"

"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"

"endmodule;\n",

"endmodule;\n",

        lgdelay(nbits, xtra), (1<xtra)?(nbits+4):(nbits+xtra+3));

        lgdelay(nbits, xtra), bflydelay(nbits, xtra));

//
// usage
//
// Prints the command line help for fftgen to stderr.  This routine only
// prints: it neither exits nor returns a value, so any caller wanting to
// quit after showing the help must call exit() itself.
//
// The options listed here are meant to mirror the switch statement in
// main() that parses argv.  The text is passed to fprintf() as the format
// string, so it must not contain any '%' characters.
//
void    usage(void) {
        fprintf(stderr,
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-sS01] [-h]\n"
// "\tfftgen -i\n"
"\t-c <cbits>\tCauses all internal complex coefficients to be\n"
"\t\tlonger than the corresponding data bits, to help avoid\n"
"\t\tcoefficient truncation errors.\n"
"\t-d <dir>\tPlaces all of the generated verilog files into <dir>.\n"
"\t-f <size>\tSets the size of the FFT as the number of complex\n"
"\t\tsamples input to the transform.\n"
// -h is handled in main() by calling usage() and then exit(0)
"\t-h\tPrints this usage statement and exits.\n"
"\t-n <nbits>\tSets the number of bits in the twos complement input\n"
"\t\tto the FFT routine.\n"
"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"
"\t\tproduce.  Internal values greater than this value will be\n"
"\t\ttruncated to this value.\n"
"\t-s\tSkip the final bit reversal stage.  This is useful in\n"
"\t\talgorithms that need to apply a filter without needing to do\n"
"\t\tbin shifting, as these algorithms can, with this option, just\n"
"\t\tmultiply by a bit reversed correlation sequence and then\n"
"\t\tinverse FFT the (still bit reversed) result.\n"
"\t-S\tInclude the final bit reversal stage (default).\n"
"\t-0\tA forward FFT (default), meaning that the coefficients are\n"
"\t\tgiven by e^{-j 2 pi k/N n }.\n"
"\t-1\tAn inverse FFT, meaning that the coefficients are\n"
"\t\tgiven by e^{ j 2 pi k/N n }.\n");
}

// Features still needed:

// Features still needed:

//      Interactivity.

//      Interactivity.

//      Some number of maximum bits, beyond which we won't accumulate any more.

//      Some number of maximum bits, beyond which we won't accumulate any more.

//      Obviously, the build_stage above.

//      Obviously, the build_stage above.

//      Copying the files of interest into the fft-core directory, from

//      Copying the files of interest into the fft-core directory, from

//              whatever directory this file is run out of.

//              whatever directory this file is run out of.

int main(int argc, char **argv) {

int main(int argc, char **argv) {

        int     fftsize = -1, lgsize = -1;

        int     fftsize = -1, lgsize = -1;

        int     nbitsin = 16, xtracbits = 4;

        int     nbitsin = 16, xtracbits = 4;

        int     nbitsout, maxbitsout = -1;

        int     nbitsout, maxbitsout = -1;

        bool    bitreverse = true, inverse=false, interactive = false,

        bool    bitreverse = true, inverse=false, interactive = false,

                verbose_flag = false;

                verbose_flag = false;

        FILE    *vmain;

        FILE    *vmain;

        char    fname[128], coredir[1024] = "fft-core";

        std::string     coredir = "fft-core", cmdline = "";

        if (argc <= 1)

        if (argc <= 1)

                usage();

                usage();

        cmdline = argv[0];

        for(int argn=1; argn<argc; argn++) {

                cmdline += " ";

                cmdline += argv[argn];

        for(int argn=1; argn<argc; argn++) {

        for(int argn=1; argn<argc; argn++) {

                if ('-' == argv[argn][0]) {

                if ('-' == argv[argn][0]) {

                        for(int j=1; (argv[argn][j])&&(j<100); j++) {

                        for(int j=1; (argv[argn][j])&&(j<100); j++) {

                                switch(argv[argn][j]) {

                                switch(argv[argn][j]) {

                                        case '0':

                                        case '0':

                                                inverse = false;

                                                inverse = false;

                                                break;

                                                break;

                                        case '1':

                                        case '1':

                                                inverse = true;

                                                inverse = true;

                                                break;

                                                break;

                                        case 'c':

                                        case 'c':

                                                if (argn+1 >= argc) {

                                                if (argn+1 >= argc) {

                                                        printf("No extra number of coefficient bits given\n");

                                                        printf("No extra number of coefficient bits given\n");

                                                        usage(); exit(-1);

                                                        usage(); exit(-1);

                                                xtracbits = atoi(argv[++argn]);

                                                xtracbits = atoi(argv[++argn]);

                                                j+= 200;

                                                j+= 200;

                                                break;

                                                break;

                                        case 'd':

                                        case 'd':

                                                if (argn+1 >= argc) {

                                                if (argn+1 >= argc) {

                                                        printf("No extra number of coefficient bits given\n");

                                                        printf("No extra number of coefficient bits given\n");

                                                        usage(); exit(-1);

                                                        usage(); exit(-1);

                                                strcpy(coredir, argv[++argn]);

                                                coredir = argv[++argn];

                                                j += 200;

                                                j += 200;

                                                break;

                                                break;

                                        case 'f':

                                        case 'f':

                                                if (argn+1 >= argc) {

                                                if (argn+1 >= argc) {

                                                        printf("No FFT Size given\n");

                                                        printf("No FFT Size given\n");

                                                        usage(); exit(-1);

                                                        usage(); exit(-1);

                                                fftsize = atoi(argv[++argn]);

                                                fftsize = atoi(argv[++argn]);

                                                { int sln = strlen(argv[argn]);

                                                { int sln = strlen(argv[argn]);

                                                if (!isdigit(argv[argn][sln-1])){

                                                if (!isdigit(argv[argn][sln-1])){

                                                        switch(argv[argn][sln-1]) {

                                                        switch(argv[argn][sln-1]) {

                                                        case 'k': case 'K':

                                                        case 'k': case 'K':

                                                                fftsize <<= 10;

                                                                fftsize <<= 10;

                                                                break;

                                                                break;

                                                        case 'm': case 'M':

                                                        case 'm': case 'M':

                                                                fftsize <<= 20;

                                                                fftsize <<= 20;

                                                                break;

                                                                break;

                                                        case 'g': case 'G':

                                                        case 'g': case 'G':

                                                                fftsize <<= 30;

                                                                fftsize <<= 30;

                                                                break;

                                                                break;

                                                        default:

                                                        default:

                                                                printf("Unknown FFT size, %s\n", argv[argn]);

                                                                printf("Unknown FFT size, %s\n", argv[argn]);

                                                                exit(-1);

                                                                exit(-1);

}}

}}

                                                j += 200;

                                                j += 200;

                                                break;

                                                break;

                                        case 'h':

                                        case 'h':

                                                usage();

                                                usage();

                                                exit(0);

                                                exit(0);

                                                break;

                                                break;

                                        case 'i':

                                        case 'i':

                                                interactive = true;

                                                interactive = true;

                                                break;

                                                break;

                                        case 'm':

                                        case 'm':

                                                if (argn+1 >= argc) {

                                                if (argn+1 >= argc) {

                                                        printf("No maximum output bit value given\n");

                                                        printf("No maximum output bit value given\n");

                                                        exit(-1);

                                                        exit(-1);

                                                maxbitsout = atoi(argv[++argn]);

                                                maxbitsout = atoi(argv[++argn]);

                                                j += 200;

                                                j += 200;

                                                break;

                                                break;

                                        case 'n':

                                        case 'n':

                                                if (argn+1 >= argc) {

                                                if (argn+1 >= argc) {

                                                        printf("No input bit size given\n");

                                                        printf("No input bit size given\n");

                                                        exit(-1);

                                                        exit(-1);

                                                nbitsin = atoi(argv[++argn]);

                                                nbitsin = atoi(argv[++argn]);

                                                j += 200;

                                                j += 200;

                                                break;

                                                break;

                                        case 'S':

                                        case 'S':

                                                bitreverse = true;

                                                bitreverse = true;

                                                break;

                                                break;

                                        case 's':

                                        case 's':

                                                bitreverse = false;

                                                bitreverse = false;

                                                break;

                                                break;

                                        case 'v':

                                        case 'v':

                                                verbose_flag = true;

                                                verbose_flag = true;

                                                break;

                                                break;

                                        default:

                                        default:

                                                printf("Unknown argument, -%c\n", argv[argn][j]);

                                                printf("Unknown argument, -%c\n", argv[argn][j]);

                                                usage();

                                                usage();

                                                exit(-1);

                                                exit(-1);

                } else {

                } else {

                        printf("Unrecognized argument, %s\n", argv[argn]);

                        printf("Unrecognized argument, %s\n", argv[argn]);

                        usage();

                        usage();

                        exit(-1);

                        exit(-1);

        if ((lgsize < 0)&&(fftsize > 1)) {

        if ((lgsize < 0)&&(fftsize > 1)) {

                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)

                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)

        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {

        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {

                printf("INVALID PARAMETERS!!!!\n");

                printf("INVALID PARAMETERS!!!!\n");

                exit(-1);

                exit(-1);

        if (nextlg(fftsize) != fftsize) {

        if (nextlg(fftsize) != fftsize) {

                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",

                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",

                                fftsize);

                                fftsize);

                exit(-1);

                exit(-1);

        } else if (fftsize < 2) {

        } else if (fftsize < 2) {

                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",

                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",

                                fftsize);

                                fftsize);

                if (fftsize == 1) {

                if (fftsize == 1) {

                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");

                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");

                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");

                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");

                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");

                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");

                        fprintf(stderr, "can be connected straight to the input.\n");

                        fprintf(stderr, "can be connected straight to the input.\n");

                } else {

                } else {

                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);

                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);

                        fprintf(stderr, "Is such an operation even defined?\n");

                        fprintf(stderr, "Is such an operation even defined?\n");

                exit(-1);

                exit(-1);

        // Calculate how many output bits we'll have, and what the log

        // Calculate how many output bits we'll have, and what the log

        // based two size of our FFT is.

        // based two size of our FFT is.

                int     tmp_size = fftsize;

                int     tmp_size = fftsize;

                // The first stage always accumulates one bit, regardless

                // The first stage always accumulates one bit, regardless

                // of whether you need to or not.

                // of whether you need to or not.

                nbitsout = nbitsin + 1;

                nbitsout = nbitsin + 1;

                tmp_size >>= 1;

                tmp_size >>= 1;

                while(tmp_size > 4) {

                while(tmp_size > 4) {

                        nbitsout += 1;

                        nbitsout += 1;

                        tmp_size >>= 2;

                        tmp_size >>= 2;

                if (tmp_size > 1)

                if (tmp_size > 1)

                        nbitsout ++;

                        nbitsout ++;

                if (fftsize <= 2)

                if (fftsize <= 2)

                        bitreverse = false;

                        bitreverse = false;

        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))

        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))

                nbitsout = maxbitsout;

                nbitsout = maxbitsout;

                struct stat     sbuf;

                struct stat     sbuf;

                if (lstat(coredir, &sbuf)==0) {

                if (lstat(coredir.c_str(), &sbuf)==0) {

                        if (!S_ISDIR(sbuf.st_mode)) {

                        if (!S_ISDIR(sbuf.st_mode)) {

                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir);

                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());

                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");

                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");

                                fprintf(stderr, "To try again, please remove this file.\n");

                                fprintf(stderr, "To try again, please remove this file.\n");

                                exit(-1);

                                exit(-1);

                } else

                } else

                        mkdir(coredir, 0755);

                        mkdir(coredir.c_str(), 0755);

                if (access(coredir, X_OK|W_OK) != 0) {

                if (access(coredir.c_str(), X_OK|W_OK) != 0) {

                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir);

                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());

                        exit(-1);

                        exit(-1);

        sprintf(fname, "%s/%sfftmain.v", coredir, (inverse)?"i":"");

        vmain = fopen(fname, "w");

                std::string     fname_string;

                fname_string = coredir;

                fname_string += "/";

                if (inverse) fname_string += "i";

                fname_string += "fftmain.v";

                vmain = fopen(fname_string.c_str(), "w");

        if (NULL == vmain) {

        if (NULL == vmain) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());

                perror("Err from O/S:");

                perror("Err from O/S:");

                exit(-1);

                        exit(-1);

        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");

        fprintf(vmain, "/////////////////////////////////////////////////////////////////////////////\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");

        fprintf(vmain, "// Filename:    %sfftmain.v\n", (inverse)?"i":"");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "// Project:     %s\n", prjname);

        fprintf(vmain, "// Project:     %s\n", prjname);

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");

        fprintf(vmain, "// Purpose:     This is the main module in the Doubletime FPGA FFT project.\n");

        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");

        fprintf(vmain, "//              As such, all other modules are subordinate to this one.\n");

        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");

        fprintf(vmain, "//              (I have been reading too much legalese this week ...)\n");

        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);

        fprintf(vmain, "//              This module accomplish a fixed size Complex FFT on %d data\n", fftsize);

        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");

        fprintf(vmain, "//              points.  The FFT is fully pipelined, and accepts as inputs\n");

        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");

        fprintf(vmain, "//              two complex two\'s complement samples per clock.\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "// Parameters:\n");

        fprintf(vmain, "// Parameters:\n");

        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");

        fprintf(vmain, "//      i_clk\tThe clock.  All operations are synchronous with this clock.\n");

        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");

        fprintf(vmain, "//\ti_rst\tSynchronous reset, active high.  Setting this line will\n");

        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");

        fprintf(vmain, "//\t\t\tforce the reset of all of the internals to this routine.\n");

        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");

        fprintf(vmain, "//\t\t\tFurther, following a reset, the o_sync line will go\n");

        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");

        fprintf(vmain, "//\t\t\thigh the same time the first output sample is valid.\n");

        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");

        fprintf(vmain, "//      i_ce\tA clock enable line.  If this line is set, this module\n");

        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");

        fprintf(vmain, "//\t\t\twill accept two complex values as inputs, and produce\n");

        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");

        fprintf(vmain, "//\t\t\ttwo (possibly empty) complex values as outputs.\n");

        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");

        fprintf(vmain, "//\t\ti_left\tThe first of two complex input samples.  This value\n");

        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");

        fprintf(vmain, "//\t\t\tis split into two two\'s complement numbers, of \n");

        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);

        fprintf(vmain, "//\t\t\t%d bits each, with the real portion in the high\n", nbitsin);

        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");

        fprintf(vmain, "//\t\t\torder bits, and the imaginary portion taking the\n");

        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);

        fprintf(vmain, "//\t\t\tbottom %d bits.\n", nbitsin);

        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");

        fprintf(vmain, "//\t\ti_right\tThis is the same thing as i_left, only this is the\n");

        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");

        fprintf(vmain, "//\t\t\tsecond of two such samples.  Hence, i_left would\n");

        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");

        fprintf(vmain, "//\t\t\tcontain input sample zero, i_right would contain\n");

        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");

        fprintf(vmain, "//\t\t\tsample one.  On the next clock i_left would contain\n");

        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");

        fprintf(vmain, "//\t\t\tinput sample two, i_right number three and so forth.\n");

        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");

        fprintf(vmain, "//\t\to_left\tThe first of two output samples, of the same\n");

        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);

        fprintf(vmain, "//\t\t\tformat as i_left, only having %d bits for each of\n", nbitsout);

        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);

        fprintf(vmain, "//\t\t\tthe real and imaginary components, leading to %d\n", nbitsout*2);

        fprintf(vmain, "//\t\t\tbits total.\n");

        fprintf(vmain, "//\t\t\tbits total.\n");

        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");

        fprintf(vmain, "//\t\to_right\tThe second of two output samples produced each clock.\n");

        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");

        fprintf(vmain, "//\t\t\tThis has the same format as o_left.\n");

        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");

        fprintf(vmain, "//\t\to_sync\tA one bit output indicating the first valid sample\n");

        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");

        fprintf(vmain, "//\t\t\tproduced by this FFT following a reset.  Ever after,\n");

        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");

        fprintf(vmain, "//\t\t\tthis will indicate the first sample of an FFT frame.\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "// Arguments:\tThis file was computer generated using the\n");

        fprintf(vmain, "//\t\tfollowing command line:\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());

        fprintf(vmain, "//\n");

        fprintf(vmain, "%s", creator);

        fprintf(vmain, "%s", creator);

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "%s", cpyleft);

        fprintf(vmain, "%s", cpyleft);

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");

        fprintf(vmain, "module %sfftmain(i_clk, i_rst, i_ce,\n", (inverse)?"i":"");

        fprintf(vmain, "\t\ti_left, i_right,\n");

        fprintf(vmain, "\t\ti_left, i_right,\n");

        fprintf(vmain, "\t\to_left, o_right, o_sync);\n");

        fprintf(vmain, "\t\to_left, o_right, o_sync);\n");

        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);

        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n", nbitsin, nbitsout, lgsize);

        assert(lgsize > 0);

        assert(lgsize > 0);

        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");

        fprintf(vmain, "\tinput\t\ti_clk, i_rst, i_ce;\n");

        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");

        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");

        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");

        fprintf(vmain, "\toutput\treg\t\t\to_sync;\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");

        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");

        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");

        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\n\n");

        int     tmp_size = fftsize, lgtmp = lgsize;

        int     tmp_size = fftsize, lgtmp = lgsize;

        if (fftsize == 2) {

        if (fftsize == 2) {

                if (bitreverse) {

                if (bitreverse) {

                        fprintf(vmain, "\treg\tbr_start;\n");

                        fprintf(vmain, "\treg\tbr_start;\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1'b0;\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1'b1;\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");

                fprintf(vmain, "\tdblstage\t#(IWIDTH)\tstage_2(i_clk, i_rst, i_ce,\n");

                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");

                fprintf(vmain, "\t\t\t(~i_rst), i_left, i_right, br_left, br_right);\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

        } else {

        } else {

                int     nbits = nbitsin, dropbit=0;

                int     nbits = nbitsin, dropbit=0;

                // Always do a first stage

                // Always do a first stage

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);

                fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);

                fprintf(vmain, "\twire\t[(2*IWIDTH+1):0]\tw_e%d, w_o%d;\n", fftsize, fftsize);

                fprintf(vmain, "\twire\t[(2*IWIDTH+1):0]\tw_e%d, w_o%d;\n", fftsize, fftsize);

                fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,IWIDTH+1,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",

                fprintf(vmain, "\t%sfftstage_e%d\t#(IWIDTH,IWIDTH+%d,IWIDTH+1,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",

                        (inverse)?"i":"", fftsize,

                        (inverse)?"i":"", fftsize,

                        xtracbits,

                        xtracbits,

                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),

                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),

                        fftsize);

                        fftsize);

                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);

                fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d);\n", fftsize, fftsize);

                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,IWIDTH+1,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",

                fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,IWIDTH+1,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",

                        (inverse)?"i":"", fftsize,

                        (inverse)?"i":"", fftsize,

                        xtracbits,

                        xtracbits,

                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),

                        lgsize, lgtmp-2, lgdelay(nbits,xtracbits),

                        fftsize);

                        fftsize);

                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);

                fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                sprintf(fname, "%s/%sfftstage_e%d.v", coredir, (inverse)?"i":"", fftsize);

                build_stage(fname, fftsize/2, 0, nbits, inverse, xtracbits);     // Even stage

                        std::string     fname;

                sprintf(fname, "%s/%sfftstage_o%d.v", coredir, (inverse)?"i":"", fftsize);

                        char    numstr[12];

                build_stage(fname, fftsize/2, 1, nbits, inverse, xtracbits);    // Odd  stage

                        fname = coredir + "/";

                        if (inverse) fname += "i";

                        fname += "fftstage_e";

                        sprintf(numstr, "%d", fftsize);

                        fname += numstr;

                        fname += ".v";

                        build_stage(fname.c_str(), fftsize/2, 0, nbits, inverse, xtracbits);     // Even stage

                        fname = coredir + "/";

                        if (inverse) fname += "i";

                        fname += "fftstage_o";

                        sprintf(numstr, "%d", fftsize);

                        fname += numstr;

                        fname += ".v";

                        build_stage(fname.c_str(), fftsize/2, 1, nbits, inverse, xtracbits);    // Odd  stage

                nbits += 1;     // New number of input bits

                nbits += 1;     // New number of input bits

                tmp_size >>= 1; lgtmp--;

                tmp_size >>= 1; lgtmp--;

                dropbit = 0;

                dropbit = 0;

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                while(tmp_size >= 8) {

                while(tmp_size >= 8) {

                        int     obits = nbits+((dropbit)?0:1);

                        int     obits = nbits+((dropbit)?0:1);

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);

                        fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);

                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*obits-1, tmp_size, tmp_size);

                        fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*obits-1, tmp_size, tmp_size);

                        fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",

                        fprintf(vmain, "\t%sfftstage_e%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",

                                (inverse)?"i":"", tmp_size,

                                (inverse)?"i":"", tmp_size,

                                nbits, nbits+xtracbits, obits,

                                nbits, nbits+xtracbits, obits,

                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits), (dropbit)?0:0,

                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits), (dropbit)?0:0,

                                tmp_size);

                                tmp_size);

                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);

                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);

                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",

                        fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",

                                (inverse)?"i":"", tmp_size,

                                (inverse)?"i":"", tmp_size,

                                nbits, nbits+xtracbits, obits,

                                nbits, nbits+xtracbits, obits,

                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits), (dropbit)?0:0,

                                lgsize, lgtmp-2, lgdelay(nbits,xtracbits), (dropbit)?0:0,

                                tmp_size);

                                tmp_size);

                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);

                        fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);

                        fprintf(vmain, "\n\n");

                        fprintf(vmain, "\n\n");

                        sprintf(fname, "%s/%sfftstage_e%d.v", coredir, (inverse)?"i":"", tmp_size);

                        build_stage(fname, tmp_size/2, 0, nbits, inverse, xtracbits);    // Even stage

                                std::string     fname;

                        sprintf(fname, "%s/%sfftstage_o%d.v", coredir, (inverse)?"i":"", tmp_size);

                                char            numstr[12];

                        build_stage(fname, tmp_size/2, 1, nbits, inverse, xtracbits);   // Odd  stage

                                fname = coredir + "/";

                                if (inverse) fname += "i";

                                fname += "fftstage_e";

                                sprintf(numstr, "%d", tmp_size);

                                fname += numstr;

                                fname += ".v";

                                build_stage(fname.c_str(), tmp_size/2, 0, nbits, inverse, xtracbits);    // Even stage

                                fname = coredir + "/";

                                if (inverse) fname += "i";

                                fname += "fftstage_o";

                                sprintf(numstr, "%d", tmp_size);

                                fname += numstr;

                                fname += ".v";

                                build_stage(fname.c_str(), tmp_size/2, 1, nbits, inverse, xtracbits);   // Odd  stage

                        dropbit ^= 1;

                        dropbit ^= 1;

                        nbits = obits;

                        nbits = obits;

                        tmp_size >>= 1; lgtmp--;

                        tmp_size >>= 1; lgtmp--;

                if (tmp_size == 4) {

                if (tmp_size == 4) {

                        int     obits = nbits+((dropbit)?0:1);

                        int     obits = nbits+((dropbit)?0:1);

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");

                        fprintf(vmain, "\twire\t\tw_s4, w_os4;\n");

                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*obits-1);

                        fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*obits-1);

                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",

                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, i_rst, i_ce,\n",

                                nbits, obits, lgsize, (inverse)?1:0, (dropbit)?0:0);

                                nbits, obits, lgsize, (inverse)?1:0, (dropbit)?0:0);

                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");

                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4);\n");

                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",

                        fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, i_rst, i_ce,\n",

                                nbits, obits, lgsize, (inverse)?1:0, (dropbit)?0:0);

                                nbits, obits, lgsize, (inverse)?1:0, (dropbit)?0:0);

                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");

                        fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");

                        dropbit ^= 1;

                        dropbit ^= 1;

                        nbits = obits;

                        nbits = obits;

                        tmp_size >>= 1; lgtmp--;

                        tmp_size >>= 1; lgtmp--;

                        int obits = nbits+((dropbit)?0:1);

                        int obits = nbits+((dropbit)?0:1);

                        if (obits > nbitsout)

                        if (obits > nbitsout)

                                obits = nbitsout;

                                obits = nbitsout;

                        if ((maxbitsout>0)&&(obits > maxbitsout))

                        if ((maxbitsout>0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                        fprintf(vmain, "\twire\t\tw_s2;\n");

                        fprintf(vmain, "\twire\t\tw_s2;\n");

                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);

                        fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);

                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits, obits,(dropbit)?0:1);

                        fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits, obits,(dropbit)?0:1);

                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");

                        fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");

                        fprintf(vmain, "\n\n");

                        fprintf(vmain, "\n\n");

                        nbits = obits;

                        nbits = obits;

                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");

                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");

                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");

                fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");

                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                fprintf(vmain, "\n");

                fprintf(vmain, "\n");

                if (bitreverse) {

                if (bitreverse) {

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");

                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");

                        fprintf(vmain, "\t// our dblstage.\n");

                        fprintf(vmain, "\t// our dblstage.\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");

                        fprintf(vmain, "\t\telse\n");

                        fprintf(vmain, "\t\telse\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

        fprintf(vmain, "\twire\tbr_sync;\n");

        fprintf(vmain, "\twire\tbr_sync;\n");

        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");

        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");

        if (bitreverse) {

        if (bitreverse) {

                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);

                fprintf(vmain, "\tdblreverse\t#(%d,%d)\trevstage(i_clk, i_rst,\n", lgsize, nbitsout);

                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");

                fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");

                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");

                fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");

        } else {

        } else {

                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");

                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");

                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");

                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");

                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");

                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");

        fprintf(vmain, "\t// Last clock: Register our outputs, we\'re done.\n");

        fprintf(vmain, "\talways @(posedge i_clk)\n");

        fprintf(vmain, "\talways @(posedge i_clk)\n");

        fprintf(vmain, "\t\tbegin\n");

        fprintf(vmain, "\t\tbegin\n");

        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");

        fprintf(vmain, "\t\t\to_left  <= br_o_left;\n");

        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");

        fprintf(vmain, "\t\t\to_right <= br_o_right;\n");

        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");

        fprintf(vmain, "\t\t\to_sync  <= br_sync;\n");

        fprintf(vmain, "\t\tend\n");

        fprintf(vmain, "\t\tend\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "endmodule\n");

        fprintf(vmain, "endmodule\n");

        fclose(vmain);

        fclose(vmain);

        sprintf(fname, "%s/butterfly.v", coredir);

        build_butterfly(fname);

                std::string     fname;

                fname = coredir + "/butterfly.v";

                build_butterfly(fname.c_str(), xtracbits);

        sprintf(fname, "%s/shiftaddmpy.v", coredir);

                fname = coredir + "/shiftaddmpy.v";

        build_multiply(fname);

                build_multiply(fname.c_str());

        sprintf(fname, "%s/qtrstage.v", coredir);

                fname = coredir + "/qtrstage.v";

        build_quarters(fname);

                build_quarters(fname.c_str());

        sprintf(fname, "%s/dblstage.v", coredir);

                fname = coredir + "/dblstage.v";

        build_dblstage(fname);

                build_dblstage(fname.c_str());

        if (bitreverse) {

        if (bitreverse) {

                sprintf(fname, "%s/dblreverse.v", coredir);

                        fname = coredir + "/dblreverse.v";

                build_dblreverse(fname);

                        build_dblreverse(fname.c_str());

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 9 and 14