OpenCores

Rev 36	Rev 37
`////////////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Filename: fftgen.cpp`	`// Filename: fftgen.cpp`
`//`	`//`
`// Project: A General Purpose Pipelined FFT Implementation`	`// Project: A General Purpose Pipelined FFT Implementation`
`//`	`//`
`// Purpose: This is the core generator for the project. Every part`	`// Purpose: This is the core generator for the project. Every part`
`// and piece of this project begins and ends in this program.`	`// and piece of this project begins and ends in this program.`
`// Once built, this program will build an FFT (or IFFT) core of arbitrary`	`// Once built, this program will build an FFT (or IFFT) core of arbitrary`
`// width, precision, etc., that will run at two samples per clock.`	`// width, precision, etc., that will run at two samples per clock.`
`// (Incidentally, I didn't pick two samples per clock because it was`	`// (Incidentally, I didn't pick two samples per clock because it was`
`// easier, but rather because there weren't any two-sample per clock`	`// easier, but rather because there weren't any two-sample per clock`
`// FFT's posted on opencores.com. Further, FFT's running at one sample`	`// FFT's posted on opencores.com. Further, FFT's running at one sample`
`// per clock aren't that hard to find.)`	`// per clock aren't that hard to find.)`
`//`	`//`
`// You can find the documentation for this program in two places. One is`	`// You can find the documentation for this program in two places. One is`
`// in the usage() function below. The second is in the 'doc'uments`	`// in the usage() function below. The second is in the 'doc'uments`
`// directory that comes with this package, specifically in the spec.pdf`	`// directory that comes with this package, specifically in the spec.pdf`
`// file. If it's not there, type make in the documents directory to`	`// file. If it's not there, type make in the documents directory to`
`// build it.`	`// build it.`
`//`	`//`
`// 20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.`	`// 20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.`
`// (Adjustments are at the top of the file ...)`	`// (Adjustments are at the top of the file ...)`
`//`	`//`
`// Creator: Dan Gisselquist, Ph.D.`	`// Creator: Dan Gisselquist, Ph.D.`
`// Gisselquist Technology, LLC`	`// Gisselquist Technology, LLC`
`//`	`//`
`////////////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`// Copyright (C) 2015-2018, Gisselquist Technology, LLC`	`// Copyright (C) 2015-2018, Gisselquist Technology, LLC`
`//`	`//`
`// This program is free software (firmware): you can redistribute it and/or`	`// This program is free software (firmware): you can redistribute it and/or`
`// modify it under the terms of the GNU General Public License as published`	`// modify it under the terms of the GNU General Public License as published`
`// by the Free Software Foundation, either version 3 of the License, or (at`	`// by the Free Software Foundation, either version 3 of the License, or (at`
`// your option) any later version.`	`// your option) any later version.`
`//`	`//`
`// This program is distributed in the hope that it will be useful, but WITHOUT`	`// This program is distributed in the hope that it will be useful, but WITHOUT`
`// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`	`// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or`
`// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`	`// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
`// for more details.`	`// for more details.`
`//`	`//`
`// You should have received a copy of the GNU General Public License along`	`// You should have received a copy of the GNU General Public License along`
`// with this program. (It's in the $(ROOT)/doc directory, run make with no`	`// with this program. (It's in the $(ROOT)/doc directory. Run make with no`
`// target there if the PDF file isn't present.) If not, see`	`// target there if the PDF file isn't present.) If not, see`
`// <http://www.gnu.org/licenses/> for a copy.`	`// <http://www.gnu.org/licenses/> for a copy.`
`//`	`//`
`// License: GPL, v3, as defined and found on www.gnu.org,`	`// License: GPL, v3, as defined and found on www.gnu.org,`
`// http://www.gnu.org/licenses/gpl.html`	`// http://www.gnu.org/licenses/gpl.html`
`//`	`//`
`//`	`//`
`////////////////////////////////////////////////////////////////////////////////`	`////////////////////////////////////////////////////////////////////////////////`
`//`	`//`
`//`	`//`
`#define _CRT_SECURE_NO_WARNINGS // ms vs 2012 doesn't like fopen`	`#define _CRT_SECURE_NO_WARNINGS // ms vs 2012 doesn't like fopen`
`#include <stdio.h>`	`#include <stdio.h>`
`#include <stdlib.h>`	`#include <stdlib.h>`

`#ifdef _MSC_VER // added for ms vs compatibility`	`#ifdef _MSC_VER // added for ms vs compatibility`

`#include <io.h>`	`#include <io.h>`
`#include <direct.h>`	`#include <direct.h>`
`#define _USE_MATH_DEFINES`	`#define _USE_MATH_DEFINES`
`#define R_OK 4 /* Test for read permission. */`	`#define R_OK 4 /* Test for read permission. */`
`#define W_OK 2 /* Test for write permission. */`	`#define W_OK 2 /* Test for write permission. */`
`#define X_OK 0 /* !!!!!! execute permission - unsupported in windows*/`	`#define X_OK 0 /* !!!!!! execute permission - unsupported in windows*/`
`#define F_OK 0 /* Test for existence. */`	`#define F_OK 0 /* Test for existence. */`

`#if _MSC_VER <= 1700`	`#if _MSC_VER <= 1700`

// Stand-in for lstat() on MSVC versions up to _MSC_VER 1700: it never
// touches either argument and always returns 1.
int lstat(const char *filename, struct stat *buf) { return 1; }
`#define S_ISDIR(A) 0`	`#define S_ISDIR(A) 0`

`#else`	`#else`

`#define lstat _stat`	`#define lstat _stat`
`#define S_ISDIR _S_IFDIR`	`#define S_ISDIR _S_IFDIR`

`#endif`	`#endif`

`#define mkdir(A,B) _mkdir(A)`	`#define mkdir(A,B) _mkdir(A)`

`#define access _access`	`#define access _access`

`#else`	`#else`
`// And for G++/Linux environment`	`// And for G++/Linux environment`

`#include <unistd.h> // Defines the R_OK/W_OK/etc. macros`	`#include <unistd.h> // Defines the R_OK/W_OK/etc. macros`
`#include <sys/stat.h>`	`#include <sys/stat.h>`
`#endif`	`#endif`

`#include <string.h>`	`#include <string.h>`
`#include <string>`	`#include <string>`
`#include <math.h>`	`#include <math.h>`
`#include <ctype.h>`	`#include <ctype.h>`
`#include <assert.h>`	`#include <assert.h>`

`#include "defaults.h"`	`#include "defaults.h"`
`#include "legal.h"`	`#include "legal.h"`
`#include "rounding.h"`	`#include "rounding.h"`
`#include "fftlib.h"`	`#include "fftlib.h"`
`#include "bldstage.h"`	`#include "bldstage.h"`
`#include "bitreverse.h"`	`#include "bitreverse.h"`
`#include "softmpy.h"`	`#include "softmpy.h"`
`#include "butterfly.h"`	`#include "butterfly.h"`

`void build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {`	`void build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`
`const char *rnd_string;`	`const char *rnd_string;`
`if (rounding == RND_TRUNCATE)`	`if (rounding == RND_TRUNCATE)`
`rnd_string = "truncate";`	`rnd_string = "truncate";`
`else if (rounding == RND_FROMZERO)`	`else if (rounding == RND_FROMZERO)`
`rnd_string = "roundfromzero";`	`rnd_string = "roundfromzero";`
`else if (rounding == RND_HALFUP)`	`else if (rounding == RND_HALFUP)`
`rnd_string = "roundhalfup";`	`rnd_string = "roundhalfup";`
`else`	`else`
`rnd_string = "convround";`	`rnd_string = "convround";`


`fprintf(fp,`	`fprintf(fp,`
`SLASHLINE`	`SLASHLINE`
`"//\n"`	`"//\n"`
`"// Filename:\tqtrstage%s.v\n"`	`"// Filename:\tqtrstage%s.v\n"`
`"//\n"`	`"//\n"`
`"// Project:\t%s\n"`	`"// Project:\t%s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`	`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`
`"// frequency FFT. This particular implementation is optimized\n"`	`"// frequency FFT. This particular implementation is optimized\n"`
`"// so that all of the multiplies are accomplished by additions and\n"`	`"// so that all of the multiplies are accomplished by additions and\n"`
`"// multiplexers only.\n"`	`"// multiplexers only.\n"`
`"//\n"`	`"//\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n",`	`"//\n",`
`(dbg)?"_dbg":"", prjname, creator);`	`(dbg)?"_dbg":"", prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");	fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

`std::string resetw("i_reset");`	`std::string resetw("i_reset");`
`if (async_reset)`	`if (async_reset)`
`resetw = std::string("i_areset_n");`	`resetw = std::string("i_areset_n");`

`fprintf(fp,`	`fprintf(fp,`
`"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`	`"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`
`"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"`
`"\t// Parameters specific to the core that should be changed when this\n"`	`"\t// Parameters specific to the core that should be changed when this\n"`
`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller\n"`	`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller\n"`
`"\t// spans must use the fftdoubles stage.\n"`	`"\t// spans must use the fftdoubles stage.\n"`
`"\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"`	`"\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"`
`"\tinput\t i_clk, %s, i_ce, i_sync;\n"`	`"\tinput\twire i_clk, %s, i_ce, i_sync;\n"`
`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`	`"\tinput\twire [(2*IWIDTH-1):0] i_data;\n"`
`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`	`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`
`"\toutput\treg o_sync;\n"`	`"\toutput\treg o_sync;\n"`
`"\t\n", (dbg)?"_dbg":"",`	`"\t\n", (dbg)?"_dbg":"",`
`resetw.c_str(),`	`resetw.c_str(),`
`(dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,`	`(dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,`
`TST_QTRSTAGE_LGWIDTH, resetw.c_str());`	`TST_QTRSTAGE_LGWIDTH, resetw.c_str());`
`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`	`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`
`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2OWIDTH-1):(2OWIDTH-16)],\n"`	`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2OWIDTH-1):(2OWIDTH-16)],\n"`
`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`	`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`
`"\n");`	`"\n");`
`}`	`}`
`fprintf(fp,`	`fprintf(fp,`
`"\treg\t wait_for_sync;\n"`	`"\treg\t wait_for_sync;\n"`
`"\treg\t[3:0] pipeline;\n"`	`"\treg\t[3:0] pipeline;\n"`
`"\n"`	`"\n"`
`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`	`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`
`"\n"`	`"\n"`
`"\treg\t[(2*OWIDTH-1):0]\tob_a;\n"`	`"\treg\t[(2*OWIDTH-1):0]\tob_a;\n"`
`"\twire\t[(2*OWIDTH-1):0]\tob_b;\n"`	`"\twire\t[(2*OWIDTH-1):0]\tob_b;\n"`
`"\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"`	`"\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"`
`"\tassign\tob_b = { ob_b_r, ob_b_i };\n"`	`"\tassign\tob_b = { ob_b_r, ob_b_i };\n"`
`"\n"`	`"\n"`
`"\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"`	`"\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"`
`"\treg\t[(2*IWIDTH-1):0]\timem;\n"`	`"\treg\t[(2*IWIDTH-1):0]\timem;\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`
`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`	`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`
`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`	`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\treg [(2*OWIDTH-1):0] omem;\n"`	`"\treg [(2*OWIDTH-1):0] omem;\n"`
`"\n");`	`"\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");`	`"\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");`	`"\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"`
`"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);`	`"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"`
`"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);`	`"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"`
`"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);`	`"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"`
`"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);`	`"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);`
`fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"`	`fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"`
`"\tassign n_rnd_diff_i = - rnd_diff_i;\n");`	`"\tassign n_rnd_diff_i = - rnd_diff_i;\n");`
`/*`	`/*`
`fprintf(fp,`	`fprintf(fp,`
`"\twire [(IWIDTH-1):0] rnd;\n"`	`"\twire [(IWIDTH-1):0] rnd;\n"`
`"\tgenerate\n"`	`"\tgenerate\n"`
`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`	`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`
`"\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"`	`"\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"`
`"\telse\n"`	`"\telse\n"`
`"\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"`	`"\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"`
`"\tendgenerate\n"`	`"\tendgenerate\n"`
`"\n"`	`"\n"`
`*/`	`*/`
`fprintf(fp,`	`fprintf(fp,`
`"\tinitial wait_for_sync = 1\'b1;\n"`	`"\tinitial wait_for_sync = 1\'b1;\n"`
`"\tinitial iaddr = 0;\n");`	`"\tinitial iaddr = 0;\n");`
`if (async_reset)`	`if (async_reset)`
`fprintf(fp,`	`fprintf(fp,`
`"\talways @(posedge i_clk, negedge i_areset_n)\n"`	`"\talways @(posedge i_clk, negedge i_areset_n)\n"`
`"\t\tif (!i_reset)\n");`	`"\t\tif (!i_reset)\n");`
`else`	`else`
`fprintf(fp,`	`fprintf(fp,`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_reset)\n");`	`"\t\tif (i_reset)\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\twait_for_sync <= 1\'b1;\n"`	`"\t\t\twait_for_sync <= 1\'b1;\n"`
`"\t\t\tiaddr <= 0;\n"`	`"\t\t\tiaddr <= 0;\n"`
`"\t\tend else if ((i_ce)&&((!wait_for_sync)\|\|(i_sync)))\n"`	`"\t\tend else if ((i_ce)&&((!wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"`	`"\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"`
`"\t\t\twait_for_sync <= 1\'b0;\n"`	`"\t\t\twait_for_sync <= 1\'b0;\n"`
`"\t\tend\n\n"`	`"\t\tend\n\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\t\timem <= i_data;\n"`	`"\t\t\timem <= i_data;\n"`
`"\n\n");`	`"\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"`	`"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"`
`"\t// Why not? Because iaddr will always be zero until after the\n"`	`"\t// Why not? Because iaddr will always be zero until after the\n"`
`"\t// first i_ce, so we are safe.\n"`	`"\t// first i_ce, so we are safe.\n"`
`"\tinitial pipeline = 4\'h0;\n");`	`"\tinitial pipeline = 4\'h0;\n");`
`if (async_reset)`	`if (async_reset)`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`	`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`
`"\t\tif (!i_reset)\n");`	`"\t\tif (!i_reset)\n");`
`else`	`else`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk)\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\tif (i_reset)\n");`	`"\t\tif (i_reset)\n");`

`fprintf(fp,`	`fprintf(fp,`
`"\t\t\tpipeline <= 4\'h0;\n"`	`"\t\t\tpipeline <= 4\'h0;\n"`
`"\t\telse if (i_ce) // is our pipeline process full? Which stages?\n"`	`"\t\telse if (i_ce) // is our pipeline process full? Which stages?\n"`
`"\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");`	`"\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"`	`"\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"`
`"\talways\t@(posedge i_clk)\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\tif ((i_ce)&&(iaddr[0]))\n"`	`"\t\tif ((i_ce)&&(iaddr[0]))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tsum_r <= imem_r + i_data_r;\n"`	`"\t\t\tsum_r <= imem_r + i_data_r;\n"`
`"\t\t\tsum_i <= imem_i + i_data_i;\n"`	`"\t\t\tsum_i <= imem_i + i_data_i;\n"`
`"\t\t\tdiff_r <= imem_r - i_data_r;\n"`	`"\t\t\tdiff_r <= imem_r - i_data_r;\n"`
`"\t\t\tdiff_i <= imem_i - i_data_i;\n"`	`"\t\t\tdiff_i <= imem_i - i_data_i;\n"`
`"\t\tend\n\n");`	`"\t\tend\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");`	`"\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// Now for pipeline[2]. We can actually do this at all i_ce\n"`	`"\t// Now for pipeline[2]. We can actually do this at all i_ce\n"`
`"\t// clock times, since nothing will listen unless pipeline[3]\n"`	`"\t// clock times, since nothing will listen unless pipeline[3]\n"`
`"\t// on the next clock. Thus, we simplify this logic and do\n"`	`"\t// on the next clock. Thus, we simplify this logic and do\n"`
`"\t// it independent of pipeline[2].\n"`	`"\t// it independent of pipeline[2].\n"`
`"\talways\t@(posedge i_clk)\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"`	`"\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"`
`"\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`	`"\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`
`"\t\t\tif (ODD == 0)\n"`	`"\t\t\tif (ODD == 0)\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tob_b_r <= rnd_diff_r;\n"`	`"\t\t\t\tob_b_r <= rnd_diff_r;\n"`
`"\t\t\t\tob_b_i <= rnd_diff_i;\n"`	`"\t\t\t\tob_b_i <= rnd_diff_i;\n"`
`"\t\t\tend else if (INVERSE==0) begin\n"`	`"\t\t\tend else if (INVERSE==0) begin\n"`
`"\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`	`"\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`
`"\t\t\t\tob_b_r <= rnd_diff_i;\n"`	`"\t\t\t\tob_b_r <= rnd_diff_i;\n"`
`"\t\t\t\tob_b_i <= n_rnd_diff_r;\n"`	`"\t\t\t\tob_b_i <= n_rnd_diff_r;\n"`
`"\t\t\tend else begin\n"`	`"\t\t\tend else begin\n"`
`"\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"`	`"\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"`
`"\t\t\t\tob_b_r <= n_rnd_diff_i;\n"`	`"\t\t\t\tob_b_r <= n_rnd_diff_i;\n"`
`"\t\t\t\tob_b_i <= rnd_diff_r;\n"`	`"\t\t\t\tob_b_i <= rnd_diff_r;\n"`
`"\t\t\tend\n"`	`"\t\t\tend\n"`
`"\t\tend\n\n");`	`"\t\tend\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk)\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin // In sequence, clock = 3\n"`	`"\t\tbegin // In sequence, clock = 3\n"`
`"\t\t\tif (pipeline[3])\n"`	`"\t\t\tif (pipeline[3])\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tomem <= ob_b;\n"`	`"\t\t\t\tomem <= ob_b;\n"`
`"\t\t\t\to_data <= ob_a;\n"`	`"\t\t\t\to_data <= ob_a;\n"`
`"\t\t\tend else\n"`	`"\t\t\tend else\n"`
`"\t\t\t\to_data <= omem;\n"`	`"\t\t\t\to_data <= omem;\n"`
`"\t\tend\n\n");`	`"\t\tend\n\n");`

`fprintf(fp,`	`fprintf(fp,`
`"\t// Don\'t forget in the sync check that we are running\n"`	`"\t// Don\'t forget in the sync check that we are running\n"`
`"\t// at two clocks per sample. Thus we need to\n"`	`"\t// at two clocks per sample. Thus we need to\n"`
`"\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"`	`"\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"`
`"\tinitial\to_sync = 1\'b0;\n");`	`"\tinitial\to_sync = 1\'b0;\n");`

`if (async_reset)`	`if (async_reset)`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`	`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`
`"\t\tif (!i_areset_n)\n");`	`"\t\tif (!i_areset_n)\n");`
`else`	`else`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk)\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\tif (i_reset)\n");`	`"\t\tif (i_reset)\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t\t\to_sync <= 1\'b0;\n"`	`"\t\t\to_sync <= 1\'b0;\n"`
`"\t\telse if (i_ce)\n"`	`"\t\telse if (i_ce)\n"`
`"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");`	`"\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");`
`fprintf(fp, "endmodule\n");`	`fprintf(fp, "endmodule\n");`
`}`	`}`

`void build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {`	`void build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`
`const char *rnd_string;`	`const char *rnd_string;`
`if (rounding == RND_TRUNCATE)`	`if (rounding == RND_TRUNCATE)`
`rnd_string = "truncate";`	`rnd_string = "truncate";`
`else if (rounding == RND_FROMZERO)`	`else if (rounding == RND_FROMZERO)`
`rnd_string = "roundfromzero";`	`rnd_string = "roundfromzero";`
`else if (rounding == RND_HALFUP)`	`else if (rounding == RND_HALFUP)`
`rnd_string = "roundhalfup";`	`rnd_string = "roundhalfup";`
`else`	`else`
`rnd_string = "convround";`	`rnd_string = "convround";`


`fprintf(fp,`	`fprintf(fp,`
`SLASHLINE`	`SLASHLINE`
`"//\n"`	`"//\n"`
`"// Filename:\tqtrstage%s.v\n"`	`"// Filename:\tqtrstage%s.v\n"`
`"//\n"`	`"//\n"`
`"// Project:\t%s\n"`	`"// Project:\t%s\n"`
`"//\n"`	`"//\n"`
`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`	`"// Purpose: This file encapsulates the 4 point stage of a decimation in\n"`
`"// frequency FFT. This particular implementation is optimized\n"`	`"// frequency FFT. This particular implementation is optimized\n"`
`"// so that all of the multiplies are accomplished by additions and\n"`	`"// so that all of the multiplies are accomplished by additions and\n"`
`"// multiplexers only.\n"`	`"// multiplexers only.\n"`
`"//\n"`	`"//\n"`
`"// Operation:\n"`	`"// Operation:\n"`
`"// The operation of this stage is identical to the regular stages of\n"`	`"// The operation of this stage is identical to the regular stages of\n"`
`"// the FFT (see them for details), with one additional and critical\n"`	`"// the FFT (see them for details), with one additional and critical\n"`
`"// difference: this stage doesn't require any hardware multiplication.\n"`	`"// difference: this stage doesn't require any hardware multiplication.\n"`
`"// The multiplies within it may all be accomplished using additions and\n"`	`"// The multiplies within it may all be accomplished using additions and\n"`
`"// subtractions.\n"`	`"// subtractions.\n"`
`"//\n"`	`"//\n"`
`"// Let's see how this is done. Given x[n] and x[n+2], cause thats the\n"`	`"// Let's see how this is done. Given x[n] and x[n+2], cause thats the\n"`
`"// stage we are working on, with i_sync true for x[0] being input,\n"`	`"// stage we are working on, with i_sync true for x[0] being input,\n"`
`"// produce the output:\n"`	`"// produce the output:\n"`
`"//\n"`	`"//\n"`
`"// y[n ] = x[n] + x[n+2]\n"`	`"// y[n ] = x[n] + x[n+2]\n"`
`"// y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2} (forward transform)\n"`	`"// y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2} (forward transform)\n"`
`"// = (x[n] - x[n+2]) * -j^n\n"`	`"// = (x[n] - x[n+2]) * -j^n\n"`
`"//\n"`	`"//\n"`
`"// y[n].r = x[n].r + x[n+2].r (This is the easy part)\n"`	`"// y[n].r = x[n].r + x[n+2].r (This is the easy part)\n"`
`"// y[n].i = x[n].i + x[n+2].i\n"`	`"// y[n].i = x[n].i + x[n+2].i\n"`
`"//\n"`	`"//\n"`
`"// y[2].r = x[0].r - x[2].r\n"`	`"// y[2].r = x[0].r - x[2].r\n"`
`"// y[2].i = x[0].i - x[2].i\n"`	`"// y[2].i = x[0].i - x[2].i\n"`
`"//\n"`	`"//\n"`
`"// y[3].r = (x[1].i - x[3].i) (forward transform)\n"`	`"// y[3].r = (x[1].i - x[3].i) (forward transform)\n"`
`"// y[3].i = - (x[1].r - x[3].r)\n"`	`"// y[3].i = - (x[1].r - x[3].r)\n"`
`"//\n"`	`"//\n"`
`"// y[3].r = - (x[1].i - x[3].i) (inverse transform)\n"`	`"// y[3].r = - (x[1].i - x[3].i) (inverse transform)\n"`
`"// y[3].i = (x[1].r - x[3].r) (INVERSE = 1)\n"`	`"// y[3].i = (x[1].r - x[3].r) (INVERSE = 1)\n"`
`// "//\n"`	`// "//\n"`
`// "// When the FFT is run in the two samples per clock mode, this quarter\n"`	`// "// When the FFT is run in the two samples per clock mode, this quarter\n"`
`// "// stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"`	`// "// stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"`
`// "// x[3] (ODD = 1). In all other cases, it will operate on all four\n"`	`// "// x[3] (ODD = 1). In all other cases, it will operate on all four\n"`
`// "// values.\n"`	`// "// values.\n"`
`"//\n%s"`	`"//\n%s"`
`"//\n",`	`"//\n",`
`(dbg)?"_dbg":"", prjname, creator);`	`(dbg)?"_dbg":"", prjname, creator);`
`fprintf(fp, "%s", cpyleft);`	`fprintf(fp, "%s", cpyleft);`
fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");	fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

`std::string resetw("i_reset");`	`std::string resetw("i_reset");`
`if (async_reset)`	`if (async_reset)`
`resetw = std::string("i_areset_n");`	`resetw = std::string("i_areset_n");`

`fprintf(fp,`	`fprintf(fp,`
`"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`	`"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"`
`"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=%d, OWIDTH=IWIDTH+1;\n"`
`"\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"`	`"\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"`
`"\tinput\t i_clk, %s, i_ce, i_sync;\n"`	`"\tinput\twire i_clk, %s, i_ce, i_sync;\n"`
`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`	`"\tinput\twire [(2*IWIDTH-1):0] i_data;\n"`
`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`	`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`
`"\toutput\treg o_sync;\n"`	`"\toutput\treg o_sync;\n"`
`"\t\n", (dbg)?"_dbg":"", resetw.c_str(),`	`"\t\n", (dbg)?"_dbg":"", resetw.c_str(),`
`(dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,`	`(dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,`
`TST_QTRSTAGE_LGWIDTH, resetw.c_str());`	`TST_QTRSTAGE_LGWIDTH, resetw.c_str());`
`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`	`if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"`
`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2OWIDTH-1):(2OWIDTH-16)],\n"`	`"\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2OWIDTH-1):(2OWIDTH-16)],\n"`
`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`	`"\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"`
`"\n");`	`"\n");`
`}`	`}`

`fprintf(fp,`	`fprintf(fp,`
`"\treg\t wait_for_sync;\n"`	`"\treg\t wait_for_sync;\n"`
`"\treg\t[2:0] pipeline;\n"`	`"\treg\t[2:0] pipeline;\n"`
`"\n"`	`"\n"`
`"\treg\tsigned [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`	`"\treg\tsigned [(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`
`"\n"`	`"\n"`
`"\treg\t[(2*OWIDTH-1):0]\tob_a;\n"`	`"\treg\t[(2*OWIDTH-1):0]\tob_a;\n"`
`"\twire\t[(2*OWIDTH-1):0]\tob_b;\n"`	`"\twire\t[(2*OWIDTH-1):0]\tob_b;\n"`
`"\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"`	`"\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"`
`"\tassign\tob_b = { ob_b_r, ob_b_i };\n"`	`"\tassign\tob_b = { ob_b_r, ob_b_i };\n"`
`"\n"`	`"\n"`
`"\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"`	`"\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"`
`"\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"`	`"\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`
`"\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"`	`"\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"`
`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`	`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\treg [(2*OWIDTH-1):0] omem [0:1];\n"`	`"\treg [(2*OWIDTH-1):0] omem [0:1];\n"`
`"\n");`	`"\n");`

`fprintf(fp, "\t//\n"`	`fprintf(fp, "\t//\n"`
`"\t// Round our output values down to OWIDTH bits\n"`	`"\t// Round our output values down to OWIDTH bits\n"`
`"\t//\n");`	`"\t//\n");`

`fprintf(fp,`	`fprintf(fp,`
`"\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"`	`"\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"`
`"\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"`	`"\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"`
`"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);`	`"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"`
`"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);`	`"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"`
`"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);`	`"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);`
`fprintf(fp,`	`fprintf(fp,`
`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"`	`"\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"`
`"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);`	`"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);`
`fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"`	`fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"`
`"\tassign n_rnd_diff_i = - rnd_diff_i;\n");`	`"\tassign n_rnd_diff_i = - rnd_diff_i;\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\tinitial wait_for_sync = 1\'b1;\n"`	`"\tinitial wait_for_sync = 1\'b1;\n"`
`"\tinitial iaddr = 0;\n");`	`"\tinitial iaddr = 0;\n");`
`if (async_reset)`	`if (async_reset)`
`fprintf(fp,`	`fprintf(fp,`
`"\talways @(posedge i_clk, negedge i_areset_n)\n"`	`"\talways @(posedge i_clk, negedge i_areset_n)\n"`
`"\t\tif (!i_reset)\n");`	`"\t\tif (!i_reset)\n");`
`else`	`else`
`fprintf(fp,`	`fprintf(fp,`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_reset)\n");`	`"\t\tif (i_reset)\n");`

`fprintf(fp, "\t\tbegin\n"`	`fprintf(fp, "\t\tbegin\n"`
`"\t\t\twait_for_sync <= 1\'b1;\n"`	`"\t\t\twait_for_sync <= 1\'b1;\n"`
`"\t\t\tiaddr <= 0;\n"`	`"\t\t\tiaddr <= 0;\n"`
`"\t\tend else if ((i_ce)&&((!wait_for_sync)\|\|(i_sync)))\n"`	`"\t\tend else if ((i_ce)&&((!wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\tiaddr <= iaddr + 1\'b1;\n"`	`"\t\t\tiaddr <= iaddr + 1\'b1;\n"`
`"\t\t\twait_for_sync <= 1\'b0;\n"`	`"\t\t\twait_for_sync <= 1\'b0;\n"`
`"\t\tend\n\n"`	`"\t\tend\n\n"`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_ce)\n"`	`"\t\tif (i_ce)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\timem[0] <= i_data;\n"`	`"\t\t\timem[0] <= i_data;\n"`
`"\t\t\timem[1] <= imem[0];\n"`	`"\t\t\timem[1] <= imem[0];\n"`
`"\t\tend\n"`	`"\t\tend\n"`
`"\n\n");`	`"\n\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"`	`"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"`
`"\t// Why not? Because iaddr will always be zero until after the\n"`	`"\t// Why not? Because iaddr will always be zero until after the\n"`
`"\t// first i_ce, so we are safe.\n"`	`"\t// first i_ce, so we are safe.\n"`
`"\tinitial pipeline = 3\'h0;\n");`	`"\tinitial pipeline = 3\'h0;\n");`

`if (async_reset)`	`if (async_reset)`
`fprintf(fp,`	`fprintf(fp,`
`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`	`"\talways\t@(posedge i_clk, negedge i_areset_n)\n"`
`"\t\tif (!i_reset)\n");`	`"\t\tif (!i_reset)\n");`
`else`	`else`

////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

// Filename:    fftgen.cpp

// Filename:    fftgen.cpp

//

//

// Project:     A General Purpose Pipelined FFT Implementation

// Project:     A General Purpose Pipelined FFT Implementation

//

//

// Purpose:     This is the core generator for the project.  Every part

// Purpose:     This is the core generator for the project.  Every part

//              and piece of this project begins and ends in this program.

//              and piece of this project begins and ends in this program.

//      Once built, this program will build an FFT (or IFFT) core of arbitrary

//      Once built, this program will build an FFT (or IFFT) core of arbitrary

//      width, precision, etc., that will run at two samples per clock.

//      width, precision, etc., that will run at two samples per clock.

//      (Incidentally, I didn't pick two samples per clock because it was

//      (Incidentally, I didn't pick two samples per clock because it was

//      easier, but rather because there weren't any two-sample per clock

//      easier, but rather because there weren't any two-sample per clock

//      FFT's posted on opencores.com.  Further, FFT's running at one sample

//      FFT's posted on opencores.com.  Further, FFT's running at one sample

//      per clock aren't that hard to find.)

//      per clock aren't that hard to find.)

//

//

//      You can find the documentation for this program in two places.  One is

//      You can find the documentation for this program in two places.  One is

//      in the usage() function below.  The second is in the 'doc'uments

//      in the usage() function below.  The second is in the 'doc'uments

//      directory that comes with this package, specifically in the spec.pdf

//      directory that comes with this package, specifically in the spec.pdf

//      file.  If it's not there, type make in the documents directory to

//      file.  If it's not there, type make in the documents directory to

//      build it.

//      build it.

//

//

//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.

//      20160123 - Thanks to Lesha Birukov, adjusted for MS Visual Studio 2012.

//              (Adjustments are at the top of the file ...)

//              (Adjustments are at the top of the file ...)

//

//

// Creator:     Dan Gisselquist, Ph.D.

// Creator:     Dan Gisselquist, Ph.D.

//              Gisselquist Technology, LLC

//              Gisselquist Technology, LLC

//

//

////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

// Copyright (C) 2015-2018, Gisselquist Technology, LLC

// Copyright (C) 2015-2018, Gisselquist Technology, LLC

//

//

// This program is free software (firmware): you can redistribute it and/or

// This program is free software (firmware): you can redistribute it and/or

// modify it under the terms of  the GNU General Public License as published

// modify it under the terms of  the GNU General Public License as published

// by the Free Software Foundation, either version 3 of the License, or (at

// by the Free Software Foundation, either version 3 of the License, or (at

// your option) any later version.

// your option) any later version.

//

//

// This program is distributed in the hope that it will be useful, but WITHOUT

// This program is distributed in the hope that it will be useful, but WITHOUT

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

// for more details.

// for more details.

//

//

// You should have received a copy of the GNU General Public License along

// You should have received a copy of the GNU General Public License along

// with this program.  (It's in the $(ROOT)/doc directory, run make with no

// with this program.  (It's in the $(ROOT)/doc directory.  Run make with no

// target there if the PDF file isn't present.)  If not, see

// target there if the PDF file isn't present.)  If not, see

// <http://www.gnu.org/licenses/> for a copy.

// <http://www.gnu.org/licenses/> for a copy.

//

//

// License:     GPL, v3, as defined and found on www.gnu.org,

// License:     GPL, v3, as defined and found on www.gnu.org,

//              http://www.gnu.org/licenses/gpl.html

//              http://www.gnu.org/licenses/gpl.html

//

//

//

//

////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

//

//

//

//

#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen

#define _CRT_SECURE_NO_WARNINGS   //  ms vs 2012 doesn't like fopen

#include <stdio.h>

#include <stdio.h>

#include <stdlib.h>

#include <stdlib.h>

#ifdef _MSC_VER //  added for ms vs compatibility

#ifdef _MSC_VER //  added for ms vs compatibility

#include <io.h>

#include <io.h>

#include <direct.h>

#include <direct.h>

#define _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

#define R_OK    4       /* Test for read permission.  */

#define R_OK    4       /* Test for read permission.  */

#define W_OK    2       /* Test for write permission.  */

#define W_OK    2       /* Test for write permission.  */

#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/

#define X_OK    0       /* !!!!!! execute permission - unsupported in windows*/

#define F_OK    0       /* Test for existence.  */

#define F_OK    0       /* Test for existence.  */

#if _MSC_VER <= 1700

#if _MSC_VER <= 1700

int lstat(const char *filename, struct stat *buf) { return 1; };

int lstat(const char *filename, struct stat *buf) { return 1; };

#define S_ISDIR(A)      0

#define S_ISDIR(A)      0

#else

#else

#define lstat   _stat

#define lstat   _stat

#define S_ISDIR _S_IFDIR

#define S_ISDIR _S_IFDIR

#endif

#endif

#define mkdir(A,B)      _mkdir(A)

#define mkdir(A,B)      _mkdir(A)

#define access _access

#define access _access

#else

#else

// And for G++/Linux environment

// And for G++/Linux environment

#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros

#include <unistd.h>     // Defines the R_OK/W_OK/etc. macros

#include <sys/stat.h>

#include <sys/stat.h>

#endif

#endif

#include <string.h>

#include <string.h>

#include <string>

#include <string>

#include <math.h>

#include <math.h>

#include <ctype.h>

#include <ctype.h>

#include <assert.h>

#include <assert.h>

#include "defaults.h"

#include "defaults.h"

#include "legal.h"

#include "legal.h"

#include "rounding.h"

#include "rounding.h"

#include "fftlib.h"

#include "fftlib.h"

#include "bldstage.h"

#include "bldstage.h"

#include "bitreverse.h"

#include "bitreverse.h"

#include "softmpy.h"

#include "softmpy.h"

#include "butterfly.h"

#include "butterfly.h"

void    build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {

void    build_dblquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

                rnd_string = "roundhalfup";

        else

        else

                rnd_string = "convround";

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

SLASHLINE

SLASHLINE

"//\n"

"//\n"

"// Filename:\tqtrstage%s.v\n"

"// Filename:\tqtrstage%s.v\n"

"//\n"

"//\n"

"// Project:\t%s\n"

"// Project:\t%s\n"

"//\n"

"//\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//     so that all of the multiplies are accomplished by additions and\n"

"//     so that all of the multiplies are accomplished by additions and\n"

"//     multiplexers only.\n"

"//     multiplexers only.\n"

"//\n"

"//\n"

"//\n%s"

"//\n%s"

"//\n",

"//\n",

                (dbg)?"_dbg":"", prjname, creator);

                (dbg)?"_dbg":"", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        std::string     resetw("i_reset");

        std::string     resetw("i_reset");

        if (async_reset)

        if (async_reset)

                resetw = std::string("i_areset_n");

                resetw = std::string("i_areset_n");

        fprintf(fp,

        fprintf(fp,

"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"

        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller\n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller\n"

        "\t// spans must use the fftdoubles stage.\n"

        "\t// spans must use the fftdoubles stage.\n"

        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"

        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"

        "\tinput\t                              i_clk, %s, i_ce, i_sync;\n"

        "\tinput\twire                          i_clk, %s, i_ce, i_sync;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\tinput\twire  [(2*IWIDTH-1):0]        i_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg                          o_sync;\n"

        "\toutput\treg                          o_sync;\n"

        "\t\n", (dbg)?"_dbg":"",

        "\t\n", (dbg)?"_dbg":"",

        resetw.c_str(),

        resetw.c_str(),

        (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,

        (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,

        TST_QTRSTAGE_LGWIDTH, resetw.c_str());

        TST_QTRSTAGE_LGWIDTH, resetw.c_str());

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\treg\t        wait_for_sync;\n"

        "\treg\t        wait_for_sync;\n"

        "\treg\t[3:0]   pipeline;\n"

        "\treg\t[3:0]   pipeline;\n"

"\n"

"\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

"\n"

"\n"

        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"

        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"

        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"

        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"

        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"

        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"

        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"

        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"

"\n"

"\n"

        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"

        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"

        "\treg\t[(2*IWIDTH-1):0]\timem;\n"

        "\treg\t[(2*IWIDTH-1):0]\timem;\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");

        fprintf(fp,

        fprintf(fp,

        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");

        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"

        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);

        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"

        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);

        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"

        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);

        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"

        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);

        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);

        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"

        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"

                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");

                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");

/*

/*

        fprintf(fp,

        fprintf(fp,

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\tgenerate\n"

        "\tgenerate\n"

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"

                "\t\tassign rnd = { {(IWIDTH-1){1\'b0}}, 1\'b1 };\n"

        "\telse\n"

        "\telse\n"

                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"

                "\t\tassign rnd = { {(IWIDTH){1\'b0}}};\n"

        "\tendgenerate\n"

        "\tendgenerate\n"

"\n"

"\n"

*/

*/

        fprintf(fp,

        fprintf(fp,

        "\tinitial wait_for_sync = 1\'b1;\n"

        "\tinitial wait_for_sync = 1\'b1;\n"

        "\tinitial iaddr = 0;\n");

        "\tinitial iaddr = 0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

                        "\talways @(posedge i_clk, negedge i_areset_n)\n"

                        "\talways @(posedge i_clk, negedge i_areset_n)\n"

                                "\t\tif (!i_reset)\n");

                                "\t\tif (!i_reset)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1\'b1;\n"

                        "\t\t\twait_for_sync <= 1\'b1;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tiaddr <= 0;\n"

                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"

                        "\t\t\tiaddr <= iaddr + { {(LGWIDTH-1){1\'b0}}, 1\'b1 };\n"

                        "\t\t\twait_for_sync <= 1\'b0;\n"

                        "\t\t\twait_for_sync <= 1\'b0;\n"

                "\t\tend\n\n"

                "\t\tend\n\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                        "\t\t\timem <= i_data;\n"

                        "\t\t\timem <= i_data;\n"

                "\n\n");

                "\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"

        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"

        "\t// Why not?  Because iaddr will always be zero until after the\n"

        "\t// Why not?  Because iaddr will always be zero until after the\n"

        "\t// first i_ce, so we are safe.\n"

        "\t// first i_ce, so we are safe.\n"

        "\tinitial pipeline = 4\'h0;\n");

        "\tinitial pipeline = 4\'h0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

                "\t\tif (!i_reset)\n");

                "\t\tif (!i_reset)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                        "\t\t\tpipeline <= 4\'h0;\n"

                        "\t\t\tpipeline <= 4\'h0;\n"

                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"

                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"

                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");

                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"

        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif ((i_ce)&&(iaddr[0]))\n"

                "\t\tif ((i_ce)&&(iaddr[0]))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"

                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"

                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"

                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"

                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"

                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"

                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"

                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");

        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"

        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"

        "\t// clock times, since nothing will listen unless pipeline[3]\n"

        "\t// clock times, since nothing will listen unless pipeline[3]\n"

        "\t// on the next clock.  Thus, we simplify this logic and do\n"

        "\t// on the next clock.  Thus, we simplify this logic and do\n"

        "\t// it independent of pipeline[2].\n"

        "\t// it independent of pipeline[2].\n"

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"

                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"

                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\tif (ODD == 0)\n"

                        "\t\t\tif (ODD == 0)\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"

                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"

                        "\t\t\tend else if (INVERSE==0) begin\n"

                        "\t\t\tend else if (INVERSE==0) begin\n"

                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"

                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"

                        "\t\t\tend else begin\n"

                        "\t\t\tend else begin\n"

                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"

                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin // In sequence, clock = 3\n"

                "\t\tbegin // In sequence, clock = 3\n"

                        "\t\t\tif (pipeline[3])\n"

                        "\t\t\tif (pipeline[3])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                        "\t\t\tend else\n"

                        "\t\t\tend else\n"

                                "\t\t\t\to_data <= omem;\n"

                                "\t\t\t\to_data <= omem;\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Don\'t forget in the sync check that we are running\n"

        "\t// Don\'t forget in the sync check that we are running\n"

        "\t// at two clocks per sample.  Thus we need to\n"

        "\t// at two clocks per sample.  Thus we need to\n"

        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

        "\tinitial\to_sync = 1\'b0;\n");

        "\tinitial\to_sync = 1\'b0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

                "\t\tif (!i_areset_n)\n");

                "\t\tif (!i_areset_n)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                "\t\t\to_sync <= 1\'b0;\n"

                "\t\t\to_sync <= 1\'b0;\n"

                "\t\telse if (i_ce)\n"

                "\t\telse if (i_ce)\n"

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");

        fprintf(fp, "endmodule\n");

        fprintf(fp, "endmodule\n");

void    build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {

void    build_snglquarters(const char *fname, ROUND_T rounding, const bool async_reset=false, const bool dbg=false) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

                rnd_string = "roundhalfup";

        else

        else

                rnd_string = "convround";

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

SLASHLINE

SLASHLINE

"//\n"

"//\n"

"// Filename:\tqtrstage%s.v\n"

"// Filename:\tqtrstage%s.v\n"

"//\n"

"//\n"

"// Project:\t%s\n"

"// Project:\t%s\n"

"//\n"

"//\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//             frequency FFT.  This particular implementation is optimized\n"

"//     so that all of the multiplies are accomplished by additions and\n"

"//     so that all of the multiplies are accomplished by additions and\n"

"//     multiplexers only.\n"

"//     multiplexers only.\n"

"//\n"

"//\n"

"// Operation:\n"

"// Operation:\n"

"//     The operation of this stage is identical to the regular stages of\n"

"//     The operation of this stage is identical to the regular stages of\n"

"//     the FFT (see them for details), with one additional and critical\n"

"//     the FFT (see them for details), with one additional and critical\n"

"//     difference: this stage doesn't require any hardware multiplication.\n"

"//     difference: this stage doesn't require any hardware multiplication.\n"

"//     The multiplies within it may all be accomplished using additions and\n"

"//     The multiplies within it may all be accomplished using additions and\n"

"//     subtractions.\n"

"//     subtractions.\n"

"//\n"

"//\n"

"//     Let's see how this is done.  Given x[n] and x[n+2], cause thats the\n"

"//     Let's see how this is done.  Given x[n] and x[n+2], cause thats the\n"

"//     stage we are working on, with i_sync true for x[0] being input,\n"

"//     stage we are working on, with i_sync true for x[0] being input,\n"

"//     produce the output:\n"

"//     produce the output:\n"

"//\n"

"//\n"

"//     y[n  ] = x[n] + x[n+2]\n"

"//     y[n  ] = x[n] + x[n+2]\n"

"//     y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2}        (forward transform)\n"

"//     y[n+2] = (x[n] - x[n+2]) * e^{-j2pi n/2}        (forward transform)\n"

"//            = (x[n] - x[n+2]) * -j^n\n"

"//            = (x[n] - x[n+2]) * -j^n\n"

"//\n"

"//\n"

"//     y[n].r = x[n].r + x[n+2].r      (This is the easy part)\n"

"//     y[n].r = x[n].r + x[n+2].r      (This is the easy part)\n"

"//     y[n].i = x[n].i + x[n+2].i\n"

"//     y[n].i = x[n].i + x[n+2].i\n"

"//\n"

"//\n"

"//     y[2].r = x[0].r - x[2].r\n"

"//     y[2].r = x[0].r - x[2].r\n"

"//     y[2].i = x[0].i - x[2].i\n"

"//     y[2].i = x[0].i - x[2].i\n"

"//\n"

"//\n"

"//     y[3].r =   (x[1].i - x[3].i)            (forward transform)\n"

"//     y[3].r =   (x[1].i - x[3].i)            (forward transform)\n"

"//     y[3].i = - (x[1].r - x[3].r)\n"

"//     y[3].i = - (x[1].r - x[3].r)\n"

"//\n"

"//\n"

"//     y[3].r = - (x[1].i - x[3].i)            (inverse transform)\n"

"//     y[3].r = - (x[1].i - x[3].i)            (inverse transform)\n"

"//     y[3].i =   (x[1].r - x[3].r)            (INVERSE = 1)\n"

"//     y[3].i =   (x[1].r - x[3].r)            (INVERSE = 1)\n"

// "//\n"

// "//\n"

// "//  When the FFT is run in the two samples per clock mode, this quarter\n"

// "//  When the FFT is run in the two samples per clock mode, this quarter\n"

// "//  stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"

// "//  stage will operate on either x[0] and x[2] (ODD = 0), or x[1] and\n"

// "//  x[3] (ODD = 1).  In all other cases, it will operate on all four\n"

// "//  x[3] (ODD = 1).  In all other cases, it will operate on all four\n"

// "//  values.\n"

// "//  values.\n"

"//\n%s"

"//\n%s"

"//\n",

"//\n",

                (dbg)?"_dbg":"", prjname, creator);

                (dbg)?"_dbg":"", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        std::string     resetw("i_reset");

        std::string     resetw("i_reset");

        if (async_reset)

        if (async_reset)

                resetw = std::string("i_areset_n");

                resetw = std::string("i_areset_n");

        fprintf(fp,

        fprintf(fp,

"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

"module\tqtrstage%s(i_clk, %s, i_ce, i_sync, i_data, o_data, o_sync%s);\n"

        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"

        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"

        "\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"

        "\tparameter\tLGWIDTH=%d, INVERSE=0,SHIFT=0;\n"

        "\tinput\t                              i_clk, %s, i_ce, i_sync;\n"

        "\tinput\twire                          i_clk, %s, i_ce, i_sync;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\tinput\twire  [(2*IWIDTH-1):0]        i_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg                          o_sync;\n"

        "\toutput\treg                          o_sync;\n"

                "\t\n", (dbg)?"_dbg":"", resetw.c_str(),

                "\t\n", (dbg)?"_dbg":"", resetw.c_str(),

                (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,

                (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,

                TST_QTRSTAGE_LGWIDTH, resetw.c_str());

                TST_QTRSTAGE_LGWIDTH, resetw.c_str());

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\treg\t        wait_for_sync;\n"

        "\treg\t        wait_for_sync;\n"

        "\treg\t[2:0]   pipeline;\n"

        "\treg\t[2:0]   pipeline;\n"

"\n"

"\n"

        "\treg\tsigned [(IWIDTH):0]     sum_r, sum_i, diff_r, diff_i;\n"

        "\treg\tsigned [(IWIDTH):0]     sum_r, sum_i, diff_r, diff_i;\n"

"\n"

"\n"

        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"

        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"

        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"

        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"

        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"

        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"

        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"

        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"

"\n"

"\n"

        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"

        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"

        "\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"

        "\treg\t[(2*IWIDTH-1):0]\timem\t[0:1];\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_r = imem[1][(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"

        "\tassign\timem_i = imem[1][(IWIDTH-1):0];\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\ti_data_r, i_data_i;\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_r = i_data[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\treg  [(2*OWIDTH-1):0]        omem [0:1];\n"

        "\treg  [(2*OWIDTH-1):0]        omem [0:1];\n"

"\n");

"\n");

        fprintf(fp, "\t//\n"

        fprintf(fp, "\t//\n"

        "\t// Round our output values down to OWIDTH bits\n"

        "\t// Round our output values down to OWIDTH bits\n"

        "\t//\n");

        "\t//\n");

        fprintf(fp,

        fprintf(fp,

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i,\n"

        "\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"

        "\t\t\trnd_diff_r, rnd_diff_i, n_rnd_diff_r, n_rnd_diff_i;\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"

        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);

        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"

        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);

        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"

        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);

        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"

        "\t%s #(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"

        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);

        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);

        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"

        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"

                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");

                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");

        fprintf(fp,

        fprintf(fp,

        "\tinitial wait_for_sync = 1\'b1;\n"

        "\tinitial wait_for_sync = 1\'b1;\n"

        "\tinitial iaddr = 0;\n");

        "\tinitial iaddr = 0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

                        "\talways @(posedge i_clk, negedge i_areset_n)\n"

                        "\talways @(posedge i_clk, negedge i_areset_n)\n"

                                "\t\tif (!i_reset)\n");

                                "\t\tif (!i_reset)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp, "\t\tbegin\n"

        fprintf(fp, "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1\'b1;\n"

                        "\t\t\twait_for_sync <= 1\'b1;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tiaddr <= 0;\n"

                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\t\tend else if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tiaddr <= iaddr + 1\'b1;\n"

                        "\t\t\tiaddr <= iaddr + 1\'b1;\n"

                        "\t\t\twait_for_sync <= 1\'b0;\n"

                        "\t\t\twait_for_sync <= 1\'b0;\n"

                "\t\tend\n\n"

                "\t\tend\n\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\timem[0] <= i_data;\n"

                        "\t\t\timem[0] <= i_data;\n"

                        "\t\t\timem[1] <= imem[0];\n"

                        "\t\t\timem[1] <= imem[0];\n"

                "\t\tend\n"

                "\t\tend\n"

                "\n\n");

                "\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"

        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"

        "\t// Why not?  Because iaddr will always be zero until after the\n"

        "\t// Why not?  Because iaddr will always be zero until after the\n"

        "\t// first i_ce, so we are safe.\n"

        "\t// first i_ce, so we are safe.\n"

        "\tinitial pipeline = 3\'h0;\n");

        "\tinitial pipeline = 3\'h0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

                "\t\tif (!i_reset)\n");

                "\t\tif (!i_reset)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                        "\t\t\tpipeline <= 3\'h0;\n"

                        "\t\t\tpipeline <= 3\'h0;\n"

                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"

                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"

                        "\t\t\tpipeline <= { pipeline[1:0], iaddr[1] };\n\n");

                        "\t\t\tpipeline <= { pipeline[1:0], iaddr[1] };\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"

        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif ((i_ce)&&(iaddr[1]))\n"

                "\t\tif ((i_ce)&&(iaddr[1]))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"

                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"

                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"

                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"

                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"

                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"

                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"

                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");

        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"

        "\t// Now for pipeline[2].  We can actually do this at all i_ce\n"

        "\t// clock times, since nothing will listen unless pipeline[3]\n"

        "\t// clock times, since nothing will listen unless pipeline[3]\n"

        "\t// on the next clock.  Thus, we simplify this logic and do\n"

        "\t// on the next clock.  Thus, we simplify this logic and do\n"

        "\t// it independent of pipeline[2].\n"

        "\t// it independent of pipeline[2].\n"

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"

                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"

                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\tif (!iaddr[0])\n"

                        "\t\t\tif (!iaddr[0])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"

                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"

                        "\t\t\tend else if (INVERSE==0) begin\n"

                        "\t\t\tend else if (INVERSE==0) begin\n"

                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"

                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"

                        "\t\t\tend else begin\n"

                        "\t\t\tend else begin\n"

                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"

                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"

                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"

                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin // In sequence, clock = 3\n"

                "\t\tbegin // In sequence, clock = 3\n"

                        "\t\t\tomem[0] <= ob_b;\n"

                        "\t\t\tomem[0] <= ob_b;\n"

                        "\t\t\tomem[1] <= omem[0];\n"

                        "\t\t\tomem[1] <= omem[0];\n"

                        "\t\t\tif (pipeline[2])\n"

                        "\t\t\tif (pipeline[2])\n"

                                "\t\t\t\to_data <= ob_a;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                        "\t\t\telse\n"

                        "\t\t\telse\n"

                                "\t\t\t\to_data <= omem[1];\n"

                                "\t\t\t\to_data <= omem[1];\n"

                "\t\tend\n\n");

                "\t\tend\n\n");

        fprintf(fp,

        fprintf(fp,

        "\tinitial\to_sync = 1\'b0;\n");

        "\tinitial\to_sync = 1\'b0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

        "\talways\t@(posedge i_clk, negedge i_areset_n)\n"

                "\t\tif (!i_areset_n)\n");

                "\t\tif (!i_areset_n)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_reset)\n");

                "\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                "\t\t\to_sync <= 1\'b0;\n"

                "\t\t\to_sync <= 1\'b0;\n"

                "\t\telse if (i_ce)\n"

                "\t\telse if (i_ce)\n"

                        "\t\t\to_sync <= (iaddr[2:0] == 3'b101);\n\n");

                        "\t\t\to_sync <= (iaddr[2:0] == 3'b101);\n\n");

        if (formal_property_flag) {

        if (formal_property_flag) {

                fprintf(fp,

                fprintf(fp,

"`ifdef FORMAL\n"

"`ifdef FORMAL\n"

        "\treg  f_past_valid;\n"

        "\treg  f_past_valid;\n"

        "\tinitial      f_past_valid = 1'b0;\n"

        "\tinitial      f_past_valid = 1'b0;\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\t     f_past_valid = 1'b1;\n"

        "\t     f_past_valid = 1'b1;\n"

"\n"

"\n"

"`ifdef QTRSTAGE\n"

"`ifdef QTRSTAGE\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"

        "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"

"`endif\n"

"`endif\n"

"\n"

"\n"

        "\t// The below logic only works if the rounding stage does nothing\n"

        "\t// The below logic only works if the rounding stage does nothing\n"

        "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"

        "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"

"\n"

"\n"

        "\treg  signed [IWIDTH-1:0]     f_piped_real    [0:7];\n"

        "\treg  signed [IWIDTH-1:0]     f_piped_real    [0:7];\n"

        "\treg  signed [IWIDTH-1:0]     f_piped_imag    [0:7];\n"

        "\treg  signed [IWIDTH-1:0]     f_piped_imag    [0:7];\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif (i_ce)\n"

        "\tif (i_ce)\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH];\n"

        "\t     f_piped_real[0] <= i_data[2*IWIDTH-1:IWIDTH];\n"

        "\t     f_piped_imag[0] <= i_data[  IWIDTH-1:0];\n"

        "\t     f_piped_imag[0] <= i_data[  IWIDTH-1:0];\n"

"\n"

"\n"

        "\t     f_piped_real[1] <= f_piped_real[0];\n"

        "\t     f_piped_real[1] <= f_piped_real[0];\n"

        "\t     f_piped_imag[1] <= f_piped_imag[0];\n"

        "\t     f_piped_imag[1] <= f_piped_imag[0];\n"

"\n"

"\n"

        "\t     f_piped_real[2] <= f_piped_real[1];\n"

        "\t     f_piped_real[2] <= f_piped_real[1];\n"

        "\t     f_piped_imag[2] <= f_piped_imag[1];\n"

        "\t     f_piped_imag[2] <= f_piped_imag[1];\n"

"\n"

"\n"

        "\t     f_piped_real[3] <= f_piped_real[2];\n"

        "\t     f_piped_real[3] <= f_piped_real[2];\n"

        "\t     f_piped_imag[3] <= f_piped_imag[2];\n"

        "\t     f_piped_imag[3] <= f_piped_imag[2];\n"

"\n"

"\n"

        "\t     f_piped_real[4] <= f_piped_real[3];\n"

        "\t     f_piped_real[4] <= f_piped_real[3];\n"

        "\t     f_piped_imag[4] <= f_piped_imag[3];\n"

        "\t     f_piped_imag[4] <= f_piped_imag[3];\n"

"\n"

"\n"

        "\t     f_piped_real[5] <= f_piped_real[4];\n"

        "\t     f_piped_real[5] <= f_piped_real[4];\n"

        "\t     f_piped_imag[5] <= f_piped_imag[4];\n"

        "\t     f_piped_imag[5] <= f_piped_imag[4];\n"

"\n"

"\n"

        "\t     f_piped_real[6] <= f_piped_real[5];\n"

        "\t     f_piped_real[6] <= f_piped_real[5];\n"

        "\t     f_piped_imag[6] <= f_piped_imag[5];\n"

        "\t     f_piped_imag[6] <= f_piped_imag[5];\n"

"\n"

"\n"

        "\t     f_piped_real[7] <= f_piped_real[6];\n"

        "\t     f_piped_real[7] <= f_piped_real[6];\n"

        "\t     f_piped_imag[7] <= f_piped_imag[6];\n"

        "\t     f_piped_imag[7] <= f_piped_imag[6];\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\treg  f_rsyncd;\n"

        "\treg  f_rsyncd;\n"

        "\twire f_syncd;\n"

        "\twire f_syncd;\n"

"\n"

"\n"

        "\tinitial      f_rsyncd = 0;\n"

        "\tinitial      f_rsyncd = 0;\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif(i_reset)\n"

        "\tif(i_reset)\n"

        "\t     f_rsyncd <= 1'b0;\n"

        "\t     f_rsyncd <= 1'b0;\n"

        "\telse if (!f_rsyncd)\n"

        "\telse if (!f_rsyncd)\n"

        "\t     f_rsyncd <= (o_sync);\n"

        "\t     f_rsyncd <= (o_sync);\n"

        "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"

        "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"

"\n"

"\n"

        "\treg  [1:0]   f_state;\n"

        "\treg  [1:0]   f_state;\n"

"\n"

"\n"

"\n"

"\n"

        "\tinitial      f_state = 0;\n"

        "\tinitial      f_state = 0;\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif (i_reset)\n"

        "\tif (i_reset)\n"

        "\t     f_state <= 0;\n"

        "\t     f_state <= 0;\n"

        "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

        "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

        "\t     f_state <= f_state + 1;\n"

        "\t     f_state <= f_state + 1;\n"

"\n"

"\n"

        "\talways @(*)\n"

        "\talways @(*)\n"

        "\tif (f_state != 0)\n"

        "\tif (f_state != 0)\n"

        "\t     assume(!i_sync);\n"

        "\t     assume(!i_sync);\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\t     assert(f_state[1:0] == iaddr[1:0]);\n"

        "\t     assert(f_state[1:0] == iaddr[1:0]);\n"

"\n"

"\n"

        "\twire signed [2*IWIDTH-1:0]   f_i_real, f_i_imag;\n"

        "\twire signed [2*IWIDTH-1:0]   f_i_real, f_i_imag;\n"

        "\tassign                       f_i_real = i_data[2*IWIDTH-1:IWIDTH];\n"

        "\tassign                       f_i_real = i_data[2*IWIDTH-1:IWIDTH];\n"

        "\tassign                       f_i_imag = i_data[  IWIDTH-1:0];\n"

        "\tassign                       f_i_imag = i_data[  IWIDTH-1:0];\n"

"\n"

"\n"

        "\twire signed [OWIDTH-1:0]     f_o_real, f_o_imag;\n"

        "\twire signed [OWIDTH-1:0]     f_o_real, f_o_imag;\n"

        "\tassign                       f_o_real = o_data[2*OWIDTH-1:OWIDTH];\n"

        "\tassign                       f_o_real = o_data[2*OWIDTH-1:OWIDTH];\n"

        "\tassign                       f_o_imag = o_data[  OWIDTH-1:0];\n"

        "\tassign                       f_o_imag = o_data[  OWIDTH-1:0];\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif (f_state == 2'b11)\n"

        "\tif (f_state == 2'b11)\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     assume(f_piped_real[0] != 3'sb100);\n"

        "\t     assume(f_piped_real[0] != 3'sb100);\n"

        "\t     assume(f_piped_real[2] != 3'sb100);\n"

        "\t     assume(f_piped_real[2] != 3'sb100);\n"

        "\t     assert(sum_r  == f_piped_real[2] + f_piped_real[0]);\n"

        "\t     assert(sum_r  == f_piped_real[2] + f_piped_real[0]);\n"

        "\t     assert(sum_i  == f_piped_imag[2] + f_piped_imag[0]);\n"

        "\t     assert(sum_i  == f_piped_imag[2] + f_piped_imag[0]);\n"

"\n"

"\n"

        "\t     assert(diff_r == f_piped_real[2] - f_piped_real[0]);\n"

        "\t     assert(diff_r == f_piped_real[2] - f_piped_real[0]);\n"

        "\t     assert(diff_i == f_piped_imag[2] - f_piped_imag[0]);\n"

        "\t     assert(diff_i == f_piped_imag[2] - f_piped_imag[0]);\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4)))\n"

        "\tif ((f_state == 2'b00)&&((f_syncd)||(iaddr >= 4)))\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     assert(rnd_sum_r  == f_piped_real[3]+f_piped_real[1]);\n"

        "\t     assert(rnd_sum_r  == f_piped_real[3]+f_piped_real[1]);\n"

        "\t     assert(rnd_sum_i  == f_piped_imag[3]+f_piped_imag[1]);\n"

        "\t     assert(rnd_sum_i  == f_piped_imag[3]+f_piped_imag[1]);\n"

        "\t     assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]);\n"

        "\t     assert(rnd_diff_r == f_piped_real[3]-f_piped_real[1]);\n"

        "\t     assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]);\n"

        "\t     assert(rnd_diff_i == f_piped_imag[3]-f_piped_imag[1]);\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_state == 2'b10)&&(f_syncd))\n"

        "\tif ((f_state == 2'b10)&&(f_syncd))\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     // assert(o_sync);\n"

        "\t     // assert(o_sync);\n"

        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"

        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"

        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"

        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_state == 2'b11)&&(f_syncd))\n"

        "\tif ((f_state == 2'b11)&&(f_syncd))\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     assert(!o_sync);\n"

        "\t     assert(!o_sync);\n"

        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"

        "\t     assert(f_o_real == f_piped_real[5] + f_piped_real[3]);\n"

        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"

        "\t     assert(f_o_imag == f_piped_imag[5] + f_piped_imag[3]);\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_state == 2'b00)&&(f_syncd))\n"

        "\tif ((f_state == 2'b00)&&(f_syncd))\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     assert(!o_sync);\n"

        "\t     assert(!o_sync);\n"

        "\t     assert(f_o_real == f_piped_real[7] - f_piped_real[5]);\n"

        "\t     assert(f_o_real == f_piped_real[7] - f_piped_real[5]);\n"

        "\t     assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]);\n"

        "\t     assert(f_o_imag == f_piped_imag[7] - f_piped_imag[5]);\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

        "\talways @(*)\n"

        "\talways @(*)\n"

        "\tif ((iaddr[2:0] == 0)&&(!wait_for_sync))\n"

        "\tif ((iaddr[2:0] == 0)&&(!wait_for_sync))\n"

        "\t     assume(i_sync);\n"

        "\t     assume(i_sync);\n"

"\n"

"\n"

        "\talways @(*)\n"

        "\talways @(*)\n"

        "\tif (wait_for_sync)\n"

        "\tif (wait_for_sync)\n"

        "\t     assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd));\n"

        "\t     assert((iaddr == 0)&&(f_state == 2'b00)&&(!o_sync)&&(!f_rsyncd));\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset)))\n"

        "\tif ((f_past_valid)&&($past(i_ce))&&($past(i_sync))&&(!$past(i_reset)))\n"

        "\t     assert(!wait_for_sync);\n"

        "\t     assert(!wait_for_sync);\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\tif ((f_state == 2'b01)&&(f_syncd))\n"

        "\tif ((f_state == 2'b01)&&(f_syncd))\n"

        "\tbegin\n"

        "\tbegin\n"

        "\t     assert(!o_sync);\n"

        "\t     assert(!o_sync);\n"

        "\t     if (INVERSE)\n"

        "\t     if (INVERSE)\n"

        "\t     begin\n"

        "\t     begin\n"

        "\t             assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]);\n"

        "\t             assert(f_o_real == -f_piped_imag[7]+f_piped_imag[5]);\n"

        "\t             assert(f_o_imag ==  f_piped_real[7]-f_piped_real[5]);\n"

        "\t             assert(f_o_imag ==  f_piped_real[7]-f_piped_real[5]);\n"

        "\t     end else begin\n"

        "\t     end else begin\n"

        "\t             assert(f_o_real ==  f_piped_imag[7]-f_piped_imag[5]);\n"

        "\t             assert(f_o_real ==  f_piped_imag[7]-f_piped_imag[5]);\n"

        "\t             assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]);\n"

        "\t             assert(f_o_imag == -f_piped_real[7]+f_piped_real[5]);\n"

        "\t     end\n"

        "\t     end\n"

        "\tend\n"

        "\tend\n"

"\n"

"\n"

"`endif\n");

"`endif\n");

        fprintf(fp, "endmodule\n");

        fprintf(fp, "endmodule\n");

void    build_sngllast(const char *fname, const bool async_reset = false) {

void    build_sngllast(const char *fname, const bool async_reset = false) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        std::string     resetw("i_reset");

        std::string     resetw("i_reset");

        if (async_reset)

        if (async_reset)

                resetw = std::string("i_areset_n");

                resetw = std::string("i_areset_n");

        fprintf(fp,

        fprintf(fp,

SLASHLINE

SLASHLINE

"//\n"

"//\n"

"// Filename:\tlaststage.v\n"

"// Filename:\tlaststage.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"// Purpose:    This is part of an FPGA implementation that will process\n"

"//             the final stage of a decimate-in-frequency FFT, running\n"

"//             the final stage of a decimate-in-frequency FFT, running\n"

"//     through the data at one sample per clock.\n"

"//     through the data at one sample per clock.\n"

"//\n"

"//\n"

"//\n%s"

"//\n%s"

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n");

        fprintf(fp,

        fprintf(fp,

"module laststage(i_clk, %s, i_ce, i_sync, i_val, o_val, o_sync);\n"

"module laststage(i_clk, %s, i_ce, i_sync, i_val, o_val, o_sync);\n"

"       parameter       IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

"       parameter       IWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

"       input                                   i_clk, %s, i_ce, i_sync;\n"

"       input   wire                            i_clk, %s, i_ce, i_sync;\n"

"       input           [(2*IWIDTH-1):0]        i_val;\n"

"       input   wire    [(2*IWIDTH-1):0]        i_val;\n"

"       output  wire    [(2*OWIDTH-1):0]        o_val;\n"

"       output  wire    [(2*OWIDTH-1):0]        o_val;\n"

"       output  reg                             o_sync;\n\n",

"       output  reg                             o_sync;\n\n",

                resetw.c_str(), resetw.c_str());

                resetw.c_str(), resetw.c_str());

        fprintf(fp,

        fprintf(fp,

"       reg     signed  [(IWIDTH-1):0]  m_r, m_i;\n"

"       reg     signed  [(IWIDTH-1):0]  m_r, m_i;\n"

"       wire    signed  [(IWIDTH-1):0]  i_r, i_i;\n"

"       wire    signed  [(IWIDTH-1):0]  i_r, i_i;\n"

"\n"

"\n"

"       assign  i_r = i_val[(2*IWIDTH-1):(IWIDTH)]; \n"

"       assign  i_r = i_val[(2*IWIDTH-1):(IWIDTH)]; \n"

"       assign  i_i = i_val[(IWIDTH-1):0]; \n"

"       assign  i_i = i_val[(IWIDTH-1):0]; \n"

"\n"

"\n"

"       // Don't forget that we accumulate a bit by adding two values\n"

"       // Don't forget that we accumulate a bit by adding two values\n"

"       // together. Therefore our intermediate value must have one more\n"

"       // together. Therefore our intermediate value must have one more\n"

"       // bit than the two originals.\n"

"       // bit than the two originals.\n"

"       reg     signed  [(IWIDTH):0]    rnd_r, rnd_i, sto_r, sto_i;\n"

"       reg     signed  [(IWIDTH):0]    rnd_r, rnd_i, sto_r, sto_i;\n"

"       reg                             wait_for_sync, stage;\n"

"       reg                             wait_for_sync, stage;\n"

"       reg             [1:0]           sync_pipe;\n"

"       reg             [1:0]           sync_pipe;\n"

"\n"

"\n"

"       initial wait_for_sync = 1'b1;\n"

"       initial wait_for_sync = 1'b1;\n"

"       initial stage         = 1'b0;\n");

"       initial stage         = 1'b0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");

                fprintf(fp, "\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");

        else

        else

                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");

                fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

"               begin\n"

"               begin\n"

"                       wait_for_sync <= 1'b1;\n"

"                       wait_for_sync <= 1'b1;\n"

"                       stage         <= 1'b0;\n"

"                       stage         <= 1'b0;\n"

"               end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage))\n"

"               end else if ((i_ce)&&((!wait_for_sync)||(i_sync))&&(!stage))\n"

"               begin\n"

"               begin\n"

"                       wait_for_sync <= 1'b0;\n"

"                       wait_for_sync <= 1'b0;\n"

"                       //\n"

"                       //\n"

"                       stage <= 1'b1;\n"

"                       stage <= 1'b1;\n"

"                       //\n"

"                       //\n"

"               end else if (i_ce)\n"

"               end else if (i_ce)\n"

"                       stage <= 1'b0;\n\n");

"                       stage <= 1'b0;\n\n");

        fprintf(fp, "\tinitial\tsync_pipe = 0;\n");

        fprintf(fp, "\tinitial\tsync_pipe = 0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

                "\talways @(posedge i_clk, negedge i_areset_n)\n"

                "\talways @(posedge i_clk, negedge i_areset_n)\n"

                "\tif (!i_areset_n)\n");

                "\tif (!i_areset_n)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif (i_reset)\n");

                "\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                "\t\tsync_pipe <= 0;\n"

                "\t\tsync_pipe <= 0;\n"

                "\telse if (i_ce)\n"

                "\telse if (i_ce)\n"

                "\t\tsync_pipe <= { sync_pipe[0], i_sync };\n\n");

                "\t\tsync_pipe <= { sync_pipe[0], i_sync };\n\n");

        fprintf(fp, "\tinitial\to_sync = 1\'b0;\n");

        fprintf(fp, "\tinitial\to_sync = 1\'b0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(fp,

                fprintf(fp,

                "\talways @(posedge i_clk, negedge i_areset_n)\n"

                "\talways @(posedge i_clk, negedge i_areset_n)\n"

                "\tif (!i_areset_n)\n");

                "\tif (!i_areset_n)\n");

        else

        else

                fprintf(fp,

                fprintf(fp,

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif (i_reset)\n");

                "\tif (i_reset)\n");

        fprintf(fp,

        fprintf(fp,

                "\t\to_sync <= 1\'b0;\n"

                "\t\to_sync <= 1\'b0;\n"

                "\telse if (i_ce)\n"

                "\telse if (i_ce)\n"

                "\t\to_sync <= sync_pipe[1];\n\n");

                "\t\to_sync <= sync_pipe[1];\n\n");

        fprintf(fp,

        fprintf(fp,

"       always @(posedge i_clk)\n"

"       always @(posedge i_clk)\n"

"       if (i_ce)\n"

"       if (i_ce)\n"

"       begin\n"

"       begin\n"

"               if (!stage)\n"

"               if (!stage)\n"

"               begin\n"

"               begin\n"

"                       // Clock 1\n"

"                       // Clock 1\n"

"                       m_r <= i_r;\n"

"                       m_r <= i_r;\n"

"                       m_i <= i_i;\n"

"                       m_i <= i_i;\n"

"                       // Clock 3\n"

"                       // Clock 3\n"

"                       rnd_r <= sto_r;\n"

"                       rnd_r <= sto_r;\n"

"                       rnd_i <= sto_i;\n"

"                       rnd_i <= sto_i;\n"

"                       //\n"

"                       //\n"

"               end else begin\n"

"               end else begin\n"

"                       // Clock 2\n"

"                       // Clock 2\n"

"                       rnd_r <= m_r + i_r;\n"

"                       rnd_r <= m_r + i_r;\n"

"                       rnd_i <= m_i + i_i;\n"

"                       rnd_i <= m_i + i_i;\n"

"                       //\n"

"                       //\n"

"                       sto_r <= m_r - i_r;\n"

"                       sto_r <= m_r - i_r;\n"

"                       sto_i <= m_i - i_i;\n"

"                       sto_i <= m_i - i_i;\n"

"                       //\n"

"                       //\n"

"               end\n"

"               end\n"

"       end\n"

"       end\n"

"\n"

"\n"

"       // Now that we have our results, let's round them and report them\n"

"       // Now that we have our results, let's round them and report them\n"

"       wire    signed  [(OWIDTH-1):0]  o_r, o_i;\n"

"       wire    signed  [(OWIDTH-1):0]  o_r, o_i;\n"

"\n"

"\n"

"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r);\n"

"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_r(i_clk, i_ce, rnd_r, o_r);\n"

"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i);\n"

"       convround #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_i(i_clk, i_ce, rnd_i, o_i);\n"

"\n"

"\n"

"       assign  o_val  = { o_r, o_i };\n"

"       assign  o_val  = { o_r, o_i };\n"

"\n");

"\n");

        if (formal_property_flag) {

        if (formal_property_flag) {

                fprintf(fp,

                fprintf(fp,

        "`ifdef FORMAL\n"

        "`ifdef FORMAL\n"

                "\treg  f_past_valid;\n"

                "\treg  f_past_valid;\n"

                "\tinitial      f_past_valid = 1'b0;\n"

                "\tinitial      f_past_valid = 1'b0;\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\t     f_past_valid <= 1'b1;\n"

                "\t     f_past_valid <= 1'b1;\n"

        "\n"

        "\n"

        "`ifdef LASTSTAGE\n"

        "`ifdef LASTSTAGE\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"

                "\t     assume((i_ce)||($past(i_ce))||($past(i_ce,2)));\n"

        "`endif\n"

        "`endif\n"

        "\n"

        "\n"

                "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"

                "\tinitial      assert(IWIDTH+1 == OWIDTH);\n"

        "\n"

        "\n"

                "\treg  signed  [IWIDTH-1:0]    f_piped_real    [0:3];\n"

                "\treg  signed  [IWIDTH-1:0]    f_piped_real    [0:3];\n"

                "\treg  signed  [IWIDTH-1:0]    f_piped_imag    [0:3];\n"

                "\treg  signed  [IWIDTH-1:0]    f_piped_imag    [0:3];\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif (i_ce)\n"

                "\tif (i_ce)\n"

                "\tbegin\n"

                "\tbegin\n"

                "\t     f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH];\n"

                "\t     f_piped_real[0] <= i_val[2*IWIDTH-1:IWIDTH];\n"

                "\t     f_piped_imag[0] <= i_val[  IWIDTH-1:0];\n"

                "\t     f_piped_imag[0] <= i_val[  IWIDTH-1:0];\n"

        "\n"

        "\n"

                "\t     f_piped_real[1] <= f_piped_real[0];\n"

                "\t     f_piped_real[1] <= f_piped_real[0];\n"

                "\t     f_piped_imag[1] <= f_piped_imag[0];\n"

                "\t     f_piped_imag[1] <= f_piped_imag[0];\n"

        "\n"

        "\n"

                "\t     f_piped_real[2] <= f_piped_real[1];\n"

                "\t     f_piped_real[2] <= f_piped_real[1];\n"

                "\t     f_piped_imag[2] <= f_piped_imag[1];\n"

                "\t     f_piped_imag[2] <= f_piped_imag[1];\n"

        "\n"

        "\n"

                "\t     f_piped_real[3] <= f_piped_real[2];\n"

                "\t     f_piped_real[3] <= f_piped_real[2];\n"

                "\t     f_piped_imag[3] <= f_piped_imag[2];\n"

                "\t     f_piped_imag[3] <= f_piped_imag[2];\n"

                "\tend\n"

                "\tend\n"

        "\n"

        "\n"

                "\twire f_syncd;\n"

                "\twire f_syncd;\n"

                "\treg  f_rsyncd;\n"

                "\treg  f_rsyncd;\n"

        "\n"

        "\n"

                "\tinitial      f_rsyncd        = 0;\n"

                "\tinitial      f_rsyncd        = 0;\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif (i_reset)\n"

                "\tif (i_reset)\n"

                "\t     f_rsyncd <= 1'b0;\n"

                "\t     f_rsyncd <= 1'b0;\n"

                "\telse if (!f_rsyncd)\n"

                "\telse if (!f_rsyncd)\n"

                "\t     f_rsyncd <= o_sync;\n"

                "\t     f_rsyncd <= o_sync;\n"

                "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"

                "\tassign       f_syncd = (f_rsyncd)||(o_sync);\n"

        "\n"

        "\n"

                "\treg  f_state;\n"

                "\treg  f_state;\n"

                "\tinitial      f_state = 0;\n"

                "\tinitial      f_state = 0;\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif (i_reset)\n"

                "\tif (i_reset)\n"

                "\t     f_state <= 0;\n"

                "\t     f_state <= 0;\n"

                "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\telse if ((i_ce)&&((!wait_for_sync)||(i_sync)))\n"

                "\t     f_state <= f_state + 1;\n"

                "\t     f_state <= f_state + 1;\n"

        "\n"

        "\n"

                "\talways @(*)\n"

                "\talways @(*)\n"

                "\tif (f_state != 0)\n"

                "\tif (f_state != 0)\n"

                "\t     assume(!i_sync);\n"

                "\t     assume(!i_sync);\n"

        "\n"

        "\n"

                "\talways @(*)\n"

                "\talways @(*)\n"

                "\t     assert(stage == f_state[0]);\n"

                "\t     assert(stage == f_state[0]);\n"

        "\n"

        "\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif ((f_state == 1'b1)&&(f_syncd))\n"

                "\tif ((f_state == 1'b1)&&(f_syncd))\n"

                "\tbegin\n"

                "\tbegin\n"

                "\t     assert(o_r == f_piped_real[2] + f_piped_real[1]);\n"

                "\t     assert(o_r == f_piped_real[2] + f_piped_real[1]);\n"

                "\t     assert(o_i == f_piped_imag[2] + f_piped_imag[1]);\n"

                "\t     assert(o_i == f_piped_imag[2] + f_piped_imag[1]);\n"

                "\tend\n"

                "\tend\n"

        "\n"

        "\n"

                "\talways @(posedge i_clk)\n"

                "\talways @(posedge i_clk)\n"

                "\tif ((f_state == 1'b0)&&(f_syncd))\n"

                "\tif ((f_state == 1'b0)&&(f_syncd))\n"

                "\tbegin\n"

                "\tbegin\n"

                "\t     assert(!o_sync);\n"

                "\t     assert(!o_sync);\n"

                "\t     assert(o_r == f_piped_real[3] - f_piped_real[2]);\n"

                "\t     assert(o_r == f_piped_real[3] - f_piped_real[2]);\n"

                "\t     assert(o_i == f_piped_imag[3] - f_piped_imag[2]);\n"

                "\t     assert(o_i == f_piped_imag[3] - f_piped_imag[2]);\n"

                "\tend\n"

                "\tend\n"

        "\n"

        "\n"

                "\talways @(*)\n"

                "\talways @(*)\n"

                "\tif (wait_for_sync)\n"

                "\tif (wait_for_sync)\n"

                "\tbegin\n"

                "\tbegin\n"

                "\t     assert(!f_rsyncd);\n"

                "\t     assert(!f_rsyncd);\n"

                "\t     assert(!o_sync);\n"

                "\t     assert(!o_sync);\n"

                "\t     assert(f_state == 0);\n"

                "\t     assert(f_state == 0);\n"

                "\tend\n\n");

                "\tend\n\n");

        fprintf(fp,

        fprintf(fp,

"`endif // FORMAL\n"

"`endif // FORMAL\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    usage(void) {

void    usage(void) {

        fprintf(stderr,

        fprintf(stderr,

"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"

"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s]\n"

// "\tfftgen -i\n"

// "\tfftgen -i\n"

"\t-1\tBuild a normal FFT, running at one clock per complex sample, or\n"

"\t-1\tBuild a normal FFT, running at one clock per complex sample, or\n"

"\t\t(for a real FFT) at one clock per two real input samples.\n"

"\t\t(for a real FFT) at one clock per two real input samples.\n"

"\t-a <hdrname>  Create a header of information describing the built-in\n"

"\t-a <hdrname>  Create a header of information describing the built-in\n"

"\t\tparameters, useful for module-level testing with Verilator\n"

"\t\tparameters, useful for module-level testing with Verilator\n"

"\t-c <cbits>\tCauses all internal complex coefficients to be\n"

"\t-c <cbits>\tCauses all internal complex coefficients to be\n"

"\t\tlonger than the corresponding data bits, to help avoid\n"

"\t\tlonger than the corresponding data bits, to help avoid\n"

"\t\tcoefficient truncation errors.  The default is %d bits longer\n"

"\t\tcoefficient truncation errors.  The default is %d bits longer\n"

"\t\tthan the data bits.\n"

"\t\tthan the data bits.\n"

"\t-d <dir>  Places all of the generated verilog files into <dir>.\n"

"\t-d <dir>  Places all of the generated verilog files into <dir>.\n"

"\t\tThe default is a subdirectory of the current directory\n"

"\t\tThe default is a subdirectory of the current directory\n"

"\t\tnamed %s.\n"

"\t\tnamed %s.\n"

"\t-f <size>  Sets the size of the FFT as the number of complex\n"

"\t-f <size>  Sets the size of the FFT as the number of complex\n"

"\t\tsamples input to the transform.  (No default value, this is\n"

"\t\tsamples input to the transform.  (No default value, this is\n"

"\t\ta required parameter.)\n"

"\t\ta required parameter.)\n"

"\t-i\tAn inverse FFT, meaning that the coefficients are\n"

"\t-i\tAn inverse FFT, meaning that the coefficients are\n"

"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"

"\t\tgiven by e^{ j 2 pi k/N n }.  The default is a forward FFT, with\n"

"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"

"\t\tcoefficients given by e^{ -j 2 pi k/N n }.\n"

"\t-k #\tSets # clocks per sample, used to minimize multiplies.  Also\n"

"\t-k #\tSets # clocks per sample, used to minimize multiplies.  Also\n"

"\t\tsets one sample in per i_ce clock (opt -1)\n"

"\t\tsets one sample in per i_ce clock (opt -1)\n"

"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"

"\t-m <mxbits>\tSets the maximum bit width that the FFT should ever\n"

"\t\tproduce.  Internal values greater than this value will be\n"

"\t\tproduce.  Internal values greater than this value will be\n"

"\t\ttruncated to this value.  (The default value grows the input\n"

"\t\ttruncated to this value.  (The default value grows the input\n"

"\t\tsize by one bit for every two FFT stages.)\n"

"\t\tsize by one bit for every two FFT stages.)\n"

"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"

"\t-n <nbits>\tSets the bitwidth for values coming into the (i)FFT.\n"

"\t\tThe default is %d bits input for each component of the two\n"

"\t\tThe default is %d bits input for each component of the two\n"

"\t\tcomplex values into the FFT.\n"

"\t\tcomplex values into the FFT.\n"

"\t-p <nmpy>  Sets the number of hardware multiplies (DSPs) to use, versus\n"

"\t-p <nmpy>  Sets the number of hardware multiplies (DSPs) to use, versus\n"

"\t\tshift-add emulation.  The default is not to use any hardware\n"

"\t\tshift-add emulation.  The default is not to use any hardware\n"

"\t\tmultipliers.\n"

"\t\tmultipliers.\n"

"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"

"\t-r\tBuild a real-FFT at four input points per sample, rather than a\n"

"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"

"\t\tcomplex FFT.  (Default is a Complex FFT.)\n"

"\t\tThis option is a place-holder.  The real-FFT has not (yet) been\n"

"\t\timplemented.\n"

"\t-s\tSkip the final bit reversal stage.  This is useful in\n"

"\t-s\tSkip the final bit reversal stage.  This is useful in\n"

"\t\talgorithms that need to apply a filter without needing to do\n"

"\t\talgorithms that need to apply a filter without needing to do\n"

"\t\tbin shifting, as these algorithms can, with this option, just\n"

"\t\tbin shifting, as these algorithms can, with this option, just\n"

"\t\tmultiply by a bit reversed correlation sequence and then\n"

"\t\tmultiply by a bit reversed correlation sequence and then\n"

"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"

"\t\tinverse FFT the (still bit reversed) result.  (You would need\n"

"\t\ta decimation in time inverse to do this, which this program does\n"

"\t\ta decimation in time inverse to do this, which this program does\n"

"\t\tnot yet provide.)\n"

"\t\tnot yet provide.)\n"

"\t-S\tInclude the final bit reversal stage (default).\n"

"\t-S\tInclude the final bit reversal stage (default).\n"

"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"

"\t-x <xtrabits>\tUse this many extra bits internally, before any final\n"

"\t\trounding or truncation of the answer to the final number of\n"

"\t\trounding or truncation of the answer to the final number of\n"

"\t\tbits.  The default is to use %d extra bits internally.\n",

"\t\tbits.  The default is to use %d extra bits internally.\n",

/*

/*

"\t-0\tA forward FFT (default), meaning that the coefficients are\n"

"\t-0\tA forward FFT (default), meaning that the coefficients are\n"

"\t\tgiven by e^{-j 2 pi k/N n }.\n"

"\t\tgiven by e^{-j 2 pi k/N n }.\n"

"\t-1\tAn inverse FFT, meaning that the coefficients are\n"

"\t-1\tAn inverse FFT, meaning that the coefficients are\n"

"\t\tgiven by e^{ j 2 pi k/N n }.\n",

"\t\tgiven by e^{ j 2 pi k/N n }.\n",

*/

*/

        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);

        DEF_XTRACBITS, DEF_COREDIR, DEF_NBITSIN, DEF_XTRAPBITS);

// Features still needed:

// Features still needed:

//      Interactivity.

//      Interactivity.

int main(int argc, char **argv) {

int main(int argc, char **argv) {

        int     fftsize = -1, lgsize = -1;

        int     fftsize = -1, lgsize = -1;

        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,

        int     nbitsin = DEF_NBITSIN, xtracbits = DEF_XTRACBITS,

                        nummpy=DEF_NMPY, nmpypstage=6, mpy_stages;

                        nummpy=DEF_NMPY, nmpypstage=6, mpy_stages;

        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS, ckpce = 0;

        int     nbitsout, maxbitsout = -1, xtrapbits=DEF_XTRAPBITS, ckpce = 0;

        const char *EMPTYSTR = "";

        const char *EMPTYSTR = "";

        bool    bitreverse = true, inverse=false,

        bool    bitreverse = true, inverse=false,

                verbose_flag = false,

                verbose_flag = false,

                single_clock = false,

                single_clock = true,

                real_fft = false,

                real_fft = false,

                async_reset = false;

                async_reset = false;

        FILE    *vmain;

        FILE    *vmain;

        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";

        std::string     coredir = DEF_COREDIR, cmdline = "", hdrname = "";

        ROUND_T rounding = RND_CONVERGENT;

        ROUND_T rounding = RND_CONVERGENT;

        // ROUND_T      rounding = RND_HALFUP;

        // ROUND_T      rounding = RND_HALFUP;

        bool    dbg = false;

        bool    dbg = false;

        int     dbgstage = 128;

        int     dbgstage = 128;

        if (argc <= 1)

        if (argc <= 1)

                usage();

                usage();

        // Copy the original command line before we mess with it

        // Copy the original command line before we mess with it

        cmdline = argv[0];

        cmdline = argv[0];

        for(int argn=1; argn<argc; argn++) {

        for(int argn=1; argn<argc; argn++) {

                cmdline += " ";

                cmdline += " ";

                cmdline += argv[argn];

                cmdline += argv[argn];

        { int c;

        { int c;

        while((c = getopt(argc, argv, "12Aa:c:d:D:f:hik:m:n:p:rsSx:v")) != -1) {

        while((c = getopt(argc, argv, "12Aa:c:d:D:f:hik:m:n:p:rsSx:v")) != -1) {

                switch(c) {

                switch(c) {

                case '1':       single_clock = true;  break;

                case '1':       single_clock = true;  break;

                case '2':       single_clock = false; break;

                case '2':       single_clock = false; break;

                case 'A':       async_reset  = true;  break;

                case 'A':       async_reset  = true;  break;

                case 'a':       hdrname = strdup(optarg);       break;

                case 'a':       hdrname = strdup(optarg);       break;

                case 'c':       xtracbits = atoi(optarg);       break;

                case 'c':       xtracbits = atoi(optarg);       break;

                case 'd':       coredir = std::string(optarg);  break;

                case 'd':       coredir = std::string(optarg);  break;

                case 'D':       dbgstage = atoi(optarg);        break;

                case 'D':       dbgstage = atoi(optarg);        break;

                case 'f':       fftsize = atoi(optarg);

                case 'f':       fftsize = atoi(optarg);

                                { int sln = strlen(optarg);

                                { int sln = strlen(optarg);

                                if (!isdigit(optarg[sln-1])){

                                if (!isdigit(optarg[sln-1])){

                                        switch(optarg[sln-1]) {

                                        switch(optarg[sln-1]) {

                                        case 'k': case 'K':

                                        case 'k': case 'K':

                                                fftsize <<= 10;

                                                fftsize <<= 10;

                                                break;

                                                break;

                                        case 'm': case 'M':

                                        case 'm': case 'M':

                                                fftsize <<= 20;

                                                fftsize <<= 20;

                                                break;

                                                break;

                                        case 'g': case 'G':

                                        case 'g': case 'G':

                                                fftsize <<= 30;

                                                fftsize <<= 30;

                                                break;

                                                break;

                                        default:

                                        default:

                                                printf("ERR: Unknown FFT size, %s!\n", optarg);

                                                printf("ERR: Unknown FFT size, %s!\n", optarg);

                                                exit(EXIT_FAILURE);

                                                exit(EXIT_FAILURE);

                                }} break;

                                }} break;

                case 'h':       usage(); exit(EXIT_SUCCESS);    break;

                case 'h':       usage(); exit(EXIT_SUCCESS);    break;

                case 'i':       inverse = true;                 break;

                case 'i':       inverse = true;                 break;

                case 'k':       ckpce = atoi(optarg);

                case 'k':       ckpce = atoi(optarg);

                                single_clock = true;

                                single_clock = true;

                                break;

                                break;

                case 'm':       maxbitsout = atoi(optarg);      break;

                case 'm':       maxbitsout = atoi(optarg);      break;

                case 'n':       nbitsin = atoi(optarg);         break;

                case 'n':       nbitsin = atoi(optarg);         break;

                case 'p':       nummpy = atoi(optarg);          break;

                case 'p':       nummpy = atoi(optarg);          break;

                case 'r':       real_fft = true;                break;

                case 'r':       real_fft = true;                break;

                case 'S':       bitreverse = true;              break;

                case 'S':       bitreverse = true;              break;

                case 's':       bitreverse = false;             break;

                case 's':       bitreverse = false;             break;

                case 'x':       xtrapbits = atoi(optarg);       break;

                case 'x':       xtrapbits = atoi(optarg);       break;

                case 'v':       verbose_flag = true;            break;

                case 'v':       verbose_flag = true;            break;

                // case 'z':    variable_size = true;           break;

                // case 'z':    variable_size = true;           break;

                default:

                default:

                        printf("Unknown argument, -%c\n", c);

                        printf("Unknown argument, -%c\n", c);

                        usage();

                        usage();

                        exit(EXIT_FAILURE);

                        exit(EXIT_FAILURE);

}}

}}

        if (verbose_flag) {

        if (verbose_flag) {

                if (inverse)

                if (inverse)

                        printf("Building a %d point inverse FFT module, with %s outputs\n",

                        printf("Building a %d point inverse FFT module, with %s outputs\n",

                                fftsize,

                                fftsize,

                                (real_fft)?"real ":"complex");

                                (real_fft)?"real ":"complex");

                else

                else

                        printf("Building a %d point %sforward FFT module\n",

                        printf("Building a %d point %sforward FFT module\n",

                                fftsize,

                                fftsize,

                                (real_fft)?"real ":"");

                                (real_fft)?"real ":"");

                if (!single_clock)

                if (!single_clock)

                        printf("  that accepts two inputs per clock\n");

                        printf("  that accepts two inputs per clock\n");

                if (async_reset)

                if (async_reset)

                        printf("  using a negative logic ASYNC reset\n");

                        printf("  using a negative logic ASYNC reset\n");

                printf("The core will be placed into the %s/ directory\n", coredir.c_str());

                printf("The core will be placed into the %s/ directory\n", coredir.c_str());

                if (hdrname[0])

                if (hdrname[0])

                        printf("A C header file, %s, will be written capturing these\n"

                        printf("A C header file, %s, will be written capturing these\n"

                                "options for a Verilator testbench\n",

                                "options for a Verilator testbench\n",

                                        hdrname.c_str());

                                        hdrname.c_str());

                // nummpy

                // nummpy

                // xtrapbits

                // xtrapbits

        if (real_fft) {

        if (real_fft) {

                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");

                printf("The real FFT option is not implemented yet, but still on\nmy to do list.  Please try again later.\n");

                exit(EXIT_FAILURE);

                exit(EXIT_FAILURE);

        if (ckpce < 1)

        if (ckpce < 1)

                ckpce = 1;

                ckpce = 1;

        if (!bitreverse) {

        if (!bitreverse) {

                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");

                printf("WARNING: While I can skip the bit reverse stage, the code to do\n");

                printf("an inverse FFT on a bit--reversed input has not yet been\n");

                printf("an inverse FFT on a bit--reversed input has not yet been\n");

                printf("built.\n");

                printf("built.\n");

        if ((lgsize < 0)&&(fftsize > 1)) {

        if ((lgsize < 0)&&(fftsize > 1)) {

                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)

                for(lgsize=1; (1<<lgsize) < fftsize; lgsize++)

        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {

        if ((fftsize <= 0)||(nbitsin < 1)||(nbitsin>48)) {

                printf("INVALID PARAMETERS!!!!\n");

                printf("INVALID PARAMETERS!!!!\n");

                exit(EXIT_FAILURE);

                exit(EXIT_FAILURE);

        if (nextlg(fftsize) != fftsize) {

        if (nextlg(fftsize) != fftsize) {

                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",

                fprintf(stderr, "ERR: FFTSize (%d) *must* be a power of two\n",

                                fftsize);

                                fftsize);

                exit(EXIT_FAILURE);

                exit(EXIT_FAILURE);

        } else if (fftsize < 2) {

        } else if (fftsize < 2) {

                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",

                fprintf(stderr, "ERR: Minimum FFTSize is 2, not %d\n",

                                fftsize);

                                fftsize);

                if (fftsize == 1) {

                if (fftsize == 1) {

                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");

                        fprintf(stderr, "You do realize that a 1 point FFT makes very little sense\n");

                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");

                        fprintf(stderr, "in an FFT operation that handles two samples per clock?\n");

                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");

                        fprintf(stderr, "If you really need to do an FFT of this size, the output\n");

                        fprintf(stderr, "can be connected straight to the input.\n");

                        fprintf(stderr, "can be connected straight to the input.\n");

                } else {

                } else {

                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);

                        fprintf(stderr, "Indeed, a size of %d doesn\'t make much sense to me at all.\n", fftsize);

                        fprintf(stderr, "Is such an operation even defined?\n");

                        fprintf(stderr, "Is such an operation even defined?\n");

                exit(EXIT_FAILURE);

                exit(EXIT_FAILURE);

        // Calculate how many output bits we'll have, and what the log

        // Calculate how many output bits we'll have, and what the log

        // based two size of our FFT is.

        // based two size of our FFT is.

                int     tmp_size = fftsize;

                int     tmp_size = fftsize;

                // The first stage always accumulates one bit, regardless

                // The first stage always accumulates one bit, regardless

                // of whether you need to or not.

                // of whether you need to or not.

                nbitsout = nbitsin + 1;

                nbitsout = nbitsin + 1;

                tmp_size >>= 1;

                tmp_size >>= 1;

                while(tmp_size > 4) {

                while(tmp_size > 4) {

                        nbitsout += 1;

                        nbitsout += 1;

                        tmp_size >>= 2;

                        tmp_size >>= 2;

                if (tmp_size > 1)

                if (tmp_size > 1)

                        nbitsout ++;

                        nbitsout ++;

                if (fftsize <= 2)

                if (fftsize <= 2)

                        bitreverse = false;

                        bitreverse = false;

        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))

        } if ((maxbitsout > 0)&&(nbitsout > maxbitsout))

                nbitsout = maxbitsout;

                nbitsout = maxbitsout;

        if (verbose_flag) {

        if (verbose_flag) {

                printf("Output samples will be %d bits wide\n", nbitsout);

                printf("Output samples will be %d bits wide\n", nbitsout);

                printf("This %sFFT will take %d-bit samples in, and produce %d samples out\n", (inverse)?"i":"", nbitsin, nbitsout);

                printf("This %sFFT will take %d-bit samples in, and produce %d samples out\n", (inverse)?"i":"", nbitsin, nbitsout);

                if (maxbitsout > 0)

                if (maxbitsout > 0)

                        printf("  Internally, it will allow items to accumulate to %d bits\n", maxbitsout);

                        printf("  Internally, it will allow items to accumulate to %d bits\n", maxbitsout);

                printf("  Twiddle-factors of %d bits will be used\n",

                printf("  Twiddle-factors of %d bits will be used\n",

                        nbitsin+xtracbits);

                        nbitsin+xtracbits);

                if (!bitreverse)

                if (!bitreverse)

                printf("  The output will be left in bit-reversed order\n");

                printf("  The output will be left in bit-reversed order\n");

        // Figure out how many multiply stages to use, and how many to skip

        // Figure out how many multiply stages to use, and how many to skip

        if (!single_clock) {

        if (!single_clock) {

                nmpypstage = 6;

                nmpypstage = 6;

        } else if (ckpce <= 1) {

        } else if (ckpce <= 1) {

                nmpypstage = 3;

                nmpypstage = 3;

        } else if (ckpce == 2) {

        } else if (ckpce == 2) {

                nmpypstage = 2;

                nmpypstage = 2;

        } else

        } else

                nmpypstage = 1;

                nmpypstage = 1;

        mpy_stages = nummpy / nmpypstage;

        mpy_stages = nummpy / nmpypstage;

        if (mpy_stages > lgval(fftsize)-2)

        if (mpy_stages > lgval(fftsize)-2)

                mpy_stages = lgval(fftsize)-2;

                mpy_stages = lgval(fftsize)-2;

                struct stat     sbuf;

                struct stat     sbuf;

                if (lstat(coredir.c_str(), &sbuf)==0) {

                if (lstat(coredir.c_str(), &sbuf)==0) {

                        if (!S_ISDIR(sbuf.st_mode)) {

                        if (!S_ISDIR(sbuf.st_mode)) {

                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());

                                fprintf(stderr, "\'%s\' already exists, and is not a directory!\n", coredir.c_str());

                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");

                                fprintf(stderr, "I will stop now, lest I overwrite something you care about.\n");

                                fprintf(stderr, "To try again, please remove this file.\n");

                                fprintf(stderr, "To try again, please remove this file.\n");

                                exit(EXIT_FAILURE);

                                exit(EXIT_FAILURE);

                } else

                } else

                        mkdir(coredir.c_str(), 0755);

                        mkdir(coredir.c_str(), 0755);

                if (access(coredir.c_str(), X_OK|W_OK) != 0) {

                if (access(coredir.c_str(), X_OK|W_OK) != 0) {

                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());

                        fprintf(stderr, "I have no access to the directory \'%s\'.\n", coredir.c_str());

                        exit(EXIT_FAILURE);

                        exit(EXIT_FAILURE);

        if (hdrname.length() > 0) {

        if (hdrname.length() > 0) {

                FILE    *hdr = fopen(hdrname.c_str(), "w");

                FILE    *hdr = fopen(hdrname.c_str(), "w");

                if (hdr == NULL) {

                if (hdr == NULL) {

                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());

                        fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());

                        perror("O/S Err:");

                        perror("O/S Err:");

                        exit(EXIT_FAILURE);

                        exit(EXIT_FAILURE);

                fprintf(hdr,

                fprintf(hdr,

SLASHLINE

SLASHLINE

"//\n"

"//\n"

"// Filename:\t%s\n"

"// Filename:\t%s\n"

"//\n"

"//\n"

"// Project:\t%s\n"

"// Project:\t%s\n"

"//\n"

"//\n"

"// Purpose:    This simple header file captures the internal constants\n"

"// Purpose:    This simple header file captures the internal constants\n"

"//             within the FFT that were used to build it, for the purpose\n"

"//             within the FFT that were used to build it, for the purpose\n"

"//     of making C++ integration (and test bench testing) simpler.  That is,\n"

"//     of making C++ integration (and test bench testing) simpler.  That is,\n"

"//     should the FFT change size, this will note that size change and thus\n"

"//     should the FFT change size, this will note that size change and thus\n"

"//     any test bench or other C++ program dependent upon either the size of\n"

"//     any test bench or other C++ program dependent upon either the size of\n"

"//     the FFT, the number of bits in or out of it, etc., can pick up the\n"

"//     the FFT, the number of bits in or out of it, etc., can pick up the\n"

"//     changes in the defines found within this file.\n"

"//     changes in the defines found within this file.\n"

"//\n",

"//\n",

                hdrname.c_str(), prjname);

                hdrname.c_str(), prjname);

                fprintf(hdr, "%s", creator);

                fprintf(hdr, "%s", creator);

                fprintf(hdr, "//\n");

                fprintf(hdr, "//\n");

                fprintf(hdr, "%s", cpyleft);

                fprintf(hdr, "%s", cpyleft);

                fprintf(hdr, "//\n"

                fprintf(hdr, "//\n"

                "//\n"

                "//\n"

                "#ifndef %sFFTHDR_H\n"

                "#ifndef %sFFTHDR_H\n"

                "#define %sFFTHDR_H\n"

                "#define %sFFTHDR_H\n"

                "\n"

                "\n"

                "#define\t%sFFT_IWIDTH\t%d\n"

                "#define\t%sFFT_IWIDTH\t%d\n"

                "#define\t%sFFT_OWIDTH\t%d\n"

                "#define\t%sFFT_OWIDTH\t%d\n"

                "#define\t%sFFT_LGWIDTH\t%d\n"

                "#define\t%sFFT_LGWIDTH\t%d\n"

                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",

                "#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",

                        (inverse)?"I":"", (inverse)?"I":"",

                        (inverse)?"I":"", (inverse)?"I":"",

                        (inverse)?"I":"", nbitsin,

                        (inverse)?"I":"", nbitsin,

                        (inverse)?"I":"", nbitsout,

                        (inverse)?"I":"", nbitsout,

                        (inverse)?"I":"", lgsize,

                        (inverse)?"I":"", lgsize,

                        (inverse)?"I":"", (inverse)?"I":"");

                        (inverse)?"I":"", (inverse)?"I":"");

                if (ckpce > 0)

                if (ckpce > 0)

                        fprintf(hdr, "#define\t%sFFT_CKPCE\t%d\t// Clocks per CE\n",

                        fprintf(hdr, "#define\t%sFFT_CKPCE\t%d\t// Clocks per CE\n",

                                (inverse)?"I":"", ckpce);

                                (inverse)?"I":"", ckpce);

                else

                else

                        fprintf(hdr, "// Two samples per i_ce\n");

                        fprintf(hdr, "// Two samples per i_ce\n");

                if (!bitreverse)

                if (!bitreverse)

                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",

                        fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",

                                (inverse)?"I":"");

                                (inverse)?"I":"");

                if (real_fft)

                if (real_fft)

                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");

                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");

                if (!single_clock)

                if (!single_clock)

                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");

                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");

                else

                else

                        fprintf(hdr, "// #define\tDBLCLK%sFFT // this FFT takes one input sample per clock\n\n", (inverse)?"I":"");

                        fprintf(hdr, "// #define\tDBLCLK%sFFT // this FFT takes one input sample per clock\n\n", (inverse)?"I":"");

                if (USE_OLD_MULTIPLY)

                if (USE_OLD_MULTIPLY)

                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");

                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");

                fprintf(hdr, "// Parameters for testing the longbimpy\n");

                fprintf(hdr, "// Parameters for testing the longbimpy\n");

                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);

                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);

#ifdef  TST_LONGBIMPY_BW

#ifdef  TST_LONGBIMPY_BW

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);

#else

#else

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");

                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");

#endif

#endif

                fprintf(hdr, "// Parameters for testing the shift add multiply\n");

                fprintf(hdr, "// Parameters for testing the shift add multiply\n");

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);

#ifdef  TST_SHIFTADDMPY_BW

#ifdef  TST_SHIFTADDMPY_BW

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);

#else

#else

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");

                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");

#endif

#endif

#define TST_SHIFTADDMPY_AW      16

#define TST_SHIFTADDMPY_AW      16

#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW

                fprintf(hdr, "// Parameters for testing the butterfly\n");

                fprintf(hdr, "// Parameters for testing the butterfly\n");

                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);

                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",

                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",

                                bflydelay(TST_BUTTERFLY_IWIDTH,

                                bflydelay(TST_BUTTERFLY_IWIDTH,

                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));

                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));

                fprintf(hdr, "// Parameters for testing the quarter stage\n");

                fprintf(hdr, "// Parameters for testing the quarter stage\n");

                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);

                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);

                fprintf(hdr, "// Parameters for testing the double stage\n");

                fprintf(hdr, "// Parameters for testing the double stage\n");

                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);

                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);

                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);

                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");

                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");

                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);

                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);

                fprintf(hdr, "\n" "#endif\n\n");

                fprintf(hdr, "\n" "#endif\n\n");

                fclose(hdr);

                fclose(hdr);

                std::string     fname_string;

                std::string     fname_string;

                fname_string = coredir;

                fname_string = coredir;

                fname_string += "/";

                fname_string += "/";

                if (inverse) fname_string += "i";

                if (inverse) fname_string += "i";

                fname_string += "fftmain.v";

                fname_string += "fftmain.v";

                vmain = fopen(fname_string.c_str(), "w");

                vmain = fopen(fname_string.c_str(), "w");

                if (NULL == vmain) {

                if (NULL == vmain) {

                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());

                        fprintf(stderr, "Could not open \'%s\' for writing\n", fname_string.c_str());

                        perror("Err from O/S:");

                        perror("Err from O/S:");

                        exit(EXIT_FAILURE);

                        exit(EXIT_FAILURE);

                if (verbose_flag)

                if (verbose_flag)

                        printf("Opened %s\n", fname_string.c_str());

                        printf("Opened %s\n", fname_string.c_str());

        fprintf(vmain,

        fprintf(vmain,

SLASHLINE

SLASHLINE

"//\n"

"//\n"

"// Filename:\t%sfftmain.v\n"

"// Filename:\t%sfftmain.v\n"

"//\n"

"//\n"

"// Project:    %s\n"

"// Project:    %s\n"

"//\n"

"//\n"

"// Purpose:    This is the main module in the General Purpose FPGA FFT\n"

"// Purpose:    This is the main module in the General Purpose FPGA FFT\n"

"//             implementation.  As such, all other modules are subordinate\n"

"//             implementation.  As such, all other modules are subordinate\n"

"//     to this one.  This module accomplish a fixed size Complex FFT on\n"

"//     to this one.  This module accomplish a fixed size Complex FFT on\n"

"//     %d data points.\n",

"//     %d data points.\n",

                (inverse)?"i":"",prjname, fftsize);

                (inverse)?"i":"",prjname, fftsize);

        if (single_clock) {

        if (single_clock) {

        fprintf(vmain,

        fprintf(vmain,

"//     The FFT is fully pipelined, and accepts as inputs one complex two\'s\n"

"//     The FFT is fully pipelined, and accepts as inputs one complex two\'s\n"

"//     complement sample per clock.\n");

"//     complement sample per clock.\n");

        } else {

        } else {

        fprintf(vmain,

        fprintf(vmain,

"//     The FFT is fully pipelined, and accepts as inputs two complex two\'s\n"

"//     The FFT is fully pipelined, and accepts as inputs two complex two\'s\n"

"//     complement samples per clock.\n");

"//     complement samples per clock.\n");

        fprintf(vmain,

        fprintf(vmain,

"//\n"

"//\n"

"// Parameters:\n"

"// Parameters:\n"

"//     i_clk\tThe clock.  All operations are synchronous with this clock.\n"

"//     i_clk\tThe clock.  All operations are synchronous with this clock.\n"

"//     i_%sreset%s\tSynchronous reset, active high.  Setting this line will\n"

"//     i_%sreset%s\tSynchronous reset, active high.  Setting this line will\n"

"//     \t\tforce the reset of all of the internals to this routine.\n"

"//     \t\tforce the reset of all of the internals to this routine.\n"

"//     \t\tFurther, following a reset, the o_sync line will go\n"

"//     \t\tFurther, following a reset, the o_sync line will go\n"

"//     \t\thigh the same time the first output sample is valid.\n",

"//     \t\thigh the same time the first output sample is valid.\n",

                (async_reset)?"a":"", (async_reset)?"_n":"");

                (async_reset)?"a":"", (async_reset)?"_n":"");

        if (single_clock) {

        if (single_clock) {

                fprintf(vmain,

                fprintf(vmain,

"//     i_ce\tA clock enable line.  If this line is set, this module\n"

"//     i_ce\tA clock enable line.  If this line is set, this module\n"

"//     \t\twill accept one complex input value, and produce\n"

"//     \t\twill accept one complex input value, and produce\n"

"//     \t\tone (possibly empty) complex output value.\n"

"//     \t\tone (possibly empty) complex output value.\n"

"//     i_sample\tThe complex input sample.  This value is split\n"

"//     i_sample\tThe complex input sample.  This value is split\n"

"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"

"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"

"//     \t\tthe real portion in the high order bits, and the\n"

"//     \t\tthe real portion in the high order bits, and the\n"

"//     \t\timaginary portion taking the bottom %d bits.\n"

"//     \t\timaginary portion taking the bottom %d bits.\n"

"//     o_result\tThe output result, of the same format as i_sample,\n"

"//     o_result\tThe output result, of the same format as i_sample,\n"

"//     \t\tonly having %d bits for each of the real and imaginary\n"

"//     \t\tonly having %d bits for each of the real and imaginary\n"

"//     \t\tcomponents, leading to %d bits total.\n"

"//     \t\tcomponents, leading to %d bits total.\n"

"//     o_sync\tA one bit output indicating the first sample of the FFT frame.\n"

"//     o_sync\tA one bit output indicating the first sample of the FFT frame.\n"

"//     \t\tIt also indicates the first valid sample out of the FFT\n"

"//     \t\tIt also indicates the first valid sample out of the FFT\n"

"//     \t\ton the first frame.\n", nbitsin, nbitsin, nbitsout, nbitsout*2);

"//     \t\ton the first frame.\n", nbitsin, nbitsin, nbitsout, nbitsout*2);

        } else {

        } else {

                fprintf(vmain,

                fprintf(vmain,

"//     i_ce\tA clock enable line.  If this line is set, this module\n"

"//     i_ce\tA clock enable line.  If this line is set, this module\n"

"//     \t\twill accept two complex values as inputs, and produce\n"

"//     \t\twill accept two complex values as inputs, and produce\n"

"//     \t\ttwo (possibly empty) complex values as outputs.\n"

"//     \t\ttwo (possibly empty) complex values as outputs.\n"

"//     i_left\tThe first of two complex input samples.  This value is split\n"

"//     i_left\tThe first of two complex input samples.  This value is split\n"

"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"

"//     \t\tinto two two\'s complement numbers, %d bits each, with\n"

"//     \t\tthe real portion in the high order bits, and the\n"

"//     \t\tthe real portion in the high order bits, and the\n"

"//     \t\timaginary portion taking the bottom %d bits.\n"

"//     \t\timaginary portion taking the bottom %d bits.\n"

"//     i_right\tThis is the same thing as i_left, only this is the second of\n"

"//     i_right\tThis is the same thing as i_left, only this is the second of\n"

"//     \t\ttwo such samples.  Hence, i_left would contain input\n"

"//     \t\ttwo such samples.  Hence, i_left would contain input\n"

"//     \t\tsample zero, i_right would contain sample one.  On the\n"

"//     \t\tsample zero, i_right would contain sample one.  On the\n"

"//     \t\tnext clock i_left would contain input sample two,\n"

"//     \t\tnext clock i_left would contain input sample two,\n"

"//     \t\ti_right number three and so forth.\n"

"//     \t\ti_right number three and so forth.\n"

"//     o_left\tThe first of two output samples, of the same format as i_left,\n"

"//     o_left\tThe first of two output samples, of the same format as i_left,\n"

"//     \t\tonly having %d bits for each of the real and imaginary\n"

"//     \t\tonly having %d bits for each of the real and imaginary\n"

"//     \t\tcomponents, leading to %d bits total.\n"

"//     \t\tcomponents, leading to %d bits total.\n"

"//     o_right\tThe second of two output samples produced each clock.  This has\n"

"//     o_right\tThe second of two output samples produced each clock.  This has\n"

"//     \t\tthe same format as o_left.\n"

"//     \t\tthe same format as o_left.\n"

"//     o_sync\tA one bit output indicating the first valid sample produced by\n"

"//     o_sync\tA one bit output indicating the first valid sample produced by\n"

"//     \t\tthis FFT following a reset.  Ever after, this will\n"

"//     \t\tthis FFT following a reset.  Ever after, this will\n"

"//     \t\tindicate the first sample of an FFT frame.\n",

"//     \t\tindicate the first sample of an FFT frame.\n",

        nbitsin, nbitsin, nbitsout, nbitsout*2);

        nbitsin, nbitsin, nbitsout, nbitsout*2);

        fprintf(vmain,

        fprintf(vmain,

"//\n"

"//\n"

"// Arguments:\tThis file was computer generated using the following command\n"

"// Arguments:\tThis file was computer generated using the following command\n"

"//\t\tline:\n"

"//\t\tline:\n"

"//\n");

"//\n");

        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());

        fprintf(vmain, "//\t\t%% %s\n", cmdline.c_str());

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\tThis core will use hardware accelerated multiplies (DSPs)\n"

                "//\tfor %d of the %d stages\n", mpy_stages, lgval(fftsize));

        fprintf(vmain, "//\n");

        fprintf(vmain, "%s", creator);

        fprintf(vmain, "%s", creator);

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "%s", cpyleft);

        fprintf(vmain, "%s", cpyleft);

        fprintf(vmain, "//\n//\n`default_nettype\tnone\n//\n");

        fprintf(vmain, "//\n//\n`default_nettype\tnone\n//\n");

        std::string     resetw("i_reset");

        std::string     resetw("i_reset");

        if (async_reset)

        if (async_reset)

                resetw = "i_areset_n";

                resetw = "i_areset_n";

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "//\n");

        fprintf(vmain, "module %sfftmain(i_clk, %s, i_ce,\n",

        fprintf(vmain, "module %sfftmain(i_clk, %s, i_ce,\n",

                (inverse)?"i":"", resetw.c_str());

                (inverse)?"i":"", resetw.c_str());

        if (single_clock) {

        if (single_clock) {

                fprintf(vmain, "\t\ti_sample, o_result, o_sync%s);\n",

                fprintf(vmain, "\t\ti_sample, o_result, o_sync%s);\n",

                        (dbg)?", o_dbg":"");

                        (dbg)?", o_dbg":"");

        } else {

        } else {

                fprintf(vmain, "\t\ti_left, i_right,\n");

                fprintf(vmain, "\t\ti_left, i_right,\n");

                fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",

                fprintf(vmain, "\t\to_left, o_right, o_sync%s);\n",

                        (dbg)?", o_dbg":"");

                        (dbg)?", o_dbg":"");

        fprintf(vmain, "\tparameter\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n\t//\n", nbitsin, nbitsout, lgsize);

        fprintf(vmain,

        "\t// The bit-width of the input, IWIDTH, output, OWIDTH, and the log\n"

        "\t// of the FFT size.  These are localparams, rather than parameters,\n"

        "\t// because once the core has been generated, they can no longer be\n"

        "\t// changed.  (These values can be adjusted by running the core\n"

        "\t// generator again.)  The reason is simply that these values have\n"

        "\t// been hardwired into the core at several places.\n");

        fprintf(vmain, "\tlocalparam\tIWIDTH=%d, OWIDTH=%d, LGWIDTH=%d;\n\t//\n", nbitsin, nbitsout, lgsize);

        assert(lgsize > 0);

        assert(lgsize > 0);

        fprintf(vmain, "\tinput\t\t\t\t\ti_clk, %s, i_ce;\n\t//\n",

        fprintf(vmain, "\tinput\twire\t\t\t\ti_clk, %s, i_ce;\n\t//\n",

                resetw.c_str());

                resetw.c_str());

        if (single_clock) {

        if (single_clock) {

        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_sample;\n");

        fprintf(vmain, "\tinput\twire\t[(2*IWIDTH-1):0]\ti_sample;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_result;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_result;\n");

        } else {

        } else {

        fprintf(vmain, "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");

        fprintf(vmain, "\tinput\twire\t[(2*IWIDTH-1):0]\ti_left, i_right;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");

        fprintf(vmain, "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n");

        fprintf(vmain, "\toutput\treg\t\t\t\to_sync;\n");

        fprintf(vmain, "\toutput\treg\t\t\t\to_sync;\n");

        if (dbg)

        if (dbg)

                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");

                fprintf(vmain, "\toutput\twire\t[33:0]\t\to_dbg;\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\n\n");

        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");

        fprintf(vmain, "\t// Outputs of the FFT, ready for bit reversal.\n");

        if (single_clock)

        if (single_clock)

                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_sample;\n");

                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_sample;\n");

        else

        else

                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");

                fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_left, br_right;\n");

        int     tmp_size = fftsize, lgtmp = lgsize;

        int     tmp_size = fftsize, lgtmp = lgsize;

        if (fftsize == 2) {

        if (fftsize == 2) {

                if (bitreverse) {

                if (bitreverse) {

                        fprintf(vmain, "\treg\tbr_start;\n");

                        fprintf(vmain, "\treg\tbr_start;\n");

                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");

                        fprintf(vmain, "\tinitial br_start = 1\'b0;\n");

                        if (async_reset) {

                        if (async_reset) {

                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_arese_n)\n");

                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_arese_n)\n");

                                fprintf(vmain, "\t\tif (!i_areset_n)\n");

                                fprintf(vmain, "\t\tif (!i_areset_n)\n");

                        } else {

                        } else {

                                fprintf(vmain, "\talways @(posedge i_clk)\n");

                                fprintf(vmain, "\talways @(posedge i_clk)\n");

                                fprintf(vmain, "\t\tif (i_reset)\n");

                                fprintf(vmain, "\t\tif (i_reset)\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1\'b0;\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");

                        fprintf(vmain, "\t\t\tbr_start <= 1\'b1;\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\tlaststage\t#(IWIDTH)\tstage_2(i_clk, %s, i_ce,\n", resetw.c_str());

                fprintf(vmain, "\tlaststage\t#(IWIDTH)\tstage_2(i_clk, %s, i_ce,\n", resetw.c_str());

                fprintf(vmain, "\t\t\t(%s%s), i_left, i_right, br_left, br_right);\n",

                fprintf(vmain, "\t\t\t(%s%s), i_left, i_right, br_left, br_right);\n",

                        (async_reset)?"":"!", resetw.c_str());

                        (async_reset)?"":"!", resetw.c_str());

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

        } else {

        } else {

                int     nbits = nbitsin, dropbit=0;

                int     nbits = nbitsin, dropbit=0;

                int     obits = nbits+1+xtrapbits;

                int     obits = nbits+1+xtrapbits;

                std::string     cmem;

                std::string     cmem;

                FILE    *cmemfp;

                FILE    *cmemfp;

                if ((maxbitsout > 0)&&(obits > maxbitsout))

                if ((maxbitsout > 0)&&(obits > maxbitsout))

                        obits = maxbitsout;

                        obits = maxbitsout;

                // Always do a first stage

                // Always do a first stage

                        bool    mpystage;

                        bool    mpystage;

                        // Last two stages are always non-multiply stages

                        // Last two stages are always non-multiply stages

                        // since the multiplies can be done by adds

                        // since the multiplies can be done by adds

                        mpystage = ((lgtmp-2) <= mpy_stages);

                        mpystage = ((lgtmp-2) <= mpy_stages);

                        fprintf(vmain, "\n\n");

                        if (mpystage)

                        if (mpystage)

                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");

                                fprintf(vmain, "\t// A hardware optimized FFT stage\n");

                        fprintf(vmain, "\n\n");

                        fprintf(vmain, "\twire\t\tw_s%d;\n", fftsize);

                        fprintf(vmain, "\twire\t\tw_s%d;\n", fftsize);

                        if (single_clock) {

                        if (single_clock) {

                                fprintf(vmain, "\twire\t[%d:0]\tw_d%d;\n", 2*(obits+xtrapbits)-1, fftsize);

                                fprintf(vmain, "\twire\t[%d:0]\tw_d%d;\n", 2*(obits+xtrapbits)-1, fftsize);

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 1, 0, inverse);

                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 1, 0, inverse);

                                cmemfp = gen_coeff_open(cmem.c_str());

                                cmemfp = gen_coeff_open(cmem.c_str());

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 1, 0, inverse);

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 1, 0, inverse);

                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 1, 0, inverse);

                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",

                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",

                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",

                                        xtracbits, obits+xtrapbits,

                                        xtracbits, obits+xtrapbits,

                                        lgsize, lgtmp-1,

                                        lgtmp-1, (mpystage)?1:0,

                                        (mpystage)?1:0,

                                        ckpce, cmem.c_str(),

                                        ckpce, cmem.c_str(),

                                        fftsize, resetw.c_str());

                                        fftsize, resetw.c_str());

                                fprintf(vmain, "\t\t\t(%s%s), i_sample, w_d%d, w_s%d%s);\n",

                                fprintf(vmain, "\t\t\t(%s%s), i_sample, w_d%d, w_s%d%s);\n",

                                        (async_reset)?"":"!", resetw.c_str(),

                                        (async_reset)?"":"!", resetw.c_str(),

                                        fftsize, fftsize,

                                        fftsize, fftsize,

                                        ((dbg)&&(dbgstage == fftsize))

                                        ((dbg)&&(dbgstage == fftsize))

                                                ? ", o_dbg":"");

                                                ? ", o_dbg":"");

                        } else {

                        } else {

                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n", fftsize);

                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n", fftsize);

                                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);

                                fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 0, inverse);

                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 2, 0, inverse);

                                cmemfp = gen_coeff_open(cmem.c_str());

                                cmemfp = gen_coeff_open(cmem.c_str());

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 0, inverse);

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 0, inverse);

                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 0, inverse);

                                fprintf(vmain, "\tfftstage%s\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",

                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",

                                        ((dbg)&&(dbgstage == fftsize))?"_dbg":"",

                                        xtracbits, obits+xtrapbits,

                                        xtracbits, obits+xtrapbits,

                                        lgsize, lgtmp-2,

                                        lgtmp-2, (mpystage)?1:0,

                                        (mpystage)?1:0,

                                        ckpce, cmem.c_str(),

                                        ckpce, cmem.c_str(),

                                        fftsize, resetw.c_str());

                                        fftsize, resetw.c_str());

                                fprintf(vmain, "\t\t\t(%s%s), i_left, w_e%d, w_s%d%s);\n",

                                fprintf(vmain, "\t\t\t(%s%s), i_left, w_e%d, w_s%d%s);\n",

                                        (async_reset)?"":"!", resetw.c_str(),

                                        (async_reset)?"":"!", resetw.c_str(),

                                        fftsize, fftsize,

                                        fftsize, fftsize,

                                        ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");

                                        ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 1, inverse);

                                cmem = gen_coeff_fname(coredir.c_str(), fftsize, 2, 1, inverse);

                                cmemfp = gen_coeff_open(cmem.c_str());

                                cmemfp = gen_coeff_open(cmem.c_str());

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 1, inverse);

                                gen_coeffs(cmemfp, fftsize,  nbitsin+xtracbits, 2, 1, inverse);

                                fprintf(vmain, "\tfftstage\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",

                                cmem = gen_coeff_fname(EMPTYSTR, fftsize, 2, 1, inverse);

                                fprintf(vmain, "\tfftstage\t#(IWIDTH,IWIDTH+%d,%d,%d,0,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",

                                        xtracbits, obits+xtrapbits,

                                        xtracbits, obits+xtrapbits,

                                        lgsize, lgtmp-2,

                                        lgtmp-2, (mpystage)?1:0,

                                        (mpystage)?1:0,

                                        ckpce, cmem.c_str(),

                                        ckpce, cmem.c_str(),

                                        fftsize, resetw.c_str());

                                        fftsize, resetw.c_str());

                                fprintf(vmain, "\t\t\t(%s%s), i_right, w_o%d, w_os%d);\n",

                                fprintf(vmain, "\t\t\t(%s%s), i_right, w_o%d, w_os%d);\n",

                                        (async_reset)?"":"!",resetw.c_str(),

                                        (async_reset)?"":"!",resetw.c_str(),

                                        fftsize, fftsize);

                                        fftsize, fftsize);

                        std::string     fname;

                        std::string     fname;

                        fname = coredir + "/";

                        fname = coredir + "/";

                        if (inverse)

                        if (inverse)

                                fname += "i";

                                fname += "i";

                        fname += "fftstage";

                        fname += "fftstage";

                        if (dbg) {

                        if (dbg) {

                                std::string     dbgname(fname);

                                std::string     dbgname(fname);

                                dbgname += "_dbg";

                                dbgname += "_dbg";

                                dbgname += ".v";

                                dbgname += ".v";

                                if (single_clock)

                                if (single_clock)

                                        build_stage(fname.c_str(), fftsize, 1, 0, nbits, xtracbits, ckpce, async_reset, true);

                                        build_stage(fname.c_str(), fftsize, 1, 0, nbits, xtracbits, ckpce, async_reset, true);

                                else

                                else

                                        build_stage(fname.c_str(), fftsize/2, 2, 1, nbits, xtracbits, ckpce, async_reset, true);

                                        build_stage(fname.c_str(), fftsize, 2, 1, nbits, xtracbits, ckpce, async_reset, true);

                        fname += ".v";

                        fname += ".v";

                        if (single_clock) {

                        if (single_clock) {

                                build_stage(fname.c_str(), fftsize, 1, 0,

                                build_stage(fname.c_str(), fftsize, 1, 0,

                                        nbits, xtracbits, ckpce, async_reset,

                                        nbits, xtracbits, ckpce, async_reset,

                                        false);

                                        false);

                        } else {

                        } else {

                                // All stages use the same Verilog, so we only

                                // All stages use the same Verilog, so we only

                                // need to build one

                                // need to build one

                                build_stage(fname.c_str(), fftsize/2, 2, 1,

                                build_stage(fname.c_str(), fftsize, 2, 1,

                                        nbits, xtracbits, ckpce, async_reset, false);

                                        nbits, xtracbits, ckpce, async_reset, false);

                nbits = obits;  // New number of input bits

                nbits = obits;  // New number of input bits

                tmp_size >>= 1; lgtmp--;

                tmp_size >>= 1; lgtmp--;

                dropbit = 0;

                dropbit = 0;

                fprintf(vmain, "\n\n");

                fprintf(vmain, "\n\n");

                while(tmp_size >= 8) {

                while(tmp_size >= 8) {

                        obits = nbits+((dropbit)?0:1);

                        obits = nbits+((dropbit)?0:1);

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                                bool            mpystage;

                                bool            mpystage;

                                mpystage = ((lgtmp-2) <= mpy_stages);

                                mpystage = ((lgtmp-2) <= mpy_stages);

                                if (mpystage)

                                if (mpystage)

                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");

                                        fprintf(vmain, "\t// A hardware optimized FFT stage\n");

                                fprintf(vmain, "\twire\t\tw_s%d;\n",

                                fprintf(vmain, "\twire\t\tw_s%d;\n",

                                        tmp_size);

                                        tmp_size);

                                if (single_clock) {

                                if (single_clock) {

                                        fprintf(vmain,"\twire\t[%d:0]\tw_d%d;\n",

                                        fprintf(vmain,"\twire\t[%d:0]\tw_d%d;\n",

                                                2*(obits+xtrapbits)-1,

                                                2*(obits+xtrapbits)-1,

                                                tmp_size);

                                                tmp_size);

                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 1, 0, inverse);

                                        cmem = gen_coeff_fname(coredir.c_str(), tmp_size, 1, 0, inverse);

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        gen_coeffs(cmemfp, tmp_size,

                                        gen_coeffs(cmemfp, tmp_size,

                                                nbits+xtracbits+xtrapbits, 1, 0, inverse);

                                                nbits+xtracbits+xtrapbits, 1, 0, inverse);

                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",

                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 1, 0, inverse);

                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_%d(i_clk, %s, i_ce,\n",

                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",

                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",

                                                nbits+xtrapbits,

                                                nbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                obits+xtrapbits,

                                                obits+xtrapbits,

                                                lgsize, lgtmp-1,

                                                lgtmp-1, (dropbit)?0:0, (mpystage)?1:0,

                                                (dropbit)?0:0, (mpystage)?1:0,

                                                ckpce,

                                                ckpce,

                                                cmem.c_str(), tmp_size,

                                                cmem.c_str(), tmp_size,

                                                resetw.c_str());

                                                resetw.c_str());

                                        fprintf(vmain, "\t\t\tw_s%d, w_d%d, w_d%d, w_s%d%s);\n",

                                        fprintf(vmain, "\t\t\tw_s%d, w_d%d, w_d%d, w_s%d%s);\n",

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size, tmp_size,

                                                tmp_size, tmp_size,

                                                ((dbg)&&(dbgstage == tmp_size))

                                                ((dbg)&&(dbgstage == tmp_size))

                                                        ?", o_dbg":"");

                                                        ?", o_dbg":"");

                                } else {

                                } else {

                                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n",

                                        fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os%d;\n\t// verilator lint_on  UNUSED\n",

                                                tmp_size);

                                                tmp_size);

                                        fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",

                                        fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",

                                                2*(obits+xtrapbits)-1,

                                                2*(obits+xtrapbits)-1,

                                                tmp_size, tmp_size);

                                                tmp_size, tmp_size);

                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 2, 0, inverse);

                                        cmem = gen_coeff_fname(coredir.c_str(), tmp_size, 2, 0, inverse);

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        gen_coeffs(cmemfp, tmp_size,

                                        gen_coeffs(cmemfp, tmp_size,

                                                nbits+xtracbits+xtrapbits, 2, 0, inverse);

                                                nbits+xtracbits+xtrapbits, 2, 0, inverse);

                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",

                                        cmem = gen_coeff_fname(EMPTYSTR, tmp_size, 2, 0, inverse);

                                        fprintf(vmain, "\tfftstage%s\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_e%d(i_clk, %s, i_ce,\n",

                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",

                                                ((dbg)&&(dbgstage==tmp_size))?"_dbg":"",

                                                nbits+xtrapbits,

                                                nbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                obits+xtrapbits,

                                                obits+xtrapbits,

                                                lgsize, lgtmp-2,

                                                lgtmp-2, (dropbit)?0:0, (mpystage)?1:0,

                                                (dropbit)?0:0, (mpystage)?1:0,

                                                ckpce,

                                                ckpce,

                                                cmem.c_str(), tmp_size,

                                                cmem.c_str(), tmp_size,

                                                resetw.c_str());

                                                resetw.c_str());

                                        fprintf(vmain, "\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",

                                        fprintf(vmain, "\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size, tmp_size,

                                                tmp_size, tmp_size,

                                                ((dbg)&&(dbgstage == tmp_size))

                                                ((dbg)&&(dbgstage == tmp_size))

                                                        ?", o_dbg":"");

                                                        ?", o_dbg":"");

                                        cmem = gen_coeff_fname(EMPTYSTR,

                                        cmem = gen_coeff_fname(coredir.c_str(),

                                                tmp_size, 2, 1, inverse);

                                                tmp_size, 2, 1, inverse);

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        cmemfp = gen_coeff_open(cmem.c_str());

                                        gen_coeffs(cmemfp, tmp_size,

                                        gen_coeffs(cmemfp, tmp_size,

                                                nbits+xtracbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                2, 1, inverse);

                                                2, 1, inverse);

                                        fprintf(vmain, "\tfftstage\t#(%d,%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",

                                        cmem = gen_coeff_fname(EMPTYSTR,

                                                tmp_size, 2, 1, inverse);

                                        fprintf(vmain, "\tfftstage\t#(%d,%d,%d,%d,%d,\n\t\t\t%d, %d, \"%s\")\n\t\tstage_o%d(i_clk, %s, i_ce,\n",

                                                nbits+xtrapbits,

                                                nbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                nbits+xtracbits+xtrapbits,

                                                obits+xtrapbits,

                                                obits+xtrapbits,

                                                lgsize, lgtmp-2,

                                                lgtmp-2, (dropbit)?0:0, (mpystage)?1:0,

                                                (dropbit)?0:0, (mpystage)?1:0,

                                                ckpce, cmem.c_str(), tmp_size,

                                                ckpce, cmem.c_str(), tmp_size,

                                                resetw.c_str());

                                                resetw.c_str());

                                        fprintf(vmain, "\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",

                                        fprintf(vmain, "\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size<<1, tmp_size<<1,

                                                tmp_size, tmp_size);

                                                tmp_size, tmp_size);

                                fprintf(vmain, "\n");

                                fprintf(vmain, "\n");

                        dropbit ^= 1;

                        dropbit ^= 1;

                        nbits = obits;

                        nbits = obits;

                        tmp_size >>= 1; lgtmp--;

                        tmp_size >>= 1; lgtmp--;

                if (tmp_size == 4) {

                if (tmp_size == 4) {

                        obits = nbits+((dropbit)?0:1);

                        obits = nbits+((dropbit)?0:1);

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                        if ((maxbitsout > 0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                        fprintf(vmain, "\twire\t\tw_s4;\n");

                        fprintf(vmain, "\twire\t\tw_s4;\n");

                        if (single_clock) {

                        if (single_clock) {

                                fprintf(vmain, "\twire\t[%d:0]\tw_d4;\n",

                                fprintf(vmain, "\twire\t[%d:0]\tw_d4;\n",

                                        2*(obits+xtrapbits)-1);

                                        2*(obits+xtrapbits)-1);

                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,%d,%d)\tstage_4(i_clk, %s, i_ce,\n",

                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,%d,%d)\tstage_4(i_clk, %s, i_ce,\n",

                                        ((dbg)&&(dbgstage==4))?"_dbg":"",

                                        ((dbg)&&(dbgstage==4))?"_dbg":"",

                                        nbits+xtrapbits, obits+xtrapbits, lgsize,

                                        nbits+xtrapbits, obits+xtrapbits, lgsize,

                                        (inverse)?1:0, (dropbit)?0:0,

                                        (inverse)?1:0, (dropbit)?0:0,

                                        resetw.c_str());

                                        resetw.c_str());

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_d8, w_d4, w_s4%s);\n",

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_d8, w_d4, w_s4%s);\n",

                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");

                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");

                        } else {

                        } else {

                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os4;\n\t// verilator lint_on  UNUSED\n");

                                fprintf(vmain, "\t// verilator lint_off UNUSED\n\twire\t\tw_os4;\n\t// verilator lint_on  UNUSED\n");

                                fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);

                                fprintf(vmain, "\twire\t[%d:0]\tw_e4, w_o4;\n", 2*(obits+xtrapbits)-1);

                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, %s, i_ce,\n",

                                fprintf(vmain, "\tqtrstage%s\t#(%d,%d,%d,0,%d,%d)\tstage_e4(i_clk, %s, i_ce,\n",

                                        ((dbg)&&(dbgstage==4))?"_dbg":"",

                                        ((dbg)&&(dbgstage==4))?"_dbg":"",

                                        nbits+xtrapbits, obits+xtrapbits, lgsize,

                                        nbits+xtrapbits, obits+xtrapbits, lgsize,

                                        (inverse)?1:0, (dropbit)?0:0,

                                        (inverse)?1:0, (dropbit)?0:0,

                                        resetw.c_str());

                                        resetw.c_str());

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_e8, w_e4, w_s4%s);\n",

                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");

                                        ((dbg)&&(dbgstage==4))?", o_dbg":"");

                                fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, %s, i_ce,\n",

                                fprintf(vmain, "\tqtrstage\t#(%d,%d,%d,1,%d,%d)\tstage_o4(i_clk, %s, i_ce,\n",

                                        nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0,

                                        nbits+xtrapbits, obits+xtrapbits, lgsize, (inverse)?1:0, (dropbit)?0:0,

                                        resetw.c_str());

                                        resetw.c_str());

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");

                                fprintf(vmain, "\t\t\t\t\t\tw_s8, w_o8, w_o4, w_os4);\n");

                        dropbit ^= 1;

                        dropbit ^= 1;

                        nbits = obits;

                        nbits = obits;

                        tmp_size >>= 1; lgtmp--;

                        tmp_size >>= 1; lgtmp--;

                        obits = nbits+((dropbit)?0:1);

                        obits = nbits+((dropbit)?0:1);

                        if (obits > nbitsout)

                        if (obits > nbitsout)

                                obits = nbitsout;

                                obits = nbitsout;

                        if ((maxbitsout>0)&&(obits > maxbitsout))

                        if ((maxbitsout>0)&&(obits > maxbitsout))

                                obits = maxbitsout;

                                obits = maxbitsout;

                        fprintf(vmain, "\twire\t\tw_s2;\n");

                        fprintf(vmain, "\twire\t\tw_s2;\n");

                        if (single_clock) {

                        if (single_clock) {

                                fprintf(vmain, "\twire\t[%d:0]\tw_d2;\n",

                                fprintf(vmain, "\twire\t[%d:0]\tw_d2;\n",

                                        2*obits-1);

                                        2*obits-1);

                        } else {

                        } else {

                                fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n",

                                fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n",

                                        2*obits-1);

                                        2*obits-1);

                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))

/*

                                printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");

                        if ((nbits+xtrapbits+1 == obits)&&(!dropbit))

                                printf("Warning: Less than optimal scaling\n");

*/

                        if (single_clock) {

                        if (single_clock) {

                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",

                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",

                                        nbits+xtrapbits, obits,(dropbit)?0:1,

                                        nbits+xtrapbits, obits,(dropbit)?0:1,

                                        resetw.c_str());

                                        resetw.c_str());

                                fprintf(vmain, "\t\t\t\t\tw_s4, w_d4, w_d2, w_s2);\n");

                                fprintf(vmain, "\t\t\t\t\tw_s4, w_d4, w_d2, w_s2);\n");

                        } else {

                        } else {

                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",

                                fprintf(vmain, "\tlaststage\t#(%d,%d,%d)\tstage_2(i_clk, %s, i_ce,\n",

                                        nbits+xtrapbits, obits,(dropbit)?0:1,

                                        nbits+xtrapbits, obits,(dropbit)?0:1,

                                        resetw.c_str());

                                        resetw.c_str());

                                fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");

                                fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");

                        fprintf(vmain, "\n\n");

                        fprintf(vmain, "\n\n");

                        nbits = obits;

                        nbits = obits;

                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");

                fprintf(vmain, "\t// Prepare for a (potential) bit-reverse stage.\n");

                if (single_clock)

                if (single_clock)

                        fprintf(vmain, "\tassign\tbr_sample= w_d2;\n");

                        fprintf(vmain, "\tassign\tbr_sample= w_d2;\n");

                else {

                else {

                        fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");

                        fprintf(vmain, "\tassign\tbr_left  = w_e2;\n");

                        fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                        fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                fprintf(vmain, "\n");

                fprintf(vmain, "\n");

                if (bitreverse) {

                if (bitreverse) {

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");

                        fprintf(vmain, "\tinitial\tr_br_started = 1\'b0;\n");

                        if (async_reset) {

                        if (async_reset) {

                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");

                                fprintf(vmain, "\talways @(posedge i_clk, negedge i_areset_n)\n");

                                fprintf(vmain, "\t\tif (!i_areset_n)\n");

                                fprintf(vmain, "\t\tif (!i_areset_n)\n");

                        } else {

                        } else {

                                fprintf(vmain, "\talways @(posedge i_clk)\n");

                                fprintf(vmain, "\talways @(posedge i_clk)\n");

                                fprintf(vmain, "\t\tif (i_reset)\n");

                                fprintf(vmain, "\t\tif (i_reset)\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1\'b0;\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\telse if (i_ce)\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

        fprintf(vmain, "\twire\tbr_sync;\n");

        fprintf(vmain, "\twire\tbr_sync;\n");

        if (bitreverse) {

        if (bitreverse) {

                if (single_clock) {

                if (single_clock) {

                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_result;\n");

                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_result;\n");

                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());

                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());

                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_sample,\n");

                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_sample,\n");

                        fprintf(vmain, "\t\t\tbr_o_result, br_sync);\n");

                        fprintf(vmain, "\t\t\tbr_o_result, br_sync);\n");

                } else {

                } else {

                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");

                        fprintf(vmain, "\twire\t[(2*OWIDTH-1):0]\tbr_o_left, br_o_right;\n");

                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());

                        fprintf(vmain, "\tbitreverse\t#(%d,%d)\n\t\trevstage(i_clk, %s,\n", lgsize, nbitsout, resetw.c_str());

                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");

                        fprintf(vmain, "\t\t\t(i_ce & br_start), br_left, br_right,\n");

                        fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");

                        fprintf(vmain, "\t\t\tbr_o_left, br_o_right, br_sync);\n");

        } else if (single_clock) {

        } else if (single_clock) {

                fprintf(vmain, "\tassign\tbr_o_result = br_result;\n");

                fprintf(vmain, "\tassign\tbr_o_result = br_result;\n");

                fprintf(vmain, "\tassign\tbr_sync     = w_s2;\n");

                fprintf(vmain, "\tassign\tbr_sync     = w_s2;\n");

        } else {

        } else {

                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");

                fprintf(vmain, "\tassign\tbr_o_left  = br_left;\n");

                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");

                fprintf(vmain, "\tassign\tbr_o_right = br_right;\n");

                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");

                fprintf(vmain, "\tassign\tbr_sync    = w_s2;\n");

        fprintf(vmain,

        fprintf(vmain,

"\n\n"

"\n\n"

"\t// Last clock: Register our outputs, we\'re done.\n"

"\t// Last clock: Register our outputs, we\'re done.\n"

"\tinitial\to_sync  = 1\'b0;\n");

"\tinitial\to_sync  = 1\'b0;\n");

        if (async_reset)

        if (async_reset)

                fprintf(vmain,

                fprintf(vmain,

"\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");

"\talways @(posedge i_clk, negedge i_areset_n)\n\t\tif (!i_areset_n)\n");

        else {

        else {

                fprintf(vmain,

                fprintf(vmain,

"\talways @(posedge i_clk)\n\t\tif (i_reset)\n");

"\talways @(posedge i_clk)\n\t\tif (i_reset)\n");

        fprintf(vmain,

        fprintf(vmain,

"\t\t\to_sync  <= 1\'b0;\n"

"\t\t\to_sync  <= 1\'b0;\n"

"\t\telse if (i_ce)\n"

"\t\telse if (i_ce)\n"

"\t\t\to_sync  <= br_sync;\n"

"\t\t\to_sync  <= br_sync;\n"

"\n"

"\n"

"\talways @(posedge i_clk)\n"

"\talways @(posedge i_clk)\n"

"\t\tif (i_ce)\n");

"\t\tif (i_ce)\n");

        if (single_clock) {

        if (single_clock) {

                fprintf(vmain, "\t\t\to_result  <= br_o_result;\n");

                fprintf(vmain, "\t\t\to_result  <= br_o_result;\n");

        } else {

        } else {

                fprintf(vmain,

                fprintf(vmain,

"\t\tbegin\n"

"\t\tbegin\n"

"\t\t\to_left  <= br_o_left;\n"

"\t\t\to_left  <= br_o_left;\n"

"\t\t\to_right <= br_o_right;\n"

"\t\t\to_right <= br_o_right;\n"

"\t\tend\n");

"\t\tend\n");

        fprintf(vmain,

        fprintf(vmain,

"\n\n"

"\n\n"

"endmodule\n");

"endmodule\n");

        fclose(vmain);

        fclose(vmain);

                std::string     fname;

                std::string     fname;

                fname = coredir + "/butterfly.v";

                fname = coredir + "/butterfly.v";

                build_butterfly(fname.c_str(), xtracbits, rounding,

                build_butterfly(fname.c_str(), xtracbits, rounding,

                        ckpce, async_reset);

                        ckpce, async_reset);

                fname = coredir + "/hwbfly.v";

                fname = coredir + "/hwbfly.v";

                build_hwbfly(fname.c_str(), xtracbits, rounding,

                build_hwbfly(fname.c_str(), xtracbits, rounding,

                        ckpce, async_reset);

                        ckpce, async_reset);

                        // To make debugging easier, we build both of these

                        // To make debugging easier, we build both of these

                        fname = coredir + "/shiftaddmpy.v";

                        fname = coredir + "/shiftaddmpy.v";

                        build_multiply(fname.c_str());

                        build_multiply(fname.c_str());

                        fname = coredir + "/longbimpy.v";

                        fname = coredir + "/longbimpy.v";

                        build_longbimpy(fname.c_str());

                        build_longbimpy(fname.c_str());

                        fname = coredir + "/bimpy.v";

                        fname = coredir + "/bimpy.v";

                        build_bimpy(fname.c_str());

                        build_bimpy(fname.c_str());

                if ((dbg)&&(dbgstage == 4)) {

                if ((dbg)&&(dbgstage == 4)) {

                        fname = coredir + "/qtrstage_dbg.v";

                        fname = coredir + "/qtrstage_dbg.v";

                        if (single_clock)

                        if (single_clock)

                                build_snglquarters(fname.c_str(), rounding,

                                build_snglquarters(fname.c_str(), rounding,

                                        async_reset, true);

                                        async_reset, true);

                        else

                        else

                                build_dblquarters(fname.c_str(), rounding,

                                build_dblquarters(fname.c_str(), rounding,

                                        async_reset, true);

                                        async_reset, true);

                fname = coredir + "/qtrstage.v";

                fname = coredir + "/qtrstage.v";

                if (single_clock)

                if (single_clock)

                        build_snglquarters(fname.c_str(), rounding,

                        build_snglquarters(fname.c_str(), rounding,

                                        async_reset, false);

                                        async_reset, false);

                else

                else

                        build_dblquarters(fname.c_str(), rounding,

                        build_dblquarters(fname.c_str(), rounding,

                                        async_reset, false);

                                        async_reset, false);

                if (single_clock) {

                if (single_clock) {

                        fname = coredir + "/laststage.v";

                        fname = coredir + "/laststage.v";

                        build_sngllast(fname.c_str(), async_reset);

                        build_sngllast(fname.c_str(), async_reset);

                } else {

                } else {

                        if ((dbg)&&(dbgstage == 2))

                        if ((dbg)&&(dbgstage == 2))

                                fname = coredir + "/laststage_dbg.v";

                                fname = coredir + "/laststage_dbg.v";

                        else

                        else

                                fname = coredir + "/laststage.v";

                                fname = coredir + "/laststage.v";

                        build_dblstage(fname.c_str(), rounding,

                        build_dblstage(fname.c_str(), rounding,

                                async_reset, (dbg)&&(dbgstage==2));

                                async_reset, (dbg)&&(dbgstage==2));

                if (bitreverse) {

                if (bitreverse) {

                        fname = coredir + "/bitreverse.v";

                        fname = coredir + "/bitreverse.v";

                        if (single_clock)

                        if (single_clock)

                                build_snglbrev(fname.c_str(), async_reset);

                                build_snglbrev(fname.c_str(), async_reset);

                        else

                        else

                                build_dblreverse(fname.c_str(), async_reset);

                                build_dblreverse(fname.c_str(), async_reset);

                const   char    *rnd_string = "";

                const   char    *rnd_string = "";

                switch(rounding) {

                switch(rounding) {

                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;

                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;

                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;

                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;

                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;

                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;

                        default:

                        default:

                                rnd_string = "/convround.v"; break;

                                rnd_string = "/convround.v"; break;

                } fname = coredir + rnd_string;

                } fname = coredir + rnd_string;

                switch(rounding) {

                switch(rounding) {

                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;

                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;

                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;

                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;

                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;

                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;

                        default:

                        default:

                                build_convround(fname.c_str()); break;

                                build_convround(fname.c_str()); break;

        if (verbose_flag)

        if (verbose_flag)

                printf("All done -- success\n");

                printf("All done -- success\n");

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 36 and 37