OpenCores

Rev 22	Rev 23
Line 59...	Line 59...
`#include <ctype.h>`	`#include <ctype.h>`
`#include <assert.h>`	`#include <assert.h>`

`#define COREDIR "fft-core"`	`#define COREDIR "fft-core"`

	/*
	 * ROUND_T names the bit-dropping scheme to use.  build_quarters() maps
	 * each value to the Verilog module it instantiates: RND_TRUNCATE ->
	 * "truncate", RND_FROMZERO -> "roundfromzero", RND_HALFUP ->
	 * "roundhalfup", and any other value (RND_CONVERGENT) -> "convround".
	 */
	`typedef enum {`
	`RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT`
	`} ROUND_T;`

`const char cpyleft[] =`	`const char cpyleft[] =`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Copyright (C) 2015, Gisselquist Technology, LLC\n"`	`"// Copyright (C) 2015, Gisselquist Technology, LLC\n"`
`"//\n"`	`"//\n"`
Line 123...	Line 127...
`// more storage space to look for these values, but without a`	`// more storage space to look for these values, but without a`
`// redesign that's just what we'll deal with.`	`// redesign that's just what we'll deal with.`
`return lgval(bflydelay(nbits, xtra)+3);`	`return lgval(bflydelay(nbits, xtra)+3);`
`}`	`}`

`void build_quarters(const char *fname) {`	`void build_truncator(const char *fname) {`
	`printf("TRUNCATING!\n");`
	`FILE *fp = fopen(fname, "w");`
	`if (NULL == fp) {`
	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
	`perror("O/S Err was:");`
	`return;`
	`}`

	`fprintf(fp,`
	`"///////////////////////////////////////////////////////////////////////////\n"`
	`"//\n"`
	`"// Filename: truncate.v\n"`
	`"// \n"`
	`"// Project: %s\n"`
	`"//\n"`
	`"// Purpose: Truncation is one of several options that can be used\n"`
	`"// internal to the various FFT stages to drop bits from one \n"`
	`"// stage to the next. In general, it is the simplest method\n"`
	`"// of dropping bits, since it requires only a bit selection.\n"`
	`"//\n"`
	`"// This form of rounding isn\'t really that great for FFT\'s,\n"`
	`"// since it tends to produce a DC bias in the result. (Other\n"`
	`"// less pronounced biases may also exist.)\n"`
	`"//\n"`
	`"// This particular version also registers the output with the\n"`
	`"// clock, so there will be a delay of one going through this\n"`
	`"// module. This will keep it in line with the other forms of\n"`
	`"// rounding that can be used.\n"`
	`"//\n"`
	`"//\n%s"`
	`"//\n",`
	`prjname, creator);`

	`fprintf(fp, "%s", cpyleft);`
	`fprintf(fp,`
	`"module truncate(i_clk, i_ce, i_val, o_val);\n"`
	`"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"`
	`"\tinput\t\t\t\t\ti_clk, i_ce;\n"`
	`"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"`
	`"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"`
	`"\n"`
	`"\talways @(posedge i_clk)\n"`
	`"\t\tif (i_ce)\n"`
	`"\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"`
	`"\n"`
	`"endmodule\n");`
	`}`


	// build_roundhalfup
	//
	// Writes the Verilog source for the "roundhalfup" module into the file
	// named by fname (created or overwritten).  The generated module either
	// passes the input through (when no bits are dropped) or adds one to the
	// truncated value whenever the first dropped bit is set.
	//
	// fname	Path of the Verilog file to create.
	//
	// On failure to open or to finish writing the file, a message is printed
	// to stderr and the function returns without further action.
	//
	// NOTE(review): prjname, creator and cpyleft are defined elsewhere in
	// this file; they are assumed to be NUL-terminated strings -- confirm.
	// NOTE(review): the emitted text declares a wire last_valid_bit that the
	// module never assigns or reads; left untouched here to keep the
	// generated output unchanged.
	void build_roundhalfup(const char *fname) {
		FILE	*fp = fopen(fname, "w");
		if (NULL == fp) {
			fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
			perror("O/S Err was:");
			return;
		}

		// File header comment: project name and creator are substituted in.
		fprintf(fp,
			"///////////////////////////////////////////////////////////////////////////\n"
			"//\n"
			"// Filename: roundhalfup.v\n"
			"// \n"
			"// Project: %s\n"
			"//\n"
			"// Purpose: Rounding half up is the way I was always taught to round in\n"
			"// school. A one half value is added to the result, and then\n"
			"// the result is truncated. When used in an FFT, this produces\n"
			"// less bias than the truncation method, although a bias still\n"
			"// tends to remain.\n"
			"//\n"
			"//\n%s"
			"//\n",
			prjname, creator);

		// Copyright/license banner shared by every generated file.
		fprintf(fp, "%s", cpyleft);

		// The module itself.
		fprintf(fp,
			"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
			"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
			"\tinput\t\t\t\t\ti_clk, i_ce;\n"
			"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
			"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
			"\n"
			"\t// Let's deal with two cases to be as general as we can be here\n"
			"\t//\n"
			"\t// 1. The desired output would lose no bits at all\n"
			"\t// 2. One or more bits would be dropped, so the rounding is simply\n"
			"\t//\t\ta matter of adding one to the bit about to be dropped,\n"
			"\t//\t\tmoving all halfway and above numbers up to the next\n"
			"\t//\t\tvalue.\n"
			"\tgenerate\n"
			"\tif (IWID-SHIFT == OWID)\n"
			"\tbegin // No truncation or rounding, output drops no bits\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
			"\n"
			"\tend else // if (IWID-SHIFT-1 >= OWID)\n"
			"\tbegin // Output drops one bit, can only add one or ... not.\n"
			"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
			"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
			"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
			"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
			"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\n"
			"\t\t\tbegin\n"
			"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse\n"
			"\t\t\t\t\to_val <= rounded_up; // even value\n"
			"\t\t\tend\n"
			"\n"
			"\tend\n"
			"\tendgenerate\n"
			"\n"
			"endmodule\n");

		// Close the stream: the original leaked it.  fclose() also flushes,
		// so its result is the last chance to notice a failed write.
		if (0 != fclose(fp)) {
			fprintf(stderr, "Could not finish writing \'%s\'\n", fname);
			perror("O/S Err was:");
		}
	}

	// build_roundfromzero
	//
	// Writes the Verilog source for the "roundfromzero" module into the file
	// named by fname (created or overwritten).  The generated module rounds
	// to the nearest output value; an input exactly halfway between two
	// output values is rounded up when the input's sign bit is clear and
	// left truncated when it is set, i.e. away from zero.
	//
	// Fixes relative to the previous version:
	//  - the emitted module is now named roundfromzero (it used to be
	//    emitted as "convround", which does not match the module name that
	//    build_quarters() instantiates for RND_FROMZERO);
	//  - the emitted header and case-2 comment now describe this rounding
	//    mode instead of truncation / rounding toward zero;
	//  - the reduction-OR is written as a plain '|' (the "\|" escape is not
	//    a valid C escape sequence);
	//  - the output stream is closed.
	//
	// fname	Path of the Verilog file to create.
	//
	// On failure to open or to finish writing the file, a message is printed
	// to stderr and the function returns without further action.
	//
	// NOTE(review): prjname, creator and cpyleft are defined elsewhere in
	// this file; they are assumed to be NUL-terminated strings -- confirm.
	void build_roundfromzero(const char *fname) {
		FILE	*fp = fopen(fname, "w");
		if (NULL == fp) {
			fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
			perror("O/S Err was:");
			return;
		}

		// File header comment: project name and creator are substituted in.
		fprintf(fp,
			"///////////////////////////////////////////////////////////////////////////\n"
			"//\n"
			"// Filename: roundfromzero.v\n"
			"// \n"
			"// Project: %s\n"
			"//\n"
			"// Purpose: Rounding half away from zero. Values are rounded to the\n"
			"// nearest output value. A value exactly halfway between two\n"
			"// output values is rounded to the one further from zero.\n"
			"//\n"
			"// This particular version also registers the output with the\n"
			"// clock, so there will be a delay of one going through this\n"
			"// module. This will keep it in line with the other forms of\n"
			"// rounding that can be used.\n"
			"//\n"
			"//\n%s"
			"//\n",
			prjname, creator);

		// Copyright/license banner shared by every generated file.
		fprintf(fp, "%s", cpyleft);

		// The module itself.  Three generate cases: no bits dropped, exactly
		// one bit dropped, and two or more bits dropped.
		fprintf(fp,
			"module roundfromzero(i_clk, i_ce, i_val, o_val);\n"
			"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
			"\tinput\t\t\t\t\ti_clk, i_ce;\n"
			"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
			"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
			"\n"
			"\t// Let's deal with three cases to be as general as we can be here\n"
			"\t//\n"
			"\t//\t1. The desired output would lose no bits at all\n"
			"\t//\t2. One bit would be dropped, so the rounding is simply\n"
			"\t//\t\tadjusting the value to be the further from zero in\n"
			"\t//\t\tcases of being halfway between two. If identically\n"
			"\t//\t\tequal to a number, we just leave it as is.\n"
			"\t//\t3. Two or more bits would be dropped. In this case, we round\n"
			"\t//\t\tnormally unless we are rounding a value of exactly\n"
			"\t//\t\thalfway between the two. In the halfway case, we\n"
			"\t//\t\tround away from zero.\n"
			"\tgenerate\n"
			"\tif (IWID-SHIFT == OWID)\n"
			"\tbegin // No truncation or rounding, output drops no bits\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
			"\n"
			"\tend else if (IWID-SHIFT-1 == OWID)\n"
			"\tbegin // Output drops one bit, can only add one or ... not.\n"
			"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
			"\t\twire\t\t\tsign_bit, first_lost_bit;\n"
			"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
			"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
			"\t\tassign\tfirst_lost_bit = i_val[0];\n"
			"\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\n"
			"\t\t\tbegin\n"
			"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse if (sign_bit)\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse\n"
			"\t\t\t\t\to_val <= rounded_up;\n"
			"\t\t\tend\n"
			"\n"
			"\tend else // If there's more than one bit we are dropping\n"
			"\tbegin\n"
			"\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
			"\t\twire\t\t\tsign_bit, first_lost_bit;\n"
			"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
			"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
			"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
			"\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
			"\n"
			"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
			"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\n"
			"\t\t\tbegin\n"
			"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
			"\t\t\t\t\to_val <= rounded_up; // closest value\n"
			"\t\t\t\telse if (sign_bit)\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse\n"
			"\t\t\t\t\to_val <= rounded_up;\n"
			"\t\t\tend\n"
			"\tend\n"
			"\tendgenerate\n"
			"\n"
			"endmodule\n");

		// Close the stream: the original leaked it.  fclose() also flushes,
		// so its result is the last chance to notice a failed write.
		if (0 != fclose(fp)) {
			fprintf(stderr, "Could not finish writing \'%s\'\n", fname);
			perror("O/S Err was:");
		}
	}

	// build_convround
	//
	// Writes the Verilog source for the "convround" (convergent rounding)
	// module into the file named by fname (created or overwritten).  The
	// generated module rounds to the nearest output value; an input exactly
	// halfway between two output values is rounded up only when the lowest
	// kept bit is set, so ties land on an even value.
	//
	// Fixes relative to the previous version: the output stream is now
	// closed, and the reduction-OR is written as a plain '|' (the "\|"
	// escape is not a valid C escape sequence).
	//
	// fname	Path of the Verilog file to create.
	//
	// On failure to open or to finish writing the file, a message is printed
	// to stderr and the function returns without further action.
	//
	// NOTE(review): prjname, creator and cpyleft are defined elsewhere in
	// this file; they are assumed to be NUL-terminated strings -- confirm.
	void build_convround(const char *fname) {
		// Progress message, kept as in the original implementation.
		printf("CONVERGENT--ROUNDING!\n");

		FILE	*fp = fopen(fname, "w");
		if (NULL == fp) {
			fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
			perror("O/S Err was:");
			return;
		}

		// File header comment: project name and creator are substituted in.
		fprintf(fp,
			"///////////////////////////////////////////////////////////////////////////\n"
			"//\n"
			"// Filename: convround.v\n"
			"// \n"
			"// Project: %s\n"
			"//\n"
			"// Purpose: A convergent rounding routine, also known as banker\'s\n"
			"// rounding, Dutch rounding, Gaussian rounding, unbiased\n"
			"// rounding, or ... more, at least according to Wikipedia.\n"
			"//\n"
			"// This form of rounding works by rounding, when the direction\n"
			"// is in question, towards the nearest even value.\n"
			"//\n"
			"//\n%s"
			"//\n",
			prjname, creator);

		// Copyright/license banner shared by every generated file.
		fprintf(fp, "%s", cpyleft);

		// The module itself.  Three generate cases: no bits dropped, exactly
		// one bit dropped, and two or more bits dropped.
		fprintf(fp,
			"module convround(i_clk, i_ce, i_val, o_val);\n"
			"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
			"\tinput\t\t\t\t\ti_clk, i_ce;\n"
			"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
			"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
			"\n"
			"\t// Let's deal with three cases to be as general as we can be here\n"
			"\t//\n"
			"\t//\t1. The desired output would lose no bits at all\n"
			"\t//\t2. One bit would be dropped, so the rounding is simply\n"
			"\t//\t\tadjusting the value to be the nearest even number in\n"
			"\t//\t\tcases of being halfway between two. If identically\n"
			"\t//\t\tequal to a number, we just leave it as is.\n"
			"\t//\t3. Two or more bits would be dropped. In this case, we round\n"
			"\t//\t\tnormally unless we are rounding a value of exactly\n"
			"\t//\t\thalfway between the two. In the halfway case we round\n"
			"\t//\t\tto the nearest even number.\n"
			"\tgenerate\n"
			"\tif (IWID-SHIFT == OWID)\n"
			"\tbegin // No truncation or rounding, output drops no bits\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
			"\n"
			"\tend else if (IWID-SHIFT-1 == OWID)\n"
			"\tbegin // Output drops one bit, can only add one or ... not.\n"
			"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
			"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
			"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
			"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
			"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
			"\t\tassign\tfirst_lost_bit = i_val[0];\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\n"
			"\t\t\tbegin\n"
			"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
			"\t\t\t\t\to_val <= rounded_up; // even value\n"
			"\t\t\t\telse // else round down to the nearest\n"
			"\t\t\t\t\to_val <= truncated_value; // even value\n"
			"\t\t\tend\n"
			"\n"
			"\tend else // If there's more than one bit we are dropping\n"
			"\tbegin\n"
			"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
			"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
			"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
			"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
			"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
			"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
			"\n"
			"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
			"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
			"\n"
			"\t\talways @(posedge i_clk)\n"
			"\t\t\tif (i_ce)\n"
			"\t\t\tbegin\n"
			"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
			"\t\t\t\t\to_val <= rounded_up; // closest value\n"
			"\t\t\t\telse if (last_valid_bit) // Round up to\n"
			"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
			"\t\t\t\telse // else round down to nearest even\n"
			"\t\t\t\t\to_val <= truncated_value;\n"
			"\t\t\tend\n"
			"\tend\n"
			"\tendgenerate\n"
			"\n"
			"endmodule\n");

		// Close the stream: the original leaked it.  fclose() also flushes,
		// so its result is the last chance to notice a failed write.
		if (0 != fclose(fp)) {
			fprintf(stderr, "Could not finish writing \'%s\'\n", fname);
			perror("O/S Err was:");
		}
	}

	`void build_quarters(const char *fname, ROUND_T rounding) {`
`FILE *fp = fopen(fname, "w");`	`FILE *fp = fopen(fname, "w");`
`if (NULL == fp) {`	`if (NULL == fp) {`
`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`	`fprintf(stderr, "Could not open \'%s\' for writing\n", fname);`
`perror("O/S Err was:");`	`perror("O/S Err was:");`
`return;`	`return;`
`}`	`}`
	`const char *rnd_string;`
	`if (rounding == RND_TRUNCATE)`
	`rnd_string = "truncate";`
	`else if (rounding == RND_FROMZERO)`
	`rnd_string = "roundfromzero";`
	`else if (rounding == RND_HALFUP)`
	`rnd_string = "roundhalfup";`
	`else`
	`rnd_string = "convround";`


`fprintf(fp,`	`fprintf(fp,`
`"///////////////////////////////////////////////////////////////////////////\n"`	`"///////////////////////////////////////////////////////////////////////////\n"`
`"//\n"`	`"//\n"`
`"// Filename: qtrstage.v\n"`	`"// Filename: qtrstage.v\n"`
Line 154...	Line 500...
`"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"`	`"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"`
`"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"`	`"\tparameter IWIDTH=16, OWIDTH=IWIDTH+1;\n"`
`"\t// Parameters specific to the core that should be changed when this\n"`	`"\t// Parameters specific to the core that should be changed when this\n"`
`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`	`"\t// core is built ... Note that the minimum LGSPAN is 2. Smaller \n"`
`"\t// spans must use the fftdoubles stage.\n"`	`"\t// spans must use the fftdoubles stage.\n"`
`"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=1;\n"`	`"\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"`
`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`	`"\tinput\t i_clk, i_rst, i_ce, i_sync;\n"`
`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`	`"\tinput\t [(2*IWIDTH-1):0] i_data;\n"`
`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`	`"\toutput\treg [(2*OWIDTH-1):0] o_data;\n"`
`"\toutput\treg o_sync;\n"`	`"\toutput\treg o_sync;\n"`
`"\t\n");`	`"\t\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\treg\t wait_for_sync;\n"`	`"\treg\t wait_for_sync;\n"`
`"\treg\t[2:0] pipeline;\n"`	`"\treg\t[3:0] pipeline;\n"`
`"\n"`	`"\n"`
`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`	`"\treg\t[(IWIDTH):0] sum_r, sum_i, diff_r, diff_i;\n"`
`"\twire\t[(IWIDTH):0] n_diff_r, n_diff_i;\n"`	`"\twire\t[(IWIDTH):0] n_diff_r, n_diff_i;\n"`
`"\tassign n_diff_r = -diff_r;\n"`	`"\tassign n_diff_r = -diff_r;\n"`
`"\tassign n_diff_i = -diff_i;\n"`	`"\tassign n_diff_i = -diff_i;\n"`
`"\n"`	`"\n"`
`"\treg\t[(2*OWIDTH-1):0] ob_a;\n"`	`"\treg\t[(2*OWIDTH-1):0]\tob_a;\n"`
`"\twire\t[(2*OWIDTH-1):0] ob_b;\n"`	`"\twire\t[(2*OWIDTH-1):0]\tob_b;\n"`
`"\treg\t[(OWIDTH-1):0] ob_b_r, ob_b_i;\n"`	`"\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"`
`"\tassign ob_b = { ob_b_r, ob_b_i };\n"`	`"\tassign\tob_b = { ob_b_r, ob_b_i };\n"`
`"\n"`	`"\n"`
`"\treg\t[(LGWIDTH-1):0] iaddr;\n"`	`"\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"`
`"\treg\t[(2*IWIDTH-1):0] imem;\n"`	`"\treg\t[(2*IWIDTH-1):0]\timem;\n"`
`"\n"`	`"\n"`
`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`	`"\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"`
`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`	`"\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"`
`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`	`"\tassign\timem_i = imem[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
Line 188...	Line 534...
`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`	`"\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"`
`"\n"`	`"\n"`
`"\treg [(2*OWIDTH-1):0] omem;\n"`	`"\treg [(2*OWIDTH-1):0] omem;\n"`
`"\n");`	`"\n");`
`fprintf(fp,`	`fprintf(fp,`
`"\twire [(IWIDTH-1):0] rnd;\n"`	`"\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");`
`"\tgenerate\n"`	`fprintf(fp,`
`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`	`"\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");`
`"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"`	`fprintf(fp,`
`"\telse\n"`	`"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"`
`"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"`	`"\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);`
`"\tendgenerate\n"`	`fprintf(fp,`
`"\n"`	`"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"`
	`"\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);`
	`fprintf(fp,`
	`"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"`
	`"\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);`
	`fprintf(fp,`
	`"\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"`
	`"\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);`
	`fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"`
	`"\tassign n_rnd_diff_i = - rnd_diff_i;\n");`
	`/*`
	`fprintf(fp,`
	`"\twire [(IWIDTH-1):0] rnd;\n"`
	`"\tgenerate\n"`
	`"\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"`
	`"\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"`
	`"\telse\n"`
	`"\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"`
	`"\tendgenerate\n"`
	`"\n"`
	`*/`
	`fprintf(fp,`
`"\talways @(posedge i_clk)\n"`	`"\talways @(posedge i_clk)\n"`
`"\t\tif (i_rst)\n"`	`"\t\tif (i_rst)\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\twait_for_sync <= 1'b1;\n"`	`"\t\t\twait_for_sync <= 1'b1;\n"`
`"\t\t\tiaddr <= 0;\n"`	`"\t\t\tiaddr <= 0;\n"`
`"\t\t\tpipeline <= 3'b000;\n"`	`"\t\tend else if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tend\n"`
`"\t\telse if ((i_ce)&&((~wait_for_sync)\|\|(i_sync)))\n"`
`"\t\tbegin\n"`	`"\t\tbegin\n"`
`"\t\t\t// Always\n"`
`"\t\t\timem <= i_data;\n"`	`"\t\t\timem <= i_data;\n"`
`"\t\t\tiaddr <= iaddr + 1;\n"`	`"\t\t\tiaddr <= iaddr + 1;\n"`
`"\t\t\twait_for_sync <= 1'b0;\n"`	`"\t\t\twait_for_sync <= 1'b0;\n"`
`"\n"`	`"\t\tend\n\n");`
`"\t\t\t// In sequence, clock = 0\n"`	`fprintf(fp,`
`"\t\t\tif (iaddr[0])\n"`	`"\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"`
`"\t\t\tbegin\n"`	`"\t// Why not? Because iaddr will always be zero until after the\n"`
`"\t\t\t\tsum_r <= imem_r + i_data_r + rnd;\n"`	`"\t// first i_ce, so we are safe.\n"`
`"\t\t\t\tsum_i <= imem_i + i_data_i + rnd;\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"`	`"\t\tif (i_rst)\n"`
`"\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"`	`"\t\t\tpipeline <= 4'h0;\n"`
`"\n"`	`"\t\telse if (i_ce) // is our pipeline process full? Which stages?\n"`
`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"`	`"\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");`
`"\t\t\tend else\n"`	`fprintf(fp,`
`"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"`	`"\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"`
`"\n"`	`"\talways\t@(posedge i_clk)\n"`
`"\t\t\t// In sequence, clock = 1\n"`	`"\t\tif ((i_ce)&&(iaddr[0]))\n"`
`"\t\t\tif (pipeline[1])\n"`	`"\t\tbegin\n"`
	`"\t\t\tsum_r <= imem_r + i_data_r;\n"`
	`"\t\t\tsum_i <= imem_i + i_data_i;\n"`
	`"\t\t\tdiff_r <= imem_r - i_data_r;\n"`
	`"\t\t\tdiff_i <= imem_i - i_data_i;\n"`
	`"\t\tend\n\n");`
	`fprintf(fp,`
	`"\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");`
	`fprintf(fp,`
	`"\t// Now for pipeline[2]\n"`
	`"\talways\t@(posedge i_clk)\n"`
	`"\t\tif ((i_ce)&&(pipeline[2]))\n"`
	`"\t\tbegin\n"`
	`"\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"`
	`"\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`
	`"\t\t\tif (ODD == 0)\n"`
`"\t\t\tbegin\n"`	`"\t\t\tbegin\n"`
`"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"`	`"\t\t\t\tob_b_r <= rnd_diff_r;\n"`
`"\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"`	`"\t\t\t\tob_b_i <= rnd_diff_i;\n"`
`"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"`	`"\t\t\tend else if (INVERSE==0) begin\n"`
`"\t\t\t\tif (ODD == 0)\n"`	`"\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`
`"\t\t\t\tbegin\n"`	`"\t\t\t\tob_b_r <= rnd_diff_i;\n"`
`"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\t\tob_b_i <= n_rnd_diff_r;\n"`
`"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`	`"\t\t\tend else begin\n"`
`// "\t\t\t\t\tob_b_r <= { (OWIDTH) {1'b0} };\n"`	`"\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"`
`// "\t\t\t\t\tob_b_i <= { (OWIDTH) {1'b0} };\n"`	`"\t\t\t\tob_b_r <= n_rnd_diff_i;\n"`
`"\t\t\t\tend else if (INVERSE==0) begin\n"`	`"\t\t\t\tob_b_i <= rnd_diff_r;\n"`
`"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"`
`"\t\t\t\t\tob_b_r <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`
`"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"`

Line 59...

#include <ctype.h>

#include <ctype.h>

#include <assert.h>

#include <assert.h>

#define COREDIR "fft-core"

#define COREDIR "fft-core"

typedef enum {

        RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT

} ROUND_T;

const char      cpyleft[] =

const char      cpyleft[] =

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Copyright (C) 2015, Gisselquist Technology, LLC\n"

"// Copyright (C) 2015, Gisselquist Technology, LLC\n"

"//\n"

"//\n"

Line 123...

Line 127...

        // more storage space to look for these values, but without a

        // more storage space to look for these values, but without a

        // redesign that's just what we'll deal with.

        // redesign that's just what we'll deal with.

        return lgval(bflydelay(nbits, xtra)+3);

        return lgval(bflydelay(nbits, xtra)+3);

void    build_quarters(const char *fname) {

void    build_truncator(const char *fname) {

        printf("TRUNCATING!\n");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                return;

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"// Filename:   truncate.v\n"

"//             \n"

"// Project:    %s\n"

"//\n"

"// Purpose:    Truncation is one of several options that can be used\n"

"//             internal to the various FFT stages to drop bits from one \n"

"//             stage to the next.  In general, it is the simplest method\n"

"//             of dropping bits, since it requires only a bit selection.\n"

"//\n"

"//             This form of rounding isn\'t really that great for FFT\'s,\n"

"//             since it tends to produce a DC bias in the result.  (Other\n"

"//             less pronounced biases may also exist.)\n"

"//\n"

"//             This particular version also registers the output with the\n"

"//             clock, so there will be a delay of one going through this\n"

"//             module.  This will keep it in line with the other forms of\n"

"//             rounding that can be used.\n"

"//\n"

"//\n%s"

"//\n",

                prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

"module truncate(i_clk, i_ce, i_val, o_val);\n"

        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"

        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"

"\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"

"\n"

"endmodule\n");

void    build_roundhalfup(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                return;

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"// Filename:   roundhalfup.v\n"

"//             \n"

"// Project:    %s\n"

"//\n"

"// Purpose:    Rounding half up is the way I was always taught to round in\n"

"//             school.  A one half value is added to the result, and then\n"

"//             the result is truncated.  When used in an FFT, this produces\n"

"//             less bias than the truncation method, although a bias still\n"

"//             tends to remain.\n"

"//\n"

"//\n%s"

"//\n",

                prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"

        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"

        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"

"\n"

        "\t// Let's deal with two cases to be as general as we can be here\n"

        "\t//\n"

        "\t//   1. The desired output would lose no bits at all\n"

        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"

        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"

        "\t//\t\tmoving all halfway and above numbers up to the next\n"

        "\t//\t\tvalue.\n"

        "\tgenerate\n"

        "\tif (IWID-SHIFT == OWID)\n"

        "\tbegin // No truncation or rounding, output drops no bits\n"

"\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"

"\n"

        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"

        "\tbegin // Output drops one bit, can only add one or ... not.\n"

                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"

                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"

                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"

                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"

                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"

"\n"

                "\t\talways @(posedge i_clk)\n"

                "\t\t\tif (i_ce)\n"

                "\t\t\tbegin\n"

                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"

                        "\t\t\t\t\to_val <= truncated_value;\n"

                        "\t\t\t\telse\n"

                        "\t\t\t\t\to_val <= rounded_up; // even value\n"

                "\t\t\tend\n"

"\n"

        "\tend\n"

        "\tendgenerate\n"

"\n"

"endmodule\n");

void    build_roundfromzero(const char *fname) {

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                return;

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"// Filename:   roundfromzero.v\n"

"//             \n"

"// Project:    %s\n"

"//\n"

"// Purpose:    Truncation is one of several options that can be used\n"

"//             internal to the various FFT stages to drop bits from one \n"

"//             stage to the next.  In general, it is the simplest method\n"

"//             of dropping bits, since it requires only a bit selection.\n"

"//\n"

"//             This form of rounding isn\'t really that great for FFT\'s,\n"

"//             since it tends to produce a DC bias in the result.  (Other\n"

"//             less pronounced biases may also exist.)\n"

"//\n"

"//             This particular version also registers the output with the\n"

"//             clock, so there will be a delay of one going through this\n"

"//             module.  This will keep it in line with the other forms of\n"

"//             rounding that can be used.\n"

"//\n"

"//\n%s"

"//\n",

                prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

"module convround(i_clk, i_ce, i_val, o_val);\n"

        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"

        "\tinput\t\t\t\t\ti_clk, i_ce;\n"

        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"

        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"

"\n"

        "\t// Let's deal with three cases to be as general as we can be here\n"

        "\t//\n"

        "\t//\t1. The desired output would lose no bits at all\n"

        "\t//\t2. One bit would be dropped, so the rounding is simply\n"

        "\t//\t\tadjusting the value to be the closer to zero in\n"

        "\t//\t\tcases of being halfway between two.  If identically\n"

        "\t//\t\tequal to a number, we just leave it as is.\n"

        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"

        "\t//\t\tnormally unless we are rounding a value of exactly\n"

        "\t//\t\thalfway between the two.  In the halfway case, we\n"

        "\t//\t\tround away from zero.\n"

        "\tgenerate\n"

        "\tif (IWID-SHIFT == OWID)\n"

        "\tbegin // No truncation or rounding, output drops no bits\n"

"\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"

"\n"

        "\tend else if (IWID-SHIFT-1 == OWID)\n"

        "\tbegin // Output drops one bit, can only add one or ... not.\n"

        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"

        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"

        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"

        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"

        "\t\tassign\tfirst_lost_bit = i_val[0];\n"

        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"

"\n"

        "\t\talways @(posedge i_clk)\n"

                "\t\t\tif (i_ce)\n"

                "\t\t\tbegin\n"

                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"

                                "\t\t\t\t\to_val <= truncated_value;\n"

                        "\t\t\t\telse if (sign_bit)\n"

                                "\t\t\t\t\to_val <= truncated_value;\n"

                        "\t\t\t\telse\n"

                                "\t\t\t\t\to_val <= rounded_up;\n"

                "\t\t\tend\n"

"\n"

        "\tend else // If there's more than one bit we are dropping\n"

        "\tbegin\n"

                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"

                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"

                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"

                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"

                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"

                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"

"\n"

                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"

                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"

"\n"

                "\t\talways @(posedge i_clk)\n"

                        "\t\t\tif (i_ce)\n"

                        "\t\t\tbegin\n"

                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"

                                "\t\t\t\t\to_val <= truncated_value;\n"

                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"

                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"

                        "\t\t\t\telse if (sign_bit)\n"

                                "\t\t\t\t\to_val <= truncated_value;\n"

                        "\t\t\t\telse\n"

                                "\t\t\t\t\to_val <= rounded_up;\n"

                        "\t\t\tend\n"

        "\tend\n"

        "\tendgenerate\n"

"\n"

"endmodule\n");

/*
 * build_convround
 *
 * Writes the Verilog source of the "convround" module to the file named
 * by fname, replacing any previous contents (the file is opened with "w").
 *
 * The emitted text is, in order:
 *   1. a comment header naming the file and its purpose, filled in with
 *      the file-level strings prjname and creator,
 *   2. the file-level copyright text cpyleft,
 *   3. the module itself, whose generate block picks one of three bodies
 *      from the IWID, OWID and SHIFT parameters: no bits dropped, exactly
 *      one bit dropped, or more than one bit dropped.
 *
 * A progress message is printed on stdout before anything else is done.
 * If the file cannot be opened, a diagnostic is written to stderr and the
 * function returns without producing any output file.
 *
 * fname: path of the Verilog file to create.
 */
void    build_convround(const char *fname) {
        printf("CONVERGENT--ROUNDING!\n");

        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                return;
        }

        /* File banner: filename, project, purpose, and creator lines. */
        fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename:   convround.v\n"
"//             \n"
"// Project:    %s\n"
"//\n"
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
"//             rounding, or ... more, at least according to Wikipedia.\n"
"//\n"
"//             This form of rounding works by rounding, when the direction\n"
"//             is in question, towards the nearest even value.\n"
"//\n"
"//\n%s"
"//\n",
                prjname, creator);

        /* Copyright text, passed as an argument rather than as a format. */
        fprintf(fp, "%s", cpyleft);

        /* The module body; it contains no conversion specifications. */
        fprintf(fp,
"module convround(i_clk, i_ce, i_val, o_val);\n"
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
"\n"
"\t// Let's deal with three cases to be as general as we can be here\n"
"\t//\n"
"\t//\t1. The desired output would lose no bits at all\n"
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
"\t//\t\tadjusting the value to be the nearest even number in\n"
"\t//\t\tcases of being halfway between two.  If identically\n"
"\t//\t\tequal to a number, we just leave it as is.\n"
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
"\t//\t\tnormally unless we are rounding a value of exactly\n"
"\t//\t\thalfway between the two.  In the halfway case we round\n"
"\t//\t\tto the nearest even number.\n"
"\tgenerate\n"
"\tif (IWID-SHIFT == OWID)\n"
"\tbegin // No truncation or rounding, output drops no bits\n"
"\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
"\n"
"\tend else if (IWID-SHIFT-1 == OWID)\n"
"\tbegin // Output drops one bit, can only add one or ... not.\n"
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
"\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\n"
"\t\t\tbegin\n"
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
"\t\t\t\t\to_val <= truncated_value;\n"
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
"\t\t\t\t\to_val <= rounded_up; // even value\n"
"\t\t\t\telse // else round down to the nearest\n"
"\t\t\t\t\to_val <= truncated_value; // even value\n"
"\t\t\tend\n"
"\n"
"\tend else // If there's more than one bit we are dropping\n"
"\tbegin\n"
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
"\n"
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
"\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\n"
"\t\t\tbegin\n"
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
"\t\t\t\t\to_val <= truncated_value;\n"
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
"\t\t\t\telse   // else round down to nearest even\n"
"\t\t\t\t\to_val <= truncated_value;\n"
"\t\t\tend\n"
"\tend\n"
"\tendgenerate\n"
"\n"
"endmodule\n");

        /*
         * Release the stream once the module has been written, so the
         * handle is not leaked and buffered output is flushed here.
         */
        fclose(fp);
}

void    build_quarters(const char *fname, ROUND_T rounding) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

        else

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   qtrstage.v\n"

"// Filename:   qtrstage.v\n"

Line 154...

Line 500...

"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"

"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"

        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"

        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// Parameters specific to the core that should be changed when this\n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"

        "\t// spans must use the fftdoubles stage.\n"

        "\t// spans must use the fftdoubles stage.\n"

        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=1;\n"

        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"

        "\toutput\treg                          o_sync;\n"

        "\toutput\treg                          o_sync;\n"

        "\t\n");

        "\t\n");

        fprintf(fp,

        fprintf(fp,

        "\treg\t        wait_for_sync;\n"

        "\treg\t        wait_for_sync;\n"

        "\treg\t[2:0]   pipeline;\n"

        "\treg\t[3:0]   pipeline;\n"

"\n"

"\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"

        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"

        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"

        "\tassign n_diff_r = -diff_r;\n"

        "\tassign n_diff_r = -diff_r;\n"

        "\tassign n_diff_i = -diff_i;\n"

        "\tassign n_diff_i = -diff_i;\n"

"\n"

"\n"

        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"

        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"

        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"

        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"

        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"

        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"

        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"

        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"

"\n"

"\n"

        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"

        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"

        "\treg\t[(2*IWIDTH-1):0]        imem;\n"

        "\treg\t[(2*IWIDTH-1):0]\timem;\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"

"\n"

"\n"

Line 188...

Line 534...

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"

"\n"

"\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

        "\treg  [(2*OWIDTH-1):0]        omem;\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");

        "\tgenerate\n"

        fprintf(fp,

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");

                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"

        fprintf(fp,

        "\telse\n"

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"

                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"

        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);

        "\tendgenerate\n"

        fprintf(fp,

"\n"

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"

        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"

        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"

        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);

        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"

                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");

/*

        fprintf(fp,

        "\twire [(IWIDTH-1):0]  rnd;\n"

        "\tgenerate\n"

        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"

                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"

        "\telse\n"

                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"

        "\tendgenerate\n"

"\n"

*/

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tiaddr <= 0;\n"

                        "\t\t\tpipeline <= 3'b000;\n"

                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tend\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\t// Always\n"

                        "\t\t\timem <= i_data;\n"

                        "\t\t\timem <= i_data;\n"

                        "\t\t\tiaddr <= iaddr + 1;\n"

                        "\t\t\tiaddr <= iaddr + 1;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

"\n"

                "\t\tend\n\n");

                        "\t\t\t// In sequence, clock = 0\n"

        fprintf(fp,

                        "\t\t\tif (iaddr[0])\n"

        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"

                        "\t\t\tbegin\n"

        "\t// Why not?  Because iaddr will always be zero until after the\n"

                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"

        "\t// first i_ce, so we are safe.\n"

                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"

        "\talways\t@(posedge i_clk)\n"

                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"

                "\t\tif (i_rst)\n"

                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"

                        "\t\t\tpipeline <= 4'h0;\n"

"\n"

                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"

                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");

                        "\t\t\tend else\n"

        fprintf(fp,

                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"

        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"

"\n"

        "\talways\t@(posedge i_clk)\n"

                        "\t\t\t// In sequence, clock = 1\n"

                "\t\tif ((i_ce)&&(iaddr[0]))\n"

                        "\t\t\tif (pipeline[1])\n"

                "\t\tbegin\n"

                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"

                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"

                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"

                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"

                "\t\tend\n\n");

        fprintf(fp,

        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");

        fprintf(fp,

        "\t// Now for pipeline[2]\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif ((i_ce)&&(pipeline[2]))\n"

                "\t\tbegin\n"

                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"

                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\tif (ODD == 0)\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"

                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"

        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"

                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"

                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"

                        "\t\t\tend else if (INVERSE==0) begin\n"

                                "\t\t\t\tif (ODD == 0)\n"

                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

                                "\t\t\t\tbegin\n"

                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"

"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"

"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

                        "\t\t\tend else begin\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"

                                "\t\t\t\tend else if (INVERSE==0) begin\n"

                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"

"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"

"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                                "\t\t\t\tend else begin\n"

"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"

"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"

// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"

                                "\t\t\t\tend\n"

                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"

                        "\t\t\tend\n"

                        "\t\t\tend\n"

                        "\t\t\t// In sequence, clock = 2\n"

                "\t\tend\n\n");

                        "\t\t\tif (pipeline[2])\n"

        fprintf(fp,

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin // In sequence, clock = 3\n"

                        "\t\t\tif (pipeline[3])\n"

                        "\t\t\tbegin\n"

                        "\t\t\tbegin\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\tomem <= ob_b;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                                "\t\t\t\to_data <= ob_a;\n"

                        "\t\t\tend else\n"

                        "\t\t\tend else\n"

                                "\t\t\t\to_data <= omem;\n"

                                "\t\t\t\to_data <= omem;\n"

                        "\t\t\t// Don\'t forget in the sync check that we are running\n"

                "\t\tend\n\n");

                        "\t\t\t// at two clocks per sample.  Thus we need to\n"

                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

        fprintf(fp,

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"

        "\t// Don\'t forget in the sync check that we are running\n"

                "\t\tend\n"

        "\t// at two clocks per sample.  Thus we need to\n"

"endmodule\n");

        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"

        "\talways\t@(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");

        fprintf(fp, "endmodule\n");

void    build_dblstage(const char *fname) {

void    build_dblstage(const char *fname, ROUND_T rounding) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

        else

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   dblstage.v\n"

"// Filename:   dblstage.v\n"

"//\n"

"//\n"

Line 311...

Line 695...

"//\n", prjname, creator);

"//\n", prjname, creator);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp, "%s", cpyleft);

        fprintf(fp,

        fprintf(fp,

"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"

"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"

        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0, ROUND=1;\n"

        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\toutput\treg\t\t\to_sync;\n"

        "\n");

        "\n");

Line 329...

Line 713...

                                "\t\t\t\t\to_out_1r, o_out_1i;\n"

                                "\t\t\t\t\to_out_1r, o_out_1i;\n"

"\n"

"\n"

"\n"

"\n"

        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"

        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"

"\n"

"\n"

        "\twire\tsigned\t[(IWIDTH):0]\trnd;\n"

"\n");

"\n"

        fprintf(fp,

        "\tgenerate\n"

        "\tif ((ROUND==0)||(IWIDTH+1-OWIDTH-SHIFT==0))\n"

                "\t\tassign rnd = { {(IWIDTH+1){1'b0}} };\n"

        "\telse if (IWIDTH+1-OWIDTH-SHIFT==1)\n"

                "\t\tassign rnd = { {(IWIDTH){1'b0}}, 1'b1 };\n"

        "\telse if (IWIDTH+1-OWIDTH-SHIFT>1)\n"

                "\t\tassign rnd = { {(IWIDTH-(IWIDTH+1-OWIDTH-SHIFT-1)){1'b0}}, 1'b1, {(IWIDTH+1-OWIDTH-SHIFT-1){1'b0}} };\n"

        "\tendgenerate\n"

"\n"

        "\t// Don't forget that we accumulate a bit by adding two values\n"

        "\t// Don't forget that we accumulate a bit by adding two values\n"

        "\t// together. Therefore our intermediate value must have one more\n"

        "\t// together. Therefore our intermediate value must have one more\n"

        "\t// bit than the two originals.\n"

        "\t// bit than the two originals.\n"

        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"

        "\twire\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");

"\n"

        fprintf(fp,

        "\treg\twait_for_sync;\n"

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0r(i_clk, i_ce,\n"

        "\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0i(i_clk, i_ce,\n"

        "\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1r(i_clk, i_ce,\n"

        "\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1i(i_clk, i_ce,\n"

        "\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);

        fprintf(fp,

        "\treg\twait_for_sync, rnd_sync;\n"

"\n"

"\n"

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_rst)\n"

                "\t\tif (i_rst)\n"

                "\t\tbegin\n"

                        "\t\t\trnd_sync <= 1'b0;\n"

                        "\t\t\to_sync <= 1'b0;\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                        "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

                "\t\tbegin\n"

                "\t\tbegin\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\twait_for_sync <= 1'b0;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\tout_0r <= i_in_0r + i_in_1r + rnd;\n"

                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"

                        "\t\t\tout_0i <= i_in_0i + i_in_1i + rnd;\n"

                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\tout_1r <= i_in_0r - i_in_1r + rnd;\n"

                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"

                        "\t\t\tout_1i <= i_in_0i - i_in_1i + rnd;\n"

                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"

                        "\t\t\t//\n"

                        "\t\t\t//\n"

                        "\t\t\to_sync <= i_sync;\n"

                        "\t\t\trnd_sync <= i_sync;\n"

                        "\t\t\to_sync <= rnd_sync;\n"

                "\t\tend\n"

                "\t\tend\n"

"\n"

"\n"

        "\t// Now, if the master control program doesn't want to keep all of\n"

        "\t// our bits, we can shift down to OWIDTH bits here.\n"

        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"

"\n"

        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"

        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"

        "\tassign\to_right = { o_out_1r, o_out_1i };\n"

        "\tassign\to_right = { o_out_1r, o_out_1i };\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

Line 611...

Line 996...

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    build_butterfly(const char *fname, int xtracbits) {

void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

        else

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   butterfly.v\n"

"// Filename:   butterfly.v\n"

Line 698...

Line 1092...

                "\t\to_left, o_right, o_aux);\n"

                "\t\to_left, o_right, o_aux);\n"

        "\t// Public changeable parameters ...\n"

        "\t// Public changeable parameters ...\n"

        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"

                        "\t\t\tSHIFT=0, ROUND=1;\n"

                        "\t\t\tSHIFT=0;\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// this value is fractional, then round up to the nearest\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"

        "\tparameter\tLGDELAY=%d;\n"

        "\tparameter\tLGDELAY=%d;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

Line 865...

Line 1259...

        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"

        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"

"\n"

"\n"

"\n"

"\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"

        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"

                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\twire\tsigned\t[(CWIDTH+IWIDTH+3-1):0]        rnd;\n"

        "\tgenerate\n"

        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"

                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"

        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"

                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"

        "\telse\n"

                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"

                "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"

        "\tendgenerate\n"

"\n");

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// although they only need to be (IWIDTH+1)\n"

                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"

                        "\t\t\t// extra bits we need to get rid of.)\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

"\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\tb_right_r <= mpy_r + rnd;\n"

                        "\t\t\tb_right_i <= mpy_i + rnd;\n"

                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"

                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"

                        "\t\t\to_aux <= aux & ovalid;\n"

                "\t\tend\n"

"\n");

"\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Final clock--clock and remove unnecessary bits.\n"

        "\t// Let's do some rounding and remove unnecessary bits.\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"

        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"

        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"

        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"

        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"

        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"

        "\t// them, but the actual values will never fill all these bits.\n"

        "\t// them, but the actual values will never fill all these bits.\n"

        "\t// In particular, we only need:\n"

        "\t// In particular, we only need:\n"

Line 925...

Line 1288...

        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"

        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"

        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"

        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"

        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"

        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"

        "\t// or if he wishes to arbitrarily shift some of these off (via\n"

        "\t// or if he wishes to arbitrarily shift some of these off (via\n"

        "\t// SHIFT) we accomplish that here.\n"

        "\t// SHIFT) we accomplish that here.\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

"\n");

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        fprintf(fp,

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        fprintf(fp,

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"

        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",

                rnd_string);

        fprintf(fp,

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"

        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",

                rnd_string);

        fprintf(fp,

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"

        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"

        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

                "\t\tif (i_ce)\n"

                "\t\tbegin\n"

                        "\t\t\t// First clock, recover all values\n"

                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"

                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"

                        "\t\t\t// although they only need to be (IWIDTH+1)\n"

                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"

                        "\t\t\t// extra bits we need to get rid of.)\n"

                        "\t\t\tmpy_r <= p_one - p_two;\n"

                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

"\n"

"\n"

                        "\t\t\t// Second clock, round and latch for final clock\n"

                        "\t\t\tb_right_r <= rnd_right_r;\n"

                        "\t\t\tb_right_i <= rnd_right_i;\n"

                        "\t\t\tb_left_r <= rnd_left_r;\n"

                        "\t\t\tb_left_i <= rnd_left_i;\n"

                        "\t\t\to_aux <= aux & ovalid;\n"

                "\t\tend\n"

"\n");

        fprintf(fp,

        "\t// As a final step, we pack our outputs into two packed two\'s\n"

        "\t// As a final step, we pack our outputs into two packed two\'s\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\tassign       o_left = { o_left_r, o_left_i };\n"

        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"

        "\tassign       o_right= { o_right_r,o_right_i};\n"

        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

        fclose(fp);

        fclose(fp);

void    build_hwbfly(const char *fname, int xtracbits) {

void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {

        FILE    *fp = fopen(fname, "w");

        FILE    *fp = fopen(fname, "w");

        if (NULL == fp) {

        if (NULL == fp) {

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);

                perror("O/S Err was:");

                perror("O/S Err was:");

                return;

                return;

        const   char    *rnd_string;

        if (rounding == RND_TRUNCATE)

                rnd_string = "truncate";

        else if (rounding == RND_FROMZERO)

                rnd_string = "roundfromzero";

        else if (rounding == RND_HALFUP)

                rnd_string = "roundhalfup";

        else

                rnd_string = "convround";

        fprintf(fp,

        fprintf(fp,

"///////////////////////////////////////////////////////////////////////////\n"

"///////////////////////////////////////////////////////////////////////////\n"

"//\n"

"//\n"

"// Filename:   hwbfly.v\n"

"// Filename:   hwbfly.v\n"

"//\n"

"//\n"

Line 973...

Line 1382...

"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"

                "\t\to_left, o_right, o_aux);\n"

                "\t\to_left, o_right, o_aux);\n"

        "\t// Public changeable parameters ...\n"

        "\t// Public changeable parameters ...\n"

        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\t// Parameters specific to the core that should not be changed.\n"

        "\tparameter\tSHIFT=0, ROUND=1;\n"

        "\tparameter\tSHIFT=0;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\ti_clk, i_rst, i_ce;\n"

        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"

        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"

        "\tinput\t\ti_aux;\n"

        "\tinput\t\ti_aux;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"

Line 1073...

Line 1482...

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// multiply.  Here, we recover them.  During the multiply,\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// therefore, the left_x values need to be right shifted by\n"

        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"

        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"

        "\t// extension.\n"

        "\t// extension.\n"

        "\twire\taux_s;\n"

        "\twire\taux_s, aux_ss;\n"

        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"

        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"

        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"

        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"

        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"

        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"

        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"

"\n"

"\n"

"\n"

"\n"

        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"

        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");

                                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"

        fprintf(fp,

        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"

        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");

        "\twire signed  [(CWIDTH+IWIDTH+3-1):0] rnd;\n"

        "\tgenerate\n"

        fprintf(fp,

        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"

                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"

        "\t\t\t\t{ {2{left_sr[(IWIDTH+CWIDTH)]}}, left_sr }, rnd_left_r);\n\n",

        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"

                rnd_string);

                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"

        fprintf(fp,

        "\telse\n"

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"

                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"

        "\t\t\t\t{ {2{left_si[(IWIDTH+CWIDTH)]}}, left_si }, rnd_left_i);\n\n",

                        "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"

                rnd_string);

        "\tendgenerate\n"

        fprintf(fp,

"\n");

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"

        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);

        fprintf(fp,

        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"

        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);

        fprintf(fp,

        fprintf(fp,

        "\talways @(posedge i_clk)\n"

        "\talways @(posedge i_clk)\n"

        "\t\tif (i_rst)\n"

        "\t\tif (i_rst)\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\tleft_saved <= 0;\n"

                "\t\t\tleft_saved <= 0;\n"

                "\t\t\tb_left_r <= 0;\n"

                "\t\t\tb_left_i <= 0;\n"

                "\t\t\tb_right_r <= 0;\n"

                "\t\t\tb_right_i <= 0;\n"

                "\t\t\to_aux <= 1'b0;\n"

                "\t\t\to_aux <= 1'b0;\n"

        "\t\tend else if (i_ce)\n"

        "\t\tend else if (i_ce)\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\t// First clock, recover all values\n"

                "\t\t\t// First clock, recover all values\n"

                "\t\t\tleft_saved <= leftvv;\n"

                "\t\t\tleft_saved <= leftvv;\n"

Line 1118...

Line 1527...

                "\t\t\t// extra bits we need to get rid of.)\n"

                "\t\t\t// extra bits we need to get rid of.)\n"

                "\t\t\tmpy_r <= p_one - p_two;\n"

                "\t\t\tmpy_r <= p_one - p_two;\n"

                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"

"\n"

"\n"

                "\t\t\t// Second clock, round and latch for final clock\n"

                "\t\t\t// Second clock, round and latch for final clock\n"

                "\t\t\tb_right_r <= mpy_r + rnd;\n"

                "\t\t\tb_right_i <= mpy_i + rnd;\n"

                "\t\t\tb_left_r <= { {2{left_sr[(IWIDTH+CWIDTH)]}},left_sr } + rnd;\n"

                "\t\t\tb_left_i <= { {2{left_si[(IWIDTH+CWIDTH)]}},left_si } + rnd;\n"

"\n"

"\n"

                "\t\t\to_aux <= aux_s;\n"

                "\t\t\to_aux <= aux_s;\n"

        "\t\tend\n"

        "\t\tend\n"

        "\n");

        "\n");

        fprintf(fp,

        fprintf(fp,

        "\t// Final step--remove unnecessary bits.\n"

        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"

"\n"

        "\t// As a final step, we pack our outputs into two packed two's\n"

        "\t// As a final step, we pack our outputs into two packed two's\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// complement numbers per output word, so that each output word\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\t// portion and the bottom half being the imaginary portion.\n"

        "\tassign\to_left = { o_left_r, o_left_i };\n"

        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"

        "\tassign\to_right= { o_right_r,o_right_i};\n"

        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"

"\n"

"\n"

"endmodule\n");

"endmodule\n");

Line 1203...

Line 1602...

"\treg  [(2*CWIDTH-1):0]        ib_c;\n"

"\treg  [(2*CWIDTH-1):0]        ib_c;\n"

"\treg  ib_sync;\n"

"\treg  ib_sync;\n"

"\n"

"\n"

"\treg  b_started;\n"

"\treg  b_started;\n"

"\twire ob_sync;\n"

"\twire ob_sync;\n"

"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");

"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");

        fprintf(fstage,

        fprintf(fstage,

"\n"

"\n"

"\t// %scmem is defined as an array of real and complex values,\n"

"\t// %scmem is defined as an array of real and complex values,\n"

"\t// where the top CWIDTH bits are the real value and the bottom\n"

"\t// where the top CWIDTH bits are the real value and the bottom\n"

"\t// CWIDTH bits are the imaginary value.\n"

"\t// CWIDTH bits are the imaginary value.\n"

Line 1279...

Line 1678...

"\talways @(posedge i_clk)\n"

"\talways @(posedge i_clk)\n"

        "\t\tif (i_rst)\n"

        "\t\tif (i_rst)\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\t\twait_for_sync <= 1'b1;\n"

                "\t\t\tiaddr <= 0;\n"

                "\t\t\tiaddr <= 0;\n"

                "\t\t\toB <= 0;\n"

                "\t\t\tib_sync   <= 1'b0;\n"

                "\t\t\tib_sync   <= 1'b0;\n"

                "\t\t\to_sync    <= 1'b0;\n"

                "\t\t\tb_started <= 1'b0;\n"

        "\t\tend\n"

        "\t\tend\n"

        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"

        "\t\tbegin\n"

        "\t\tbegin\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\t// First step: Record what we\'re not ready to use yet\n"

                "\t\t\t// First step: Record what we\'re not ready to use yet\n"

                "\t\t\t//\n"

                "\t\t\t//\n"

                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"

                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"

                "\t\t\tiaddr <= iaddr + 1;\n"

                "\t\t\tiaddr <= iaddr + 1;\n"

                "\t\t\twait_for_sync <= 1'b0;\n"

                "\t\t\twait_for_sync <= 1'b0;\n"

"\n"

        "\t\tend\n\n");

                "\t\t\t//\n"

                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"

        fprintf(fstage,

                "\t\t\t// butterfly\n"

        "\t//\n"

                "\t\t\t//\n"

        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"

                "\t\t\tif (iaddr[LGSPAN])\n"

        "\t//\n"

                "\t\t\tbegin\n"

        "\talways\t@(posedge i_clk)\n"

                        "\t\t\t\t// One input from memory, ...\n"

        "\tif ((i_ce)&&(iaddr[LGSPAN]))\n"

                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"

                "\t\tbegin\n"

                        "\t\t\t\t// One input clocked in from the top\n"

                        "\t\t\t// One input from memory, ...\n"

                        "\t\t\t\tib_b <= i_data;\n"

                        "\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t\t// Set the sync to true on the very first\n"

                        "\t\t\t// One input clocked in from the top\n"

                        "\t\t\t\t// valid input in, and hence on the very\n"

                        "\t\t\tib_b <= i_data;\n"

                        "\t\t\t\t// first valid data out per FFT.\n"

                        "\t\t\t// Set the sync to true on the very first\n"

                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"

                        "\t\t\t// valid input in, and hence on the very\n"

                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t// first valid data out per FFT.\n"

                "\t\t\tend else begin\n"

                        "\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"

                        "\t\t\t\t// Just to make debugging easier, let\'s\n"

                        "\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"

                        "\t\t\t\t// clear these registers.  That\'ll make\n"

                "\t\tend\n\n", (inv)?"i":"");

                        "\t\t\t\t// the transition easier to watch.\n"

                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"

        if (hwmpy) {

                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"

                fprintf(fstage,

                        "\t\t\t\tib_sync <= 1'b0;\n"

        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

                "\t\t\tend\n"

                        "\t\t\t.SHIFT(BFLYSHIFT))\n"

"\n"

                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

                "\t\t\t//\n"

                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");

                "\t\t\t// Next step: recover the outputs from the butterfly\n"

        } else {

                "\t\t\t//\n"

        fprintf(fstage,

        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",

                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));

        fprintf(fstage,

        "\t//\n"

        "\t// Next step: recover the outputs from the butterfly\n"

        "\t//\n"

        "\talways\t@(posedge i_clk)\n"

        "\t\tif (i_rst)\n"

        "\t\tbegin\n"

                "\t\t\toB <= 0;\n"

                "\t\t\to_sync <= 0;\n"

                "\t\t\tb_started <= 0;\n"

        "\t\tend else if (i_ce)\n"

        "\t\tbegin\n"

                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"

                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"

                "\t\t\tbegin // A butterfly output is available\n"

                "\t\t\tbegin // A butterfly output is available\n"

                        "\t\t\t\tb_started <= 1'b1;\n"

                        "\t\t\t\tb_started <= 1'b1;\n"

                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"

                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"

                        "\t\t\t\toB <= oB+1;\n"

                        "\t\t\t\toB <= oB+1;\n"

Line 1336...

Line 1751...

                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"

                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\toB <= oB + 1;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                "\t\t\tend else\n"

                "\t\t\tend else\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

                        "\t\t\t\to_sync <= 1'b0;\n"

        "\t\tend\n"

        "\t\tend\n\n");

"\n", (inv)?"i":"");

        if (hwmpy) {

                fprintf(fstage,

        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

                        "\t\t\t.SHIFT(BFLYSHIFT))\n"

                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");

        } else {

        fprintf(fstage,

        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"

                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"

        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"

                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",

                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));

        fprintf(fstage, "endmodule\n");

        fprintf(fstage, "endmodule\n");

void    usage(void) {

void    usage(void) {

        fprintf(stderr,

        fprintf(stderr,

Line 1403...

Line 1803...

        int     nbitsout, maxbitsout = -1, xtrapbits=0;

        int     nbitsout, maxbitsout = -1, xtrapbits=0;

        bool    bitreverse = true, inverse=false, interactive = false,

        bool    bitreverse = true, inverse=false, interactive = false,

                verbose_flag = false;

                verbose_flag = false;

        FILE    *vmain;

        FILE    *vmain;

        std::string     coredir = "fft-core", cmdline = "";

        std::string     coredir = "fft-core", cmdline = "";

        ROUND_T rounding = RND_CONVERGENT;

        // ROUND_T      rounding = RND_HALFUP;

        if (argc <= 1)

        if (argc <= 1)

                usage();

                usage();

        cmdline = argv[0];

        cmdline = argv[0];

Line 1849...

Line 2251...

                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");

                fprintf(vmain, "\n");

                fprintf(vmain, "\n");

                if (bitreverse) {

                if (bitreverse) {

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\twire\tbr_start;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\treg\tr_br_started;\n");

                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");

                        fprintf(vmain, "\t// our dblstage.\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\talways @(posedge i_clk)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\tif (i_rst)\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");

                        fprintf(vmain, "\t\telse\n");

                        fprintf(vmain, "\t\telse\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");

                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");

                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");

Line 1890...

Line 2290...

                std::string     fname;

                std::string     fname;

                fname = coredir + "/butterfly.v";

                fname = coredir + "/butterfly.v";

                build_butterfly(fname.c_str(), xtracbits);

                build_butterfly(fname.c_str(), xtracbits, rounding);

                if (nummpy > 0) {

                if (nummpy > 0) {

                        fname = coredir + "/hwbfly.v";

                        fname = coredir + "/hwbfly.v";

                        build_hwbfly(fname.c_str(), xtracbits);

                        build_hwbfly(fname.c_str(), xtracbits, rounding);

                fname = coredir + "/shiftaddmpy.v";

                fname = coredir + "/shiftaddmpy.v";

                build_multiply(fname.c_str());

                build_multiply(fname.c_str());

                fname = coredir + "/qtrstage.v";

                fname = coredir + "/qtrstage.v";

                build_quarters(fname.c_str());

                build_quarters(fname.c_str(), rounding);

                fname = coredir + "/dblstage.v";

                fname = coredir + "/dblstage.v";

                build_dblstage(fname.c_str());

                build_dblstage(fname.c_str(), rounding);

                if (bitreverse) {

                if (bitreverse) {

                        fname = coredir + "/dblreverse.v";

                        fname = coredir + "/dblreverse.v";

                        build_dblreverse(fname.c_str());

                        build_dblreverse(fname.c_str());

                const   char    *rnd_string = "";

                switch(rounding) {

                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;

                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;

                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;

                        default:

                                rnd_string = "/convround.v"; break;

                } fname = coredir + rnd_string;

                switch(rounding) {

                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;

                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;

                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;

                        default:

                                build_convround(fname.c_str()); break;

 No newline at end of file

 No newline at end of file

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 22 and 23