URL
https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk
Subversion Repositories dblclockfft
Compare Revisions
- This comparison shows the changes necessary to convert path /dblclockfft/trunk/sw from Rev 26 to Rev 28
- ↔ Reverse comparison
Rev 26 → Rev 28
/fftgen.cpp
306,7 → 306,14
"\t//\t\thalfway between the two. In the halfway case, we\n" |
"\t//\t\tround away from zero.\n" |
"\tgenerate\n" |
"\tif (IWID-SHIFT == OWID)\n" |
"\tif (IWID == OWID) // In this case, the shift is irrelevant and\n" |
"\tbegin // cannot be applied. No truncation or rounding takes\n" |
"\t// effect here.\n" |
"\n" |
"\t\talways @(posedge i_clk)\n" |
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n" |
"\n" |
"\tend else if (IWID-SHIFT == OWID)\n" |
"\tbegin // No truncation or rounding, output drops no bits\n" |
"\n" |
"\t\talways @(posedge i_clk)\n" |
408,7 → 415,14
"\t//\t\thalfway between the two. In the halfway case we round\n" |
"\t//\t\tto the nearest even number.\n" |
"\tgenerate\n" |
"\tif (IWID-SHIFT == OWID)\n" |
"\tif (IWID == OWID) // In this case, the shift is irrelevant and\n" |
"\tbegin // cannot be applied. No truncation or rounding takes\n" |
"\t// effect here.\n" |
"\n" |
"\t\talways @(posedge i_clk)\n" |
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n" |
"\n" |
"\tend else if (IWID-SHIFT == OWID)\n" |
"\tbegin // No truncation or rounding, output drops no bits\n" |
"\n" |
"\t\talways @(posedge i_clk)\n" |
717,7 → 731,7
"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n" |
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n" |
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n" |
"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n" |
"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n" |
"\toutput\treg\t\t\to_sync;\n" |
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":""); |
|
740,24 → 754,6
"\n" |
"\n"); |
fprintf(fp, |
"\t// Don't forget that we accumulate a bit by adding two values\n" |
"\t// together. Therefore our intermediate value must have one more\n" |
"\t// bit than the two originals.\n" |
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n"); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string); |
|
fprintf(fp, |
"\n" |
"\t// As with any register connected to the sync pulse, these must\n" |
"\t// have initial values and be reset on the i_rst signal.\n" |
764,31 → 760,32
"\t// Other data values need only restrict their updates to i_ce\n" |
"\t// enabled clocks, but sync\'s must obey resets and initial\n" |
"\t// conditions as well.\n" |
"\treg\twait_for_sync, rnd_sync;\n" |
"\treg\trnd_sync, r_sync;\n" |
"\n" |
"\tinitial begin\n" |
"\t\trnd_sync = 1\'b0;\n" |
"\t\to_sync = 1\'b0;\n" |
"\t\twait_for_sync = 1\'b1;\n" |
"\tend\n" |
"\tinitial\trnd_sync = 1\'b0; // Sync into rounding\n" |
"\tinitial\tr_sync = 1\'b0; // Sync coming out\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\tbegin\n" |
"\t\t\trnd_sync <= 1\'b0;\n" |
"\t\t\to_sync <= 1\'b0;\n" |
"\t\t\twait_for_sync <= 1\'b1;\n" |
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n" |
"\t\t\tr_sync <= 1\'b0;\n" |
"\t\tend else if (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\twait_for_sync <= 1\'b0;\n" |
"\t\t\t//\n" |
"\t\t\trnd_sync <= i_sync;\n" |
"\t\t\to_sync <= rnd_sync;\n" |
"\t\t\tr_sync <= rnd_sync;\n" |
"\t\tend\n" |
"\n" |
"\t// As with other variables, these are really only updated when in\n" |
"\t// the processing pipeline, after the first i_sync. However, to\n" |
"\t// eliminate as much unnecessary logic as possible, we toggle\n" |
"\t// these any time the i_ce line is enabled.\n" |
"\t// these any time the i_ce line is enabled, and don\'t reset.\n" |
"\t// them on i_rst.\n"); |
fprintf(fp, |
"\t// Don't forget that we accumulate a bit by adding two values\n" |
"\t// together. Therefore our intermediate value must have one more\n" |
"\t// bit than the two originals.\n" |
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n" |
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
800,9 → 797,38
"\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n" |
"\t\t\t//\n" |
"\t\tend\n" |
"\n"); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string); |
fprintf(fp, |
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n" |
"\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string); |
|
fprintf(fp, "\n" |
"\t// Prior versions of this routine did not include the extra\n" |
"\t// clock and register/flip-flops that this routine requires.\n" |
"\t// These are placed in here to correct a bug in Verilator, that\n" |
"\t// otherwise struggles. (Hopefully this will fix the problem ...)\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce)\n" |
"\t\tbegin\n" |
"\t\t\to_left <= { o_out_0r, o_out_0i };\n" |
"\t\t\to_right <= { o_out_1r, o_out_1i };\n" |
"\t\tend\n" |
"\n" |
"\tassign\to_left = { o_out_0r, o_out_0i };\n" |
"\tassign\to_right = { o_out_1r, o_out_1i };\n" |
"\tinitial\to_sync = 1'b0; // Final sync coming out of module\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_rst)\n" |
"\t\t\to_sync <= 1'b0;\n" |
"\t\telse if (i_ce)\n" |
"\t\t\to_sync <= r_sync;\n" |
"\n" |
"endmodule\n"); |
fclose(fp); |
1046,7 → 1072,7
"\n" |
"\treg\tadrz;\n" |
"\talways @(posedge i_clk)\n" |
"\t\tif (i_ce) adrz = iaddr[LGSIZE-2];\n" |
"\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n" |
"\n" |
"\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n" |
"\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n" |
1160,7 → 1186,7
"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n" |
"\t// Parameters specific to the core that should not be changed.\n" |
"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n" |
"\t\t\tSHIFT=0, AUXLEN=%d;\n" |
"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n" |
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n" |
"\t// this value is fractional, then round up to the nearest\n" |
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n" |
1172,8 → 1198,7
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n" |
"\toutput\treg\to_aux;\n" |
"\n", 16, xtracbits, lgdelay(16,xtracbits), |
bflydelay(16, xtracbits), bflydelay(16, xtracbits)+3, |
lgdelay(16,xtracbits)); |
bflydelay(16, xtracbits), lgdelay(16,xtracbits)); |
fprintf(fp, |
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n" |
"\n" |
1958,7 → 1983,7
verbose_flag = false, single_clock = false, |
real_fft = false; |
FILE *vmain; |
std::string coredir = DEF_COREDIR, cmdline = ""; |
std::string coredir = DEF_COREDIR, cmdline = "", hdrname = ""; |
ROUND_T rounding = RND_CONVERGENT; |
// ROUND_T rounding = RND_HALFUP; |
|
1986,6 → 2011,14
case '1': |
single_clock = true; |
break; |
case 'a': |
if (argn+1 >= argc) { |
printf("ERR: No header filename given\n\n"); |
usage(); exit(-1); |
} |
hdrname = argv[++argn]; |
j+= 200; |
break; |
case 'c': |
if (argn+1 >= argc) { |
printf("ERR: No extra number of coefficient bits given!\n\n"); |
2190,6 → 2223,57
} |
} |
|
if (hdrname.length() > 0) { |
FILE *hdr = fopen(hdrname.c_str(), "w"); |
if (hdr == NULL) { |
fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str()); |
perror("O/S Err:"); |
exit(-2); |
} |
|
fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n"); |
fprintf(hdr, "//\n"); |
fprintf(hdr, "// Filename: %s\n", hdrname.c_str()); |
fprintf(hdr, "//\n"); |
fprintf(hdr, "// Project: %s\n", prjname); |
fprintf(hdr, "//\n"); |
fprintf(hdr, "// Purpose: This simple header file captures the internal constants\n"); |
fprintf(hdr, "// within the FFT that were used to build it, for the purpose\n"); |
fprintf(hdr, "// of making C++ integration (and test bench testing) simpler. That\n"); |
fprintf(hdr, "// is, should the FFT change size, this will note that size change\n"); |
fprintf(hdr, "// and thus any test bench or other C++ program dependent upon\n"); |
fprintf(hdr, "// either the size of the FFT, the number of bits in or out of\n"); |
fprintf(hdr, "// it, etc., can pick up the changes in the defines found within\n"); |
fprintf(hdr, "// this file.\n"); |
fprintf(hdr, "//\n"); |
fprintf(hdr, "%s", creator); |
fprintf(hdr, "//\n"); |
fprintf(hdr, "%s", cpyleft); |
fprintf(hdr, "//\n" |
"//\n" |
"#ifndef %sFFTHDR_H\n" |
"#define %sFFTHDR_H\n" |
"\n" |
"#define\t%sFFT_IWIDTH\t%d\n" |
"#define\t%sFFT_OWIDTH\t%d\n" |
"#define\t%sFFT_LGWIDTH\t%d\n" |
"#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n", |
(inverse)?"I":"", (inverse)?"I":"", |
(inverse)?"I":"", nbitsin, |
(inverse)?"I":"", nbitsout, |
(inverse)?"I":"", lgsize, |
(inverse)?"I":"", (inverse)?"I":""); |
if (!bitreverse) |
fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n", |
(inverse)?"I":""); |
if (real_fft) |
fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":""); |
if (!single_clock) |
fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":""); |
fprintf(hdr, "\n" "#endif\n\n"); |
fclose(hdr); |
} |
|
{ |
std::string fname_string; |
|
2301,27 → 2385,7
obits = maxbitsout; |
|
// Always do a first stage |
fprintf(vmain, "\n\n"); |
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize); |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize); |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
((dbg)&&(dbgstage == fftsize))?"_dbg":"", |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize); |
fprintf(vmain, "\n\n"); |
|
{ |
std::string fname; |
char numstr[12]; |
bool mpystage; |
|
// Last two stages are always non-multiply stages |
2328,6 → 2392,30
// since the multiplies can be done by adds |
mpystage = ((lgtmp-2) <= nummpy); |
|
if (mpystage) |
fprintf(vmain, "\t// A hardware optimized FFT stage\n"); |
fprintf(vmain, "\n\n"); |
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize); |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize); |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
((dbg)&&(dbgstage == fftsize))?"_dbg":"", |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", fftsize, |
xtracbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits,xtracbits), |
fftsize); |
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize); |
fprintf(vmain, "\n\n"); |
|
|
std::string fname; |
char numstr[12]; |
|
fname = coredir + "/"; |
if (inverse) fname += "i"; |
fname += "fftstage_e"; |
2357,30 → 2445,48
if ((maxbitsout > 0)&&(obits > maxbitsout)) |
obits = maxbitsout; |
|
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size); |
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size); |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
((dbg)&&(dbgstage == tmp_size))?"_dbg":"", |
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0, |
tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size, ((dbg)&&(dbgstage == tmp_size))?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits, |
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0, |
tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size); |
fprintf(vmain, "\n\n"); |
|
{ |
std::string fname; |
char numstr[12]; |
bool mpystage; |
|
mpystage = ((lgtmp-2) <= nummpy); |
|
if (mpystage) |
fprintf(vmain, "\t// A hardware optimized FFT stage\n"); |
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", |
tmp_size, tmp_size); |
fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n", |
2*(obits+xtrapbits)-1, |
tmp_size, tmp_size); |
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
((dbg)&&(dbgstage==tmp_size))?"_dbg":"", |
nbits+xtrapbits, |
nbits+xtracbits+xtrapbits, |
obits+xtrapbits, |
lgsize, lgtmp-2, |
lgdelay(nbits+xtrapbits,xtracbits), |
(dropbit)?0:0, tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", |
tmp_size<<1, tmp_size<<1, |
tmp_size, tmp_size, |
((dbg)&&(dbgstage == tmp_size)) |
?", o_dbg":""); |
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n", |
(inverse)?"i":"", tmp_size, |
nbits+xtrapbits, |
nbits+xtracbits+xtrapbits, |
obits+xtrapbits, |
lgsize, lgtmp-2, |
lgdelay(nbits+xtrapbits,xtracbits), |
(dropbit)?0:0, tmp_size); |
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", |
tmp_size<<1, tmp_size<<1, |
tmp_size, tmp_size); |
fprintf(vmain, "\n\n"); |
|
std::string fname; |
char numstr[12]; |
|
fname = coredir + "/"; |
if (inverse) fname += "i"; |
fname += "fftstage_e"; |
2440,6 → 2546,8
obits = maxbitsout; |
fprintf(vmain, "\twire\t\tw_s2;\n"); |
fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1); |
if ((nbits+xtrapbits+1 == obits)&&(!dropbit)) |
printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n"); |
fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1); |
fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n"); |
|
/Makefile
47,6 → 47,8
all: fftgen |
CORED := fft-core |
OBJDR := $(CORED)/obj_dir |
TESTSZ := 2048 |
BENCHD := ../bench/cpp |
|
fftgen: fftgen.o |
$(CXX) $< -o $@ |
66,13 → 68,13
# |
.PHONY: fft |
fft: fftgen |
./fftgen -f 2048 -n 16 -p 4 |
./fftgen -f $(TESTSZ) -n 16 -p 6 -a $(BENCHD)/fftsize.h |
cd $(CORED)/; verilator -cc fftmain.v |
cd $(OBJDR); make -f Vfftmain.mk |
|
.PHONY: ifft |
ifft: fftgen |
./fftgen -f 2048 -i -n 22 -p 4 |
./fftgen -f $(TESTSZ) -i -n 22 -p 6 -a $(BENCHD)/ifftsize.h |
cd $(CORED)/; verilator -cc ifftmain.v |
cd $(OBJDR); make -f Vifftmain.mk |
|