OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /dblclockfft/trunk/sw
    from Rev 26 to Rev 28
    Reverse comparison

Rev 26 → Rev 28

/fftgen.cpp
306,7 → 306,14
"\t//\t\thalfway between the two. In the halfway case, we\n"
"\t//\t\tround away from zero.\n"
"\tgenerate\n"
"\tif (IWID-SHIFT == OWID)\n"
"\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
"\tbegin // cannot be applied. No truncation or rounding takes\n"
"\t// effect here.\n"
"\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
"\n"
"\tend else if (IWID-SHIFT == OWID)\n"
"\tbegin // No truncation or rounding, output drops no bits\n"
"\n"
"\t\talways @(posedge i_clk)\n"
408,7 → 415,14
"\t//\t\thalfway between the two. In the halfway case we round\n"
"\t//\t\tto the nearest even number.\n"
"\tgenerate\n"
"\tif (IWID-SHIFT == OWID)\n"
"\tif (IWID == OWID) // In this case, the shift is irrelevant and\n"
"\tbegin // cannot be applied. No truncation or rounding takes\n"
"\t// effect here.\n"
"\n"
"\t\talways @(posedge i_clk)\n"
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-1):0];\n"
"\n"
"\tend else if (IWID-SHIFT == OWID)\n"
"\tbegin // No truncation or rounding, output drops no bits\n"
"\n"
"\t\talways @(posedge i_clk)\n"
717,7 → 731,7
"\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
"\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
"\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
"\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
"\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
"\toutput\treg\t\t\to_sync;\n"
"\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
 
740,24 → 754,6
"\n"
"\n");
fprintf(fp,
"\t// Don't forget that we accumulate a bit by adding two values\n"
"\t// together. Therefore our intermediate value must have one more\n"
"\t// bit than the two originals.\n"
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
 
fprintf(fp,
"\n"
"\t// As with any register connected to the sync pulse, these must\n"
"\t// have initial values and be reset on the i_rst signal.\n"
764,31 → 760,32
"\t// Other data values need only restrict their updates to i_ce\n"
"\t// enabled clocks, but sync\'s must obey resets and initial\n"
"\t// conditions as well.\n"
"\treg\twait_for_sync, rnd_sync;\n"
"\treg\trnd_sync, r_sync;\n"
"\n"
"\tinitial begin\n"
"\t\trnd_sync = 1\'b0;\n"
"\t\to_sync = 1\'b0;\n"
"\t\twait_for_sync = 1\'b1;\n"
"\tend\n"
"\tinitial\trnd_sync = 1\'b0; // Sync into rounding\n"
"\tinitial\tr_sync = 1\'b0; // Sync coming out\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\tbegin\n"
"\t\t\trnd_sync <= 1\'b0;\n"
"\t\t\to_sync <= 1\'b0;\n"
"\t\t\twait_for_sync <= 1\'b1;\n"
"\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
"\t\t\tr_sync <= 1\'b0;\n"
"\t\tend else if (i_ce)\n"
"\t\tbegin\n"
"\t\t\twait_for_sync <= 1\'b0;\n"
"\t\t\t//\n"
"\t\t\trnd_sync <= i_sync;\n"
"\t\t\to_sync <= rnd_sync;\n"
"\t\t\tr_sync <= rnd_sync;\n"
"\t\tend\n"
"\n"
"\t// As with other variables, these are really only updated when in\n"
"\t// the processing pipeline, after the first i_sync. However, to\n"
"\t// eliminate as much unnecessary logic as possible, we toggle\n"
"\t// these any time the i_ce line is enabled.\n"
"\t// these any time the i_ce line is enabled, and don\'t reset.\n"
"\t// them on i_rst.\n");
fprintf(fp,
"\t// Don't forget that we accumulate a bit by adding two values\n"
"\t// together. Therefore our intermediate value must have one more\n"
"\t// bit than the two originals.\n"
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i;\n"
"\treg\tsigned\t[(IWIDTH):0]\trnd_in_1r, rnd_in_1i;\n\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
800,9 → 797,38
"\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
"\t\t\t//\n"
"\t\tend\n"
"\n");
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_0i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1r(i_clk, i_ce,\n"
"\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
fprintf(fp,
"\t%s #(IWIDTH+1,OWIDTH,SHIFT) do_rnd_1i(i_clk, i_ce,\n"
"\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
 
fprintf(fp, "\n"
"\t// Prior versions of this routine did not include the extra\n"
"\t// clock and register/flip-flops that this routine requires.\n"
"\t// These are placed in here to correct a bug in Verilator, that\n"
"\t// otherwise struggles. (Hopefully this will fix the problem ...)\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\t\to_left <= { o_out_0r, o_out_0i };\n"
"\t\t\to_right <= { o_out_1r, o_out_1i };\n"
"\t\tend\n"
"\n"
"\tassign\to_left = { o_out_0r, o_out_0i };\n"
"\tassign\to_right = { o_out_1r, o_out_1i };\n"
"\tinitial\to_sync = 1'b0; // Final sync coming out of module\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\t\to_sync <= 1'b0;\n"
"\t\telse if (i_ce)\n"
"\t\t\to_sync <= r_sync;\n"
"\n"
"endmodule\n");
fclose(fp);
1046,7 → 1072,7
"\n"
"\treg\tadrz;\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce) adrz = iaddr[LGSIZE-2];\n"
"\t\tif (i_ce) adrz <= iaddr[LGSIZE-2];\n"
"\n"
"\tassign\to_out_0 = (adrz)?odd_out_0:evn_out_0;\n"
"\tassign\to_out_1 = (adrz)?odd_out_1:evn_out_1;\n"
1160,7 → 1186,7
"\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
"\t// Parameters specific to the core that should not be changed.\n"
"\tparameter MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
"\t\t\tSHIFT=0, AUXLEN=%d;\n"
"\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
"\t// The LGDELAY should be the base two log of the MPYDELAY. If\n"
"\t// this value is fractional, then round up to the nearest\n"
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
1172,8 → 1198,7
"\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
"\toutput\treg\to_aux;\n"
"\n", 16, xtracbits, lgdelay(16,xtracbits),
bflydelay(16, xtracbits), bflydelay(16, xtracbits)+3,
lgdelay(16,xtracbits));
bflydelay(16, xtracbits), lgdelay(16,xtracbits));
fprintf(fp,
"\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
"\n"
1958,7 → 1983,7
verbose_flag = false, single_clock = false,
real_fft = false;
FILE *vmain;
std::string coredir = DEF_COREDIR, cmdline = "";
std::string coredir = DEF_COREDIR, cmdline = "", hdrname = "";
ROUND_T rounding = RND_CONVERGENT;
// ROUND_T rounding = RND_HALFUP;
 
1986,6 → 2011,14
case '1':
single_clock = true;
break;
// Option 'a': record the name of a header file to be written.
// The filename is taken from the *next* command-line argument, so
// complain (and show usage) if there is no such argument.
case 'a':
if (argn+1 >= argc) {
printf("ERR: No header filename given\n\n");
usage(); exit(-1);
}
// Consume the following argument as the header filename.
hdrname = argv[++argn];
// NOTE(review): presumably this pushes j past the end of the
// current option string so that no further flag characters are
// parsed from it -- confirm against the enclosing loop.
j+= 200;
break;
case 'c':
if (argn+1 >= argc) {
printf("ERR: No extra number of coefficient bits given!\n\n");
2190,6 → 2223,57
}
}
 
// If a header filename was given, write a small C/C++ header file
// recording the parameters this run was built with (input width,
// output width, log2 size, and a few feature flags).
if (hdrname.length() > 0) {
// Open (create/truncate) the header for writing.  On failure, report
// both our own message and the O/S error, then exit with status -2.
FILE *hdr = fopen(hdrname.c_str(), "w");
if (hdr == NULL) {
fprintf(stderr, "ERROR: Cannot open %s to create header file\n", hdrname.c_str());
perror("O/S Err:");
exit(-2);
}

// Leading comment banner: filename, project name, and purpose text,
// followed by the creator and cpyleft strings.
fprintf(hdr, "/////////////////////////////////////////////////////////////////////////////\n");
fprintf(hdr, "//\n");
fprintf(hdr, "// Filename: %s\n", hdrname.c_str());
fprintf(hdr, "//\n");
fprintf(hdr, "// Project: %s\n", prjname);
fprintf(hdr, "//\n");
fprintf(hdr, "// Purpose: This simple header file captures the internal constants\n");
fprintf(hdr, "// within the FFT that were used to build it, for the purpose\n");
fprintf(hdr, "// of making C++ integration (and test bench testing) simpler. That\n");
fprintf(hdr, "// is, should the FFT change size, this will note that size change\n");
fprintf(hdr, "// and thus any test bench or other C++ program dependent upon\n");
fprintf(hdr, "// either the size of the FFT, the number of bits in or out of\n");
fprintf(hdr, "// it, etc., can pick up the changes in the defines found within\n");
fprintf(hdr, "// this file.\n");
fprintf(hdr, "//\n");
fprintf(hdr, "%s", creator);
fprintf(hdr, "//\n");
fprintf(hdr, "%s", cpyleft);
// Include guard plus the core defines.  Every %s below is a name
// prefix: "I" when `inverse` is set, empty otherwise.  The %d values
// are, in order, nbitsin, nbitsout, and lgsize; the SIZE macro is
// defined in terms of the LGWIDTH macro rather than as a literal.
// The argument list below has one line per macro -- keep the two in
// step when adding or removing a define.
fprintf(hdr, "//\n"
"//\n"
"#ifndef %sFFTHDR_H\n"
"#define %sFFTHDR_H\n"
"\n"
"#define\t%sFFT_IWIDTH\t%d\n"
"#define\t%sFFT_OWIDTH\t%d\n"
"#define\t%sFFT_LGWIDTH\t%d\n"
"#define\t%sFFT_SIZE\t(1<<%sFFT_LGWIDTH)\n\n",
(inverse)?"I":"", (inverse)?"I":"",
(inverse)?"I":"", nbitsin,
(inverse)?"I":"", nbitsout,
(inverse)?"I":"", lgsize,
(inverse)?"I":"", (inverse)?"I":"");
// Optional value-less flag macros, each emitted only when the
// corresponding setting applies:
//   - bit reversal was not requested
if (!bitreverse)
fprintf(hdr, "#define\t%sFFT_SKIPS_BIT_REVERSE\n",
(inverse)?"I":"");
//   - real_fft is set
if (real_fft)
fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
//   - single_clock is not set
if (!single_clock)
fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
// Close the include guard and the file.
fprintf(hdr, "\n" "#endif\n\n");
fclose(hdr);
}
 
{
std::string fname_string;
 
2301,27 → 2385,7
obits = maxbitsout;
 
// Always do a first stage
fprintf(vmain, "\n\n");
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
((dbg)&&(dbgstage == fftsize))?"_dbg":"",
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
fprintf(vmain, "\n\n");
 
{
std::string fname;
char numstr[12];
bool mpystage;
 
// Last two stages are always non-multiply stages
2328,6 → 2392,30
// since the multiplies can be done by adds
mpystage = ((lgtmp-2) <= nummpy);
 
if (mpystage)
fprintf(vmain, "\t// A hardware optimized FFT stage\n");
fprintf(vmain, "\n\n");
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", fftsize, fftsize);
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, fftsize, fftsize);
fprintf(vmain, "\t%sfftstage_e%d%s\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
((dbg)&&(dbgstage == fftsize))?"_dbg":"",
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_left, w_e%d, w_s%d%s);\n", fftsize, fftsize, ((dbg)&&(dbgstage == fftsize))?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(IWIDTH,IWIDTH+%d,%d,%d,%d,%d,0)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", fftsize,
xtracbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits,xtracbits),
fftsize);
fprintf(vmain, "\t\t\t(~i_rst), i_right, w_o%d, w_os%d);\n", fftsize, fftsize);
fprintf(vmain, "\n\n");
 
 
std::string fname;
char numstr[12];
 
fname = coredir + "/";
if (inverse) fname += "i";
fname += "fftstage_e";
2357,30 → 2445,48
if ((maxbitsout > 0)&&(obits > maxbitsout))
obits = maxbitsout;
 
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n", tmp_size, tmp_size);
fprintf(vmain, "\twire\t[%d:0]\tw_e%d, w_o%d;\n", 2*(obits+xtrapbits)-1, tmp_size, tmp_size);
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
((dbg)&&(dbgstage == tmp_size))?"_dbg":"",
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size, ((dbg)&&(dbgstage == tmp_size))?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
nbits+xtrapbits, nbits+xtracbits+xtrapbits, obits+xtrapbits,
lgsize, lgtmp-2, lgdelay(nbits+xtrapbits,xtracbits), (dropbit)?0:0,
tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n", tmp_size<<1, tmp_size<<1, tmp_size, tmp_size);
fprintf(vmain, "\n\n");
 
{
std::string fname;
char numstr[12];
bool mpystage;
 
mpystage = ((lgtmp-2) <= nummpy);
 
if (mpystage)
fprintf(vmain, "\t// A hardware optimized FFT stage\n");
fprintf(vmain, "\twire\t\tw_s%d, w_os%d;\n",
tmp_size, tmp_size);
fprintf(vmain,"\twire\t[%d:0]\tw_e%d, w_o%d;\n",
2*(obits+xtrapbits)-1,
tmp_size, tmp_size);
fprintf(vmain, "\t%sfftstage_e%d%s\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_e%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
((dbg)&&(dbgstage==tmp_size))?"_dbg":"",
nbits+xtrapbits,
nbits+xtracbits+xtrapbits,
obits+xtrapbits,
lgsize, lgtmp-2,
lgdelay(nbits+xtrapbits,xtracbits),
(dropbit)?0:0, tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_e%d, w_e%d, w_s%d%s);\n",
tmp_size<<1, tmp_size<<1,
tmp_size, tmp_size,
((dbg)&&(dbgstage == tmp_size))
?", o_dbg":"");
fprintf(vmain, "\t%sfftstage_o%d\t#(%d,%d,%d,%d,%d,%d,%d)\tstage_o%d(i_clk, i_rst, i_ce,\n",
(inverse)?"i":"", tmp_size,
nbits+xtrapbits,
nbits+xtracbits+xtrapbits,
obits+xtrapbits,
lgsize, lgtmp-2,
lgdelay(nbits+xtrapbits,xtracbits),
(dropbit)?0:0, tmp_size);
fprintf(vmain, "\t\t\t\t\t\tw_s%d, w_o%d, w_o%d, w_os%d);\n",
tmp_size<<1, tmp_size<<1,
tmp_size, tmp_size);
fprintf(vmain, "\n\n");
 
std::string fname;
char numstr[12];
 
fname = coredir + "/";
if (inverse) fname += "i";
fname += "fftstage_e";
2440,6 → 2546,8
obits = maxbitsout;
fprintf(vmain, "\twire\t\tw_s2;\n");
fprintf(vmain, "\twire\t[%d:0]\tw_e2, w_o2;\n", 2*obits-1);
if ((nbits+xtrapbits+1 == obits)&&(!dropbit))
printf("WARNING: SCALING OFF BY A FACTOR OF TWO--should\'ve dropped a bit in the last stage.\n");
fprintf(vmain, "\tdblstage\t#(%d,%d,%d)\tstage_2(i_clk, i_rst, i_ce,\n", nbits+xtrapbits, obits,(dropbit)?0:1);
fprintf(vmain, "\t\t\t\t\tw_s4, w_e4, w_o4, w_e2, w_o2, w_s2);\n");
 
/Makefile
47,6 → 47,8
all: fftgen
CORED := fft-core
OBJDR := $(CORED)/obj_dir
TESTSZ := 2048
BENCHD := ../bench/cpp
 
fftgen: fftgen.o
$(CXX) $< -o $@
66,13 → 68,13
#
.PHONY: fft
fft: fftgen
./fftgen -f 2048 -n 16 -p 4
./fftgen -f $(TESTSZ) -n 16 -p 6 -a $(BENCHD)/fftsize.h
cd $(CORED)/; verilator -cc fftmain.v
cd $(OBJDR); make -f Vfftmain.mk
 
.PHONY: ifft
ifft: fftgen
./fftgen -f 2048 -i -n 22 -p 4
./fftgen -f $(TESTSZ) -i -n 22 -p 6 -a $(BENCHD)/ifftsize.h
cd $(CORED)/; verilator -cc ifftmain.v
cd $(OBJDR); make -f Vifftmain.mk
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.