OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 2 and 5

Go to most recent revision | Show entire file | Details | Blame | View Log

Rev 2 Rev 5
Line 23... Line 23...
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License\n"
"// for more details.\n"
"// for more details.\n"
"//\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program.  If not, see <http://www.gnu.org/licenses/>.\n"
"// with this program.  (It's in the $(ROOT)/doc directory.  Run make with no\n"
 
"// target there if the PDF file isn\'t present.)  If not, see\n"
 
"// <http://www.gnu.org/licenses/> for a copy.\n"
 
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
"///////////////////////////////////////////////////////////////////////////\n";
Line 53... Line 56...
 
 
int     lgdelay(int nbits, int xtra) {
int     lgdelay(int nbits, int xtra) {
        int     cbits = nbits + xtra;
        int     cbits = nbits + xtra;
        int     delay = nbits + 2;
        int     delay = nbits + 2;
        if (nbits+1<cbits)
        if (nbits+1<cbits)
                delay = nbits+2;
                delay = nbits+4;
        else
        else
                delay = cbits+1;
                delay = cbits+3;
        return lgval(delay);
        return lgval(delay);
}
}
 
 
void    build_quarters(const char *fname) {
void    build_quarters(const char *fname) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
Line 74... Line 77...
"//\n"
"//\n"
"// Filename:   qtrstage.v\n"
"// Filename:   qtrstage.v\n"
"//             \n"
"//             \n"
"// Project:    %s\n"
"// Project:    %s\n"
"//\n"
"//\n"
"// Purpose:    This file is (almost) a Verilog source file.  It is meant to\n"
"// Purpose:    This file encapsulates the 4 point stage of a decimation in\n"
"//             be used by a FFT core compiler to generate FFTs which may be\n"
"//             frequency FFT.  This particular implementation is optimized\n"
"//             used as part of an FFT core.  Specifically, this file \n"
"//             so that all of the multiplies are accomplished by additions\n"
"//             encapsulates the options of a 4 point, decimation in\n"
"//             and multiplexers only.\n"
"//             frequency FFT-stage.  This particular stage is optimized so\n"
"//\n"
"//             that all of the multiplies are accomplished by additions and\n"
 
"//             mux'es.\n"
 
"//\n%s"
"//\n%s"
"//\n",
"//\n",
                prjname, creator);
                prjname, creator);
        fprintf(fp, "%s", cpyleft);
        fprintf(fp, "%s", cpyleft);
 
 
Line 155... Line 156...
"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"
"\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"
"\n"
"\n"
"\t\t\t// In sequence, clock = 1\n"
"\t\t\t// In sequence, clock = 1\n"
"\t\t\tif (pipeline[1])\n"
"\t\t\tif (pipeline[1])\n"
"\t\t\tbegin\n"
"\t\t\tbegin\n"
"\t\t\t ob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"
                        "\t\t\t\tob_a <= { sum_r[(IWIDTH):(IWIDTH+1-OWIDTH)],\n"
"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"
"\t\t\t\t\t\tsum_i[(IWIDTH):(IWIDTH+1-OWIDTH)] };\n"
"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
"\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
"\t\t\t\tif (~ODD)\n"
"\t\t\t\tif (~ODD)\n"
"\t\t\t\tbegin\n"
"\t\t\t\tbegin\n"
"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
Line 201... Line 202...
"// Filename:   dblstage.v\n"
"// Filename:   dblstage.v\n"
"//\n"
"//\n"
"// Project:    %s\n"
"// Project:    %s\n"
"//\n"
"//\n"
"// Purpose:    This is part of an FPGA implementation that will process\n"
"// Purpose:    This is part of an FPGA implementation that will process\n"
"//             data at two samples per clock.  If you notice from the\n"
"//             the final stage of a decimate-in-frequency FFT, running\n"
"//             derivation of an FFT, the only time both even and odd\n"
"//             through the data at two samples per clock.  If you notice\n"
"//             samples are used at the same time is the first stage.\n"
"//             from the derivation of an FFT, the only time both even and\n"
"//             Therefore, after this stage and these twiddles, all of the\n"
"//             odd samples are used at the same time is in this stage.\n"
"//             other stages can run two stages at a time at one sample per\n"
"//             Therefore, other than this stage and these twiddles, all of\n"
"//             clock.\n"
"//             the other stages can run two stages at a time at one sample\n"
 
"//             per clock.\n"
"//\n"
"//\n"
"//             In this implementation, the output is valid one clock after\n"
"//             In this implementation, the output is valid one clock after\n"
"//             the input is valid.  The output also accumulates one bit\n"
"//             the input is valid.  The output also accumulates one bit\n"
"//             above and beyond the number of bits in the input.\n"
"//             above and beyond the number of bits in the input.\n"
"//             \n"
"//             \n"
Line 240... Line 242...
"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
"\tassign\ti_in_1r = i_right[(2*IWIDTH-1):(IWIDTH)]; \n"
"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
"\tassign\ti_in_1i = i_right[(IWIDTH-1):0]; \n"
"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
"\twire\t[(OWIDTH-1):0]\t\to_out_0r, o_out_0i,\n"
"\t\t\t\t\to_out_1r, o_out_1i;\n"
"\t\t\t\t\to_out_1r, o_out_1i;\n"
"\n"
"\n"
"\t// Don't forget that we accumulate a bit by adding two values together.\n"
        "\t// Don't forget that we accumulate a bit by adding two values\n"
"\t// Therefore our intermediate value must have one more bit than the\n"
        "\t// together. Therefore our intermediate value must have one more\n"
"\t// two originals.\n"
        "\t// bit than the two originals.\n"
"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"
"\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"
"\n"
"\n"
"\talways @(posedge i_clk)\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n"
"\t\tbegin\n"
Line 255... Line 257...
"\t\t\t//\n"
"\t\t\t//\n"
"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"
"\t\t\tout_1r <= i_in_0r - i_in_1r;\n"
"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"
"\t\t\tout_1i <= i_in_0i - i_in_1i;\n"
"\t\tend\n"
"\t\tend\n"
"\n"
"\n"
"\t// Now, if the master control program doesn't want to keep all of our\n"
        "\t// Now, if the master control program doesn't want to keep all of\n"
"\t// bits, we can shift down to OWIDTH bits here.\n"
        "\t// our bits, we can shift down to OWIDTH bits here.\n"
"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
"\n"
"\n"
Line 324... Line 326...
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
        "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n"
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
        "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n"
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
        "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n"
        "\tgenvar k;\n"
        "\tgenvar k;\n"
"\n"
"\n"
 
        "\t// If we were forced to stay within two\'s complement arithmetic,\n"
 
        "\t// taking the absolute value here would require an additional bit.\n"
 
        "\t// However, because our results are now unsigned, we can stay\n"
 
        "\t// within the number of bits given (for now).\n"
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_ce)\n"
                "\t\tif (i_ce)\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
                        "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n"
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
                        "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n"
Line 429... Line 435...
"//     mem[11xxx1] = s_1[m]\n"
"//     mem[11xxx1] = s_1[m]\n"
"//     o_0[m] = mem[00xxx1]\n"
"//     o_0[m] = mem[00xxx1]\n"
"//     o_1[m] = mem[01xxx1]\n"
"//     o_1[m] = mem[01xxx1]\n"
"//     ...\n"
"//     ...\n"
"//\n"
"//\n"
 
"//     The answer is yes, we can, but we need to use four memory banks\n"
 
"//     to do it properly.  These four banks are defined by the two bits\n"
 
"//     that determine the top and bottom of the correct address.  Larger\n"
 
"//     FFT\'s would require more memories.\n"
 
"//\n"
"//\n");
"//\n");
        fprintf(fp,
        fprintf(fp,
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
"\t\to_out_0, o_out_1, o_sync);\n"
"\t\to_out_0, o_out_1, o_sync);\n"
"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
"\tparameter\t\t\tLGSIZE=4, WIDTH=24;\n"
Line 578... Line 589...
 
 
        fprintf(fp,
        fprintf(fp,
"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"
"module\tbutterfly(i_clk, i_ce, i_coef, i_left, i_right, i_aux,\n"
"\t\to_left, o_right, o_aux);\n"
"\t\to_left, o_right, o_aux);\n"
"\t// Public changeable parameters ...\n"
"\t// Public changeable parameters ...\n"
"\tparameter IWIDTH=16,CWIDTH=IWIDTH,OWIDTH=IWIDTH;\n"
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+4,OWIDTH=IWIDTH+1;\n"
"\t// Parameters specific to the core that should not be changed.\n"
"\t// Parameters specific to the core that should not be changed.\n"
"\tparameter    MPYDELAY=(IWIDTH+1 < CWIDTH)?(IWIDTH+2):(CWIDTH+1),\n"
        "\tparameter    MPYDELAY=5'd20, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
"\t\t\tSHIFT=0, ROUND=1;\n"
                        "\t\t\tSHIFT=0, ROUND=0;\n"
"\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
"\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
"\t// this value is fractional, then round up to the nearest\n"
"\t// this value is fractional, then round up to the nearest\n"
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
"\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
"\tparameter    LGDELAY=5;\n"
        "\tparameter\tLGDELAY=5;\n"
"\tinput                i_clk, i_ce;\n"
        "\tinput\t\ti_clk, i_ce;\n"
"\tinput                [(2*CWIDTH-1):0] i_coef;\n"
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
"\tinput                [(2*IWIDTH-1):0] i_left, i_right;\n"
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
"\tinput                i_aux;\n"
        "\tinput\t\ti_aux;\n"
"\toutput       wire    [(2*OWIDTH-1):0] o_left, o_right;\n"
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
"\toutput       wire    o_aux;\n"
        "\toutput\twire o_aux;\n"
"\n"
"\n"
"\twire [(OWIDTH-1):0]  o_left_r, o_left_i, o_right_r, o_right_i;\n"
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
"\n"
"\n"
"\treg  [(2*IWIDTH-1):0]        r_left, r_right;\n"
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
"\treg                          r_aux, r_aux_2;\n"
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
"\treg  [(2*CWIDTH-1):0]        r_coef, r_coef_2;\n"
        "\treg\t[(2*CWIDTH-1):0]\tr_coef, r_coef_2;\n"
"\twire [(CWIDTH-1):0]          r_coef_r, r_coef_i;\n"
        "\twire\tsigned\t[(CWIDTH-1):0]\tr_coef_r, r_coef_i;\n"
"\tassign       r_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"
        "\tassign\tr_coef_r  = r_coef_2[ (2*CWIDTH-1):(CWIDTH)];\n"
"\tassign       r_coef_i  = r_coef_2[ (CWIDTH-1):0];\n"
        "\tassign\tr_coef_i  = r_coef_2[ (  CWIDTH-1):0];\n"
"\twire [(IWIDTH-1):0]  r_left_r, r_left_i, r_right_r, r_right_i;\n"
        "\twire\tsigned\t[(IWIDTH-1):0]\tr_left_r, r_left_i, r_right_r, r_right_i;\n"
"\tassign       r_left_r  = i_left[ (2*IWIDTH-1):(IWIDTH)];\n"
        "\tassign\tr_left_r  = r_left[ (2*IWIDTH-1):(IWIDTH)];\n"
"\tassign       r_left_i  = i_left[ (IWIDTH-1):0];\n"
        "\tassign\tr_left_i  = r_left[ (IWIDTH-1):0];\n"
"\tassign       r_right_r = i_right[(2*IWIDTH-1):(IWIDTH)];\n"
        "\tassign\tr_right_r = r_right[(2*IWIDTH-1):(IWIDTH)];\n"
"\tassign       r_right_i = i_right[(IWIDTH-1):0];\n"
        "\tassign\tr_right_i = r_right[(IWIDTH-1):0];\n"
"\n"
"\n"
"\treg  [(IWIDTH):0]    r_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
        "\treg\tsigned\t[(IWIDTH):0]\tr_sum_r, r_sum_i, r_dif_r, r_dif_i;\n"
"\n"
"\n"
"\treg  [(LGDELAY-1):0] fifo_addr;\n"
"\treg  [(LGDELAY-1):0] fifo_addr;\n"
"\twire [(LGDELAY-1):0] fifo_read_addr;\n"
"\twire [(LGDELAY-1):0] fifo_read_addr;\n"
"\t/* verilator lint_off WIDTH */\n"
 
"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"
"\tassign fifo_read_addr = fifo_addr - MPYDELAY;\n"
"\t/* verilator lint_on WIDTH */\n"
 
"\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
"\treg  [(2*IWIDTH+2):0]        fifo_left [ 0:((1<<LGDELAY)-1)];\n"
"\n"
"\n");
 
        fprintf(fp,
 
        "\t// Set up the input to the multiply\n"
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_ce)\n"
                "\t\tif (i_ce)\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\t// One clock just latches the inputs\n"
                        "\t\t\t// One clock just latches the inputs\n"
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
                        "\t\t\tr_left <= i_left;        // No change in # of bits\n"
Line 633... Line 644...
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
                        "\t\t\tr_dif_i <= r_left_i - r_right_i;\n"
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
                        "\t\t\t// Other inputs are simply delayed on second clock\n"
                        "\t\t\tr_aux_2 <= r_aux;\n"
                        "\t\t\tr_aux_2 <= r_aux;\n"
                        "\t\t\tr_coef_2<= r_coef;\n"
                        "\t\t\tr_coef_2<= r_coef;\n"
        "\t\tend\n"
        "\t\tend\n"
"\n"
"\n");
 
        fprintf(fp,
 
        "\t// Don\'t forget to record the even side, since it doesn\'t need\n"
 
        "\t// to be multiplied, but yet we still need the results in sync\n"
 
        "\t// with the answer when it is ready.\n"
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_ce)\n"
                "\t\tif (i_ce)\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
                        "\t\t\t// Need to delay the sum side--nothing else happens\n"
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
                        "\t\t\t// to it, but it needs to stay synchronized with the\n"
                        "\t\t\t// right side.\n"
                        "\t\t\t// right side.\n"
                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
                        "\t\t\tfifo_left[fifo_addr] <= { r_aux_2, r_sum_r, r_sum_i };\n"
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
                        "\t\t\tfifo_addr <= fifo_addr + 1;\n"
                "\t\tend\n"
                "\t\tend\n"
"\n"
"\n"
        "\twire [(CWIDTH-1):0]  ir_coef_r, ir_coef_i;\n"
        "\twire\tsigned\t[(CWIDTH-1):0] ir_coef_r, ir_coef_i;\n"
        "\tassign ir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
        "\tassign\tir_coef_r = r_coef_2[(2*CWIDTH-1):CWIDTH];\n"
        "\tassign ir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
        "\tassign\tir_coef_i = r_coef_2[(CWIDTH-1):0];\n"
        "\twire [(IWIDTH+CWIDTH+1+2-1):0]       p_one, p_two, p_three;\n"
        "\twire\tsigned\t[((IWIDTH+2)+(CWIDTH+1)-1):0]\tp_one, p_two, p_three;\n"
"\n"
"\n"
        "\t// Multiply output is always a width of IWIDTH+CWIDTH-1.  ALWAYS.\n"
"\n");
        "\t// We take care of dropping the width to OWIDTH in our routine\n"
        fprintf(fp,
        "\t// below, but this is the definition of a multiply.\n"
        "\t// Multiply output is always a width of the sum of the widths of\n"
"\n"
        "\t// the two inputs.  ALWAYS.  This is independent of the number of\n"
"\n"
        "\t// bits in p_one, p_two, or p_three.  These values needed to \n"
"\n"
        "\t// accumulate a bit (or two) each.  However, this approach to a\n"
"// This should really be based upon an IF\n"
        "\t// three-multiply complex multiply cannot increase the total\n"
"// if (IWIDTH < CWIDTH) then ...\n"
        "\t// number of bits in our final output.  We\'ll take care of\n"
 
        "\t// dropping back down to the proper width, OWIDTH, in our routine\n"
 
        "\t// below.\n"
 
"\n"
 
"\n");
 
        fprintf(fp,
 
        "\t// We accomplish here \"Karatsuba\" multiplication.  That is,\n"
 
        "\t// by doing three multiplies we accomplish the work of four.\n"
 
        "\t// Let\'s prove to ourselves that this works ... We wish to\n"
 
        "\t// multiply: (a+jb) * (c+jd), where a+jb is given by\n"
 
        "\t//\ta + jb = r_dif_r + j r_dif_i, and\n"
 
        "\t//\tc + jd = ir_coef_r + j ir_coef_i.\n"
 
        "\t// We do this by calculating the intermediate products P1, P2,\n"
 
        "\t// and P3 as\n"
 
        "\t//\tP1 = ac\n"
 
        "\t//\tP2 = bd\n"
 
        "\t//\tP3 = (a + b) * (c + d)\n"
 
        "\t// and then complete our final answer with\n"
 
        "\t//\tac - bd = P1 - P2 (this checks)\n"
 
        "\t//\tad + bc = P3 - P2 - P1\n"
 
        "\t//\t        = (ac + bc + ad + bd) - bd - ac\n"
 
        "\t//\t        = bc + ad (this checks)\n"
 
"\n"
 
"\n");
 
        fprintf(fp,
 
        "\t// This should really be based upon an IF, such as in\n"
 
        "\t// if (IWIDTH < CWIDTH) then ...\n"
 
        "\t// However, this is the only (other) way I know to do it.\n"
        "\tgenerate\n"
        "\tgenerate\n"
        "\tif (CWIDTH < IWIDTH+1)\n"
        "\tif (CWIDTH < IWIDTH+1)\n"
        "\tbegin\n"
        "\tbegin\n"
                "\t\t// We need to pad these first two multiplies by an extra\n"
                "\t\t// We need to pad these first two multiplies by an extra\n"
                "\t\t// just to keep them aligned with the third, simpler,\n"
                "\t\t// bit just to keep them aligned with the third,\n"
                "\t\t// multiply.\n"
                "\t\t// simpler, multiply.\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
                        "\t\t\t\tir_coef_i+ir_coef_r, r_dif_r + r_dif_i, p_three);\n"
                        "\t\t\t\tir_coef_i+ir_coef_r,\n"
 
                        "\t\t\t\tr_dif_r + r_dif_i,\n"
 
                        "\t\t\t\tp_three);\n"
        "\tend else begin\n"
        "\tend else begin\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_two);\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
                                "\t\t\t\tr_dif_r+r_dif_i,\n"
                                "\t\t\t\tr_dif_r+r_dif_i,\n"
                                "\t\t\t\tir_coef_i+ir_coef_r,\n"
                                "\t\t\t\tir_coef_i+ir_coef_r,\n"
                                "\t\t\t\tp_three);\n"
                                "\t\t\t\tp_three);\n"
        "\tend\n"
        "\tend\n"
        "\tendgenerate\n"
        "\tendgenerate\n"
"\n"
"\n");
 
        fprintf(fp,
 
        "\t// These values are held in memory and delayed during the\n"
 
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
 
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
 
        "\t// therefore, the left_x values need to be right shifted by\n"
 
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
 
        "\t// extension.\n"
        "\twire aux;\n"
        "\twire aux;\n"
        "\twire [(IWIDTH+CWIDTH):0]     left_i, left_r;\n"
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    fifo_i, fifo_r;\n"
        "\treg  [(2*IWIDTH+2):0]        fifo_read;\n"
        "\treg\t\t[(2*IWIDTH+2):0]      fifo_read;\n"
        "\tassign       left_r = { fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH){1'b0}} };\n"
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
        "\tassign       left_i = { fifo_read[((IWIDTH+1)-1):0], {(CWIDTH){1'b0}} };\n"
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
        "\tassign       aux = fifo_read[2*IWIDTH+2];\n"
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
"\n"
"\n"
"\n"
"\n"
        "\treg [(CWIDTH+IWIDTH+3-1):0]  b_left_r, b_left_i, b_right_r, b_right_i, mpy_r, mpy_i;\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
        "\treg  [(CWIDTH+IWIDTH+3-1):0] rnd;\n"
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
        "\tassign rnd = ((~ROUND)||(SHIFT==0))?\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
        "\t\t\t({(CWIDTH+IWIDTH+3){1'b0}})\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] rnd;\n"
        "\t\t\t: ({ {(OWIDTH+1+SHIFT){1'b0}},1'b1,{(CWIDTH+IWIDTH+3-2-OWIDTH-SHIFT){1'b0}} });\n"
        "\tgenerate\n"
 
        "\tif ((~ROUND)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<1))\n"
 
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
 
        "\telse\n"
 
                "\t\tassign rnd = ({ {(OWIDTH+3+SHIFT){1'b0}},1'b1,\n"
 
                "\t\t\t\t{(CWIDTH+IWIDTH-OWIDTH-SHIFT-1){1'b0}} });\n"
 
        "\tendgenerate\n"
 
"\n");
 
        fprintf(fp,
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_ce)\n"
                "\t\tif (i_ce)\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\t// First clock, recover all values\n"
                        "\t\t\t// First clock, recover all values\n"
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
 
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
 
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
 
                        "\t\t\t// extra bits we need to get rid of.)\n"
                        "\t\t\tmpy_r <= p_one - p_two;\n"
                        "\t\t\tmpy_r <= p_one - p_two;\n"
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
"\n"
"\n"
                        "\t\t\t// Second clock, round and latch for final clock\n"
                        "\t\t\t// Second clock, round and latch for final clock\n"
                        "\t\t\tb_right_r <= mpy_r + rnd;\n"
                        "\t\t\tb_right_r <= mpy_r + rnd;\n"
                        "\t\t\tb_right_i <= mpy_i + rnd;\n"
                        "\t\t\tb_right_i <= mpy_i + rnd;\n"
                        "\t\t\tb_left_r <= { {2{left_r[(IWIDTH+CWIDTH)]}},left_r } + rnd;\n"
                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"
                        "\t\t\tb_left_i <= { {2{left_i[(IWIDTH+CWIDTH)]}},left_i } + rnd;\n"
                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"
                        "\t\t\to_aux <= aux;\n"
                        "\t\t\to_aux <= aux;\n"
                "\t\tend\n"
                "\t\tend\n"
"\n"
"\n");
 
        fprintf(fp,
        "\t// Final clock--clock and remove unnecessary bits.\n"
        "\t// Final clock--clock and remove unnecessary bits.\n"
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to OWIDTH,\n"
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
        "\t// and SHIFT by SHIFT bits in the process.\n"
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"
        "\t// them, but the actual values will never fill all these bits.\n"
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"
        "\t// In particular, we only need:\n"
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH+2-SHIFT):(CWIDTH+IWIDTH+2-SHIFT-OWIDTH+1)];\n"
        "\t//\t IWIDTH bits for the input\n"
"\n"
        "\t//\t     +1 bit for the add/subtract\n"
 
        "\t//\t+CWIDTH bits for the coefficient multiply\n"
 
        "\t//\t     +1 bit for the add/subtract in the complex multiply\n"
 
        "\t//\t ------\n"
 
        "\t//\t (IWIDTH+CWIDTH+2) bits at full precision.\n"
 
        "\t//\n"
 
        "\t// However, the coefficient multiply multiplied by a maximum value\n"
 
        "\t// of 2^(CWIDTH-2).  Thus, we only have\n"
 
        "\t//\t   IWIDTH bits for the input\n"
 
        "\t//\t       +1 bit for the add/subtract\n"
 
        "\t//\t+CWIDTH-2 bits for the coefficient multiply\n"
 
        "\t//\t       +1 (optional) bit for the add/subtract in the cpx mpy.\n"
 
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
 
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
 
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
 
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
 
        "\t// SHIFT) we accomplish that here.\n"
 
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
"\n"
 
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
 
        "\t// complement numbers per output word, so that each output word\n"
 
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
 
        "\t// portion and the bottom half being the imaginary portion.\n"
        "\tassign       o_left = { o_left_r, o_left_i };\n"
        "\tassign       o_left = { o_left_r, o_left_i };\n"
        "\tassign       o_right= { o_right_r,o_right_i};\n"
        "\tassign       o_right= { o_right_r,o_right_i};\n"
"\n"
"\n"
"endmodule\n");
"endmodule\n");
        fclose(fp);
        fclose(fp);
Line 905... Line 993...
                        "\t\t\t\toB <= oB + 1;\n"
                        "\t\t\t\toB <= oB + 1;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                "\t\t\tend else\n"
                "\t\t\tend else\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
        "\t\tend\n"
        "\t\tend\n"
"\n"
"\n", (inv)?"i":"");
 
        fprintf(fstage,
"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
"\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
"\t\t\t.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
"\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"
"\t\tbfly(i_clk, (b_ce&i_ce), ib_c,\n"
"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"
"\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n"
"endmodule;\n",
"endmodule;\n",
        (inv)?"i":"");
        lgdelay(nbits, xtra), (1<xtra)?(nbits+4):(nbits+xtra+3));
}
}
 
 
void    usage(void) {
void    usage(void) {
        fprintf(stderr,
        fprintf(stderr,
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"
"USAGE:\tfftgen [-f <size>] [-d dir] [-c cbits] [-n nbits] [-m mxbits] [-s01]\n"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.