OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 28 and 29

Go to most recent revision | Show entire file | Details | Blame | View Log

Rev 28 Rev 29
Line 1... Line 1...
/////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//
// Filename:    fftgen.cpp
// Filename:    fftgen.cpp
//
//
// Project:     A Doubletime Pipelined FFT
// Project:     A Doubletime Pipelined FFT
//
//
Line 21... Line 21...
//              make in the documents directory to build it.
//              make in the documents directory to build it.
//
//
// Creator:     Dan Gisselquist, Ph.D.
// Creator:     Dan Gisselquist, Ph.D.
//              Gisselquist Technology, LLC
//              Gisselquist Technology, LLC
//
//
///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//
// Copyright (C) 2015, Gisselquist Technology, LLC
// Copyright (C) 2015, Gisselquist Technology, LLC
//
//
// This program is free software (firmware): you can redistribute it and/or
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of  the GNU General Public License as published
// modify it under the terms of  the GNU General Public License as published
Line 44... Line 44...
//
//
// License:     GPL, v3, as defined and found on www.gnu.org,
// License:     GPL, v3, as defined and found on www.gnu.org,
//              http://www.gnu.org/licenses/gpl.html
//              http://www.gnu.org/licenses/gpl.html
//
//
//
//
///////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
//
//
//
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <unistd.h>
#include <unistd.h>
Line 62... Line 62...
#define DEF_NBITSIN     16
#define DEF_NBITSIN     16
#define DEF_COREDIR     "fft-core"
#define DEF_COREDIR     "fft-core"
#define DEF_XTRACBITS   4
#define DEF_XTRACBITS   4
#define DEF_NMPY        0
#define DEF_NMPY        0
#define DEF_XTRAPBITS   0
#define DEF_XTRAPBITS   0
 
#define USE_OLD_MULTIPLY        false
 
 
 
// To coordinate testing, it helps to have some defines in our header file that
 
// are common with the default parameters found within the various subroutines.
 
// We'll define those common parameters here.  These values, however, have no
 
// effect on anything other than bench testing.  They do, though, allow us to
 
// bench test exact copies of what is going on within the FFT when necessary
 
// in order to find problems.
 
// First, parameters for the new multiply based upon the bi-multiply structure
 
// (2-bits/2-tableau rows at a time).
 
#define TST_LONGBIMPY_AW        16
 
#define TST_LONGBIMPY_BW        20      // Leave undefined to match AW
 
 
 
//  We also include parameters for the shift add multiply
 
#define TST_SHIFTADDMPY_AW      16
 
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
 
 
 
// Now for parameters matching the butterfly
 
#define TST_BUTTERFLY_IWIDTH    16
 
#define TST_BUTTERFLY_CWIDTH    20
 
#define TST_BUTTERFLY_OWIDTH    17
 
 
 
// Now for parameters matching the qtrstage
 
#define TST_QTRSTAGE_IWIDTH     16
 
#define TST_QTRSTAGE_LGWIDTH    8
 
 
 
// Parameters for the dblstage
 
#define TST_DBLSTAGE_IWIDTH     16
 
#define TST_DBLSTAGE_SHIFT      0
 
 
 
// Now for parameters matching the dblreverse stage
 
#define TST_DBLREVERSE_LGSIZE   5
 
 
typedef enum {
typedef enum {
        RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT
        RND_TRUNCATE, RND_FROMZERO, RND_HALFUP, RND_CONVERGENT
} ROUND_T;
} ROUND_T;
 
 
const char      cpyleft[] =
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"////////////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of  the GNU General Public License as published\n"
"// modify it under the terms of  the GNU General Public License as published\n"
Line 91... Line 123...
"//\n"
"//\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"// License:    GPL, v3, as defined and found on www.gnu.org,\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//             http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
"////////////////////////////////////////////////////////////////////////////////\n";
const char      prjname[] = "A Doubletime Pipelined FFT";
const char      prjname[] = "A Doubletime Pipelined FFT";
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
const char      creator[] =     "// Creator:    Dan Gisselquist, Ph.D.\n"
                                "//             Gisselquist Tecnology, LLC\n";
                                "//             Gisselquist Tecnology, LLC\n";
 
 
int     lgval(int vl) {
int     lgval(int vl) {
Line 115... Line 147...
}
}
 
 
// bflydelay
//
// Computes a delay value from the data width (nbits) and the number of
// extra bits (xtra); the two are summed into cbits below.  Which formula
// is used depends on the compile-time USE_OLD_MULTIPLY switch:
//
//   USE_OLD_MULTIPLY true:  nbits+4 when nbits+1 < cbits, otherwise cbits+3.
//   USE_OLD_MULTIPLY false: let na be the smaller of (nbits+2) and (cbits+1);
//                           the result is na/2, rounded up, plus two.
//
// Returns the computed delay.
int     bflydelay(int nbits, int xtra) {
int     bflydelay(int nbits, int xtra) {
        int     cbits = nbits + xtra;
        int     cbits = nbits + xtra;
        int     delay;
        int     delay;
 
 
 
        if (USE_OLD_MULTIPLY) {
        if (nbits+1<cbits)
        if (nbits+1<cbits)
                delay = nbits+4;
                delay = nbits+4;
        else
        else
                delay = cbits+3;
                delay = cbits+3;
 
        } else {
 
                // Operand widths, each padded beyond the raw nbits/cbits
                int     na=nbits+2, nb=cbits+1;
 
                // Order the pair so that na holds the smaller width
                if (nb<na) {
 
                        int tmp = nb;
 
                        nb = na; na = tmp;
 
                // (na)/2+(na&1) is na/2 rounded up
                } delay = ((na)/2+(na&1)+2);
 
        }
        return delay;
        return delay;
}
}
 
 
int     lgdelay(int nbits, int xtra) {
int     lgdelay(int nbits, int xtra) {
        // The butterfly code needs to compare a valid address, of this
        // The butterfly code needs to compare a valid address, of this
Line 513... Line 554...
                (dbg)?"_dbg":"", prjname, creator);
                (dbg)?"_dbg":"", prjname, creator);
        fprintf(fp, "%s", cpyleft);
        fprintf(fp, "%s", cpyleft);
 
 
        fprintf(fp,
        fprintf(fp,
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
"module\tqtrstage%s(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync%s);\n"
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
        "\tparameter    IWIDTH=%d, OWIDTH=IWIDTH+1;\n"
        "\t// Parameters specific to the core that should be changed when this\n"
        "\t// Parameters specific to the core that should be changed when this\n"
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
        "\t// spans must use the fftdoubles stage.\n"
        "\t// spans must use the fftdoubles stage.\n"
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
        "\tparameter\tLGWIDTH=%d, ODD=0, INVERSE=0,SHIFT=0;\n"
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
        "\toutput\treg                          o_sync;\n"
        "\toutput\treg                          o_sync;\n"
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
        "\t\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"", TST_QTRSTAGE_IWIDTH,
 
        TST_QTRSTAGE_LGWIDTH);
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_data[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
                        "\t\t\t\t\to_data[(OWIDTH-1):(OWIDTH-16)] };\n"
"\n");
"\n");
        }
        }
Line 726... Line 768...
"//\n", (dbg)?"_dbg":"", prjname, creator);
"//\n", (dbg)?"_dbg":"", prjname, creator);
 
 
        fprintf(fp, "%s", cpyleft);
        fprintf(fp, "%s", cpyleft);
        fprintf(fp,
        fprintf(fp,
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
"module\tdblstage%s(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync%s);\n"
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
        "\tparameter\tIWIDTH=%d,OWIDTH=IWIDTH+1, SHIFT=%d;\n"
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
        "\toutput\treg\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
        "\toutput\treg\t\t\to_sync;\n"
        "\toutput\treg\t\t\to_sync;\n"
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"");
        "\n", (dbg)?"_dbg":"", (dbg)?", o_dbg":"",
 
        TST_DBLSTAGE_IWIDTH, TST_DBLSTAGE_SHIFT);
 
 
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
        if (dbg) { fprintf(fp, "\toutput\twire\t[33:0]\t\t\to_dbg;\n"
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
                "\tassign\to_dbg = { ((o_sync)&&(i_ce)), i_ce, o_left[(2*OWIDTH-1):(2*OWIDTH-16)],\n"
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
                        "\t\t\t\t\to_left[(OWIDTH-1):(OWIDTH-16)] };\n"
"\n");
"\n");
Line 871... Line 914...
"//\n", prjname, creator);
"//\n", prjname, creator);
 
 
        fprintf(fp, "%s", cpyleft);
        fprintf(fp, "%s", cpyleft);
        fprintf(fp,
        fprintf(fp,
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
"module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n"
        "\tparameter\tAWIDTH=16,BWIDTH=AWIDTH;\n"
        "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW);
 
#ifdef  TST_SHIFTADDMPY_BW
 
        fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW);
 
#else
 
        fprintf(fp, "AWIDTH;\n");
 
#endif
 
        fprintf(fp,
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
        "\tinput\t\t[(AWIDTH-1):0]\t\ti_a;\n"
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
        "\tinput\t\t[(BWIDTH-1):0]\t\ti_b;\n"
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
        "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n"
"\n"
"\n"
Line 932... Line 981...
"endmodule\n");
"endmodule\n");
 
 
        fclose(fp);
        fclose(fp);
}
}
 
 
 
void    build_bimpy(const char *fname) {
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"////////////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   %s\n"
 
"//\n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    A simple 2-bit multiply based upon the fact that LUT's allow\n"
 
"//             6-bits of input.  In other words, I could build a 3-bit\n"
 
"//             multiply from 6 LUTs (5 actually, since the first could have\n"
 
"//             two outputs).  This would allow multiplication of three bit\n"
 
"//             digits, save only for the fact that you would need two bits\n"
 
"//             of carry.  The bimpy approach throttles back a bit and does\n"
 
"//             a 2x2 bit multiply in a LUT, guaranteeing that it will never\n"
 
"//             carry more than one bit.  While this multiply is hardware\n"
 
"//             independent (and can still run under Verilator therefore),\n"
 
"//             it is really motivated by trying to optimize for a specific\n"
 
"//             piece of hardware (Xilinx-7 series ...) that has at least\n"
 
"//             4-input LUT's with carry chains.\n"
 
"//\n"
 
"//\n"
 
"//\n%s"
 
"//\n", fname, prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
 
"\tparameter\tBW=18, // Number of bits in i_b\n"
 
"\t\t\tLUTB=2; // Number of bits in i_a for our LUT multiply\n"
 
"\tinput\t\t\t\ti_clk, i_ce;\n"
 
"\tinput\t\t[(LUTB-1):0]\ti_a;\n"
 
"\tinput\t\t[(BW-1):0]\ti_b;\n"
 
"\toutput\treg\t[(BW+LUTB-1):0] o_r;\n"
 
"\n"
 
"\twire [(BW+LUTB-2):0] w_r;\n"
 
"\twire [(BW+LUTB-3):1] c;\n"
 
"\n"
 
"\tassign\tw_r =  { ((i_a[1])?i_b:{(BW){1'b0}}), 1'b0 }\n"
 
"\t\t\t\t^ { 1'b0, ((i_a[0])?i_b:{(BW){1'b0}}) };\n"
 
"\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1'b0}}) }\n"
 
"\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1'b0}});\n"
 
"\n"
 
"\talways @(posedge i_clk)\n"
 
"\t\tif (i_ce)\n"
 
"\t\t\to_r <= w_r + { c, 2'b0 };\n"
 
"\n"
 
"endmodule\n");
 
 
 
        fclose(fp);
 
}
 
 
 
void    build_longbimpy(const char *fname) {
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"////////////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   %s\n"
 
"//\n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    A portable shift and add multiply, built with the knowledge\n"
 
"//             of the existence of a six bit LUT and carry chain.  That\n"
 
"//             knowledge allows us to multiply two bits from one value\n"
 
"//             at a time against all of the bits of the other value.  This\n"
 
"//             sub multiply is called the bimpy.\n"
 
"//\n"
 
"//             For minimal processing delay, make the first parameter\n"
 
"//             the one with the least bits, so that AWIDTH <= BWIDTH.\n"
 
"//\n"
 
"//\n"
 
"//\n%s"
 
"//\n", fname, prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module longbimpy(i_clk, i_ce, i_a, i_b, o_r);\n"
 
        "\tparameter    AW=%d,  // The width of i_a, min width is 5\n"
 
                        "\t\t\tBW=", TST_LONGBIMPY_AW);
 
#ifdef  TST_LONGBIMPY_BW
 
        fprintf(fp, "%d", TST_LONGBIMPY_BW);
 
#else
 
        fprintf(fp, "AW");
 
#endif
 
 
 
        fprintf(fp, ",  // The width of i_b, can be anything\n"
 
                        "\t\t\t// The following three parameters should not be changed\n"
 
                        "\t\t\t// by any implementation, but are based upon hardware\n"
 
                        "\t\t\t// and the above values:\n"
 
                        "\t\t\tOW=AW+BW,        // The output width\n"
 
                        "\t\t\tIW=(AW+1)&(-2),  // Internal width of A\n"
 
                        "\t\t\tLUTB=2,  // How many bits we can multiply by at once\n"
 
                        "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n"
 
        "\tinput\t\t\t\ti_clk, i_ce;\n"
 
        "\tinput\t\t[(AW-1):0]\ti_a;\n"
 
        "\tinput\t\t[(BW-1):0]\ti_b;\n"
 
        "\toutput\treg\t[(AW+BW-1):0]\to_r;\n"
 
"\n"
 
        "\treg\t[(IW-1):0]\tu_a;\n"
 
        "\treg\t[(BW-1):0]\tu_b;\n"
 
        "\treg\t\t\tsgn;\n"
 
"\n"
 
        "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n"
 
        "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n"
 
        "\treg\t[(TLEN-1):0]\t\tr_s;\n"
 
        "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n"
 
        "\tgenvar k;\n"
 
"\n"
 
        "\t// First step:\n"
 
        "\t// Switch to unsigned arithmetic for our multiply, keeping track\n"
 
        "\t// of the along the way.  We'll then add the sign again later at\n"
 
        "\t// the end.\n"
 
        "\t//\n"
 
        "\t// If we were forced to stay within two's complement arithmetic,\n"
 
        "\t// taking the absolute value here would require an additional bit.\n"
 
        "\t// However, because our results are now unsigned, we can stay\n"
 
        "\t// within the number of bits given (for now).\n"
 
        "\tgenerate if (IW > AW)\n"
 
        "\tbegin\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\n"
 
                        "\t\t\t\tu_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };\n"
 
        "\tend else begin\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\n"
 
                        "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n"
 
        "\tend endgenerate\n"
 
"\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\tbegin\n"
 
                        "\t\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n"
 
                        "\t\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n"
 
                "\t\tend\n"
 
"\n"
 
        "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n"
 
"\n"
 
        "\t//\n"
 
        "\t// Second step: First two 2xN products.\n"
 
        "\t//\n"
 
        "\t// Since we have no tableau of additions (yet), we can do both\n"
 
        "\t// of the first two rows at the same time and add them together.\n"
 
        "\t// For the next round, we'll then have a previous sum to accumulate\n"
 
        "\t// with new and subsequent product, and so only do one product at\n"
 
        "\t// a time can follow this--but the first clock can do two at a time.\n"
 
        "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);\n"
 
        "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce) r_b[0] <= u_b;\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n"
 
        "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n"
 
                "\t\tif (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}\n"
 
                        "\t\t\t  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };\n"
 
"\n"
 
        "\tgenerate // Keep track of intermediate values, before multiplying them\n"
 
        "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n"
 
        "\tbegin : gencopies\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\tbegin\n"
 
                        "\t\t\tr_a[k+1] <= { {(LUTB){1'b0}},\n"
 
                                "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n"
 
                        "\t\t\tr_b[k+1] <= r_b[k];\n"
 
                        "\t\tend\n"
 
        "\tend endgenerate\n"
 
"\n"
 
        "\tgenerate // The actual multiply and accumulate stage\n"
 
        "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n"
 
        "\tbegin : genstages\n"
 
                "\t\t// First, the multiply: 2-bits times BW bits\n"
 
                "\t\twire\t[(BW+LUTB-1):0] genp;\n"
 
                "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n"
 
"\n"
 
                "\t\t// Then the accumulate step -- on the next clock\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\n"
 
                                "\t\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},\n"
 
                                        "\t\t\t\t\tgenp, {{(LUTB*(k+2))}{1'b0}} };\n"
 
        "\tend endgenerate\n"
 
"\n"
 
        "\twire [(IW+BW-1):0]   w_r;\n"
 
        "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                        "\t\t\to_r <= w_r[(AW+BW-1):0];\n"
 
"\n"
 
"endmodule\n");
 
 
 
        fclose(fp);
 
}
 
 
void    build_dblreverse(const char *fname) {
void    build_dblreverse(const char *fname) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                perror("O/S Err was:");
Line 1012... Line 1268...
"//\n"
"//\n"
"//\n");
"//\n");
        fprintf(fp,
        fprintf(fp,
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
"module dblreverse(i_clk, i_rst, i_ce, i_in_0, i_in_1,\n"
        "\t\to_out_0, o_out_1, o_sync);\n"
        "\t\to_out_0, o_out_1, o_sync);\n"
        "\tparameter\t\t\tLGSIZE=5, WIDTH=24;\n"
        "\tparameter\t\t\tLGSIZE=%d, WIDTH=24;\n"
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
        "\tinput\t\t\t\ti_clk, i_rst, i_ce;\n"
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
        "\tinput\t\t[(2*WIDTH-1):0]\ti_in_0, i_in_1;\n"
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
        "\toutput\twire\t[(2*WIDTH-1):0]\to_out_0, o_out_1;\n"
        "\toutput\treg\t\t\to_sync;\n"
        "\toutput\treg\t\t\to_sync;\n", TST_DBLREVERSE_LGSIZE);
 
 
 
        fprintf(fp,
"\n"
"\n"
        "\treg\t\t\tin_reset;\n"
        "\treg\t\t\tin_reset;\n"
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
        "\treg\t[(LGSIZE-1):0]\tiaddr;\n"
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
        "\twire\t[(LGSIZE-3):0]\tbraddr;\n"
"\n"
"\n"
Line 1181... Line 1439...
 
 
        fprintf(fp,
        fprintf(fp,
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
"module\tbutterfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
                "\t\to_left, o_right, o_aux);\n"
                "\t\to_left, o_right, o_aux);\n"
        "\t// Public changeable parameters ...\n"
        "\t// Public changeable parameters ...\n"
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
        "\tparameter IWIDTH=%d,", TST_BUTTERFLY_IWIDTH);
 
#ifdef  TST_BUTTERFLY_CWIDTH
 
        fprintf(fp, "CWIDTH=%d,", TST_BUTTERFLY_CWIDTH);
 
#else
 
        fprintf(fp, "CWIDTH=IWIDTH+%d,", xtracbits);
 
#endif
 
#ifdef  TST_BUTTERFLY_OWIDTH
 
        fprintf(fp, "OWIDTH=%d;\n", TST_BUTTERFLY_OWIDTH);
 
#else
 
        fprintf(fp, "OWIDTH=IWIDTH+1;\n");
 
#endif
 
        fprintf(fp,
        "\t// Parameters specific to the core that should not be changed.\n"
        "\t// Parameters specific to the core that should not be changed.\n"
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
        "\tparameter    MPYDELAY=%d'd%d,\n"
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
                        "\t\t\tSHIFT=0, AUXLEN=(MPYDELAY+3);\n"
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
        "\t// this value is fractional, then round up to the nearest\n"
        "\t// this value is fractional, then round up to the nearest\n"
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
        "\tparameter\tLGDELAY=%d;\n"
        "\tparameter\tLGDELAY=%d;\n"
Line 1195... Line 1464...
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
        "\tinput\t\t[(2*CWIDTH-1):0] i_coef;\n"
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
        "\tinput\t\t[(2*IWIDTH-1):0] i_left, i_right;\n"
        "\tinput\t\ti_aux;\n"
        "\tinput\t\ti_aux;\n"
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
        "\toutput\twire [(2*OWIDTH-1):0] o_left, o_right;\n"
        "\toutput\treg\to_aux;\n"
        "\toutput\treg\to_aux;\n"
        "\n", 16, xtracbits, lgdelay(16,xtracbits),
        "\n", lgdelay(16,xtracbits), bflydelay(16, xtracbits),
        bflydelay(16, xtracbits), lgdelay(16,xtracbits));
                lgdelay(16,xtracbits));
        fprintf(fp,
        fprintf(fp,
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
        "\twire\t[(OWIDTH-1):0] o_left_r, o_left_i, o_right_r, o_right_i;\n"
"\n"
"\n"
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
        "\treg\t[(2*IWIDTH-1):0]\tr_left, r_right;\n"
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
        "\treg\t\t\t\tr_aux, r_aux_2;\n"
Line 1292... Line 1561...
"\n");
"\n");
        fprintf(fp,
        fprintf(fp,
        "\t// This should really be based upon an IF, such as in\n"
        "\t// This should really be based upon an IF, such as in\n"
        "\t// if (IWIDTH < CWIDTH) then ...\n"
        "\t// if (IWIDTH < CWIDTH) then ...\n"
        "\t// However, this is the only (other) way I know to do it.\n"
        "\t// However, this is the only (other) way I know to do it.\n"
        "\tgenerate\n"
        "\tgenerate if (CWIDTH < IWIDTH+1)\n"
        "\tif (CWIDTH < IWIDTH+1)\n"
 
        "\tbegin\n"
        "\tbegin\n"
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
                "\n"
                "\n"
                "\t\t// We need to pad these first two multiplies by an extra\n"
                "\t\t// We need to pad these first two multiplies by an extra\n"
                "\t\t// bit just to keep them aligned with the third,\n"
                "\t\t// bit just to keep them aligned with the third,\n"
                "\t\t// simpler, multiply.\n"
                "\t\t// simpler, multiply.\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p1(i_clk, i_ce,\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r},\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r}, p_one);\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p2(i_clk, i_ce,\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i},\n"
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
                                "\t\t\t\t{r_dif_i[IWIDTH],r_dif_i}, p_two);\n"
                "\t\tshiftaddmpy #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
                "\t\t%s #(CWIDTH+1,IWIDTH+2) p3(i_clk, i_ce,\n"
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
                        "\t\t\t\tp3c_in, p3d_in, p_three);\n"
        "\tend else begin\n"
        "\tend else begin\n"
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
                "\t\twire\t[(CWIDTH):0]\tp3c_in;\n"
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
                "\t\twire\t[(IWIDTH+1):0]\tp3d_in;\n"
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
                "\t\tassign\tp3c_in = ir_coef_i + ir_coef_r;\n"
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
                "\t\tassign\tp3d_in = r_dif_r + r_dif_i;\n"
                "\n"
                "\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p1a(i_clk, i_ce,\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
                                "\t\t\t\t{r_dif_r[IWIDTH],r_dif_r},\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
                                "\t\t\t\t{ir_coef_r[CWIDTH-1],ir_coef_r}, p_one);\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p2a(i_clk, i_ce,\n"
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
                                "\t\t\t\t{r_dif_i[IWIDTH], r_dif_i},\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
                                "\t\t\t\t{ir_coef_i[CWIDTH-1],ir_coef_i}, p_two);\n"
                "\t\tshiftaddmpy #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
                "\t\t%s #(IWIDTH+2,CWIDTH+1) p3a(i_clk, i_ce,\n"
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
                                "\t\t\t\tp3d_in, p3c_in, p_three);\n"
        "\tend\n"
        "\tend\n"
        "\tendgenerate\n"
        "\tendgenerate\n"
"\n");
"\n",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy",
 
                (USE_OLD_MULTIPLY)?"shiftaddmpy":"longbimpy");
        fprintf(fp,
        fprintf(fp,
        "\t// These values are held in memory and delayed during the\n"
        "\t// These values are held in memory and delayed during the\n"
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
        "\t// therefore, the left_x values need to be right shifted by\n"
        "\t// therefore, the left_x values need to be right shifted by\n"
Line 2268... Line 2542...
                                (inverse)?"I":"");
                                (inverse)?"I":"");
                if (real_fft)
                if (real_fft)
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
                        fprintf(hdr, "#define\tRL%sFFT\n\n", (inverse)?"I":"");
                if (!single_clock)
                if (!single_clock)
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
                        fprintf(hdr, "#define\tDBLCLK%sFFT\n\n", (inverse)?"I":"");
 
                if (USE_OLD_MULTIPLY)
 
                        fprintf(hdr, "#define\tUSE_OLD_MULTIPLY\n\n");
 
 
 
                fprintf(hdr, "// Parameters for testing the longbimpy\n");
 
                fprintf(hdr, "#define\tTST_LONGBIMPY_AW\t%d\n", TST_LONGBIMPY_AW);
 
#ifdef  TST_LONGBIMPY_BW
 
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\t%d\n\n", TST_LONGBIMPY_BW);
 
#else
 
                fprintf(hdr, "#define\tTST_LONGBIMPY_BW\tTST_LONGBIMPY_AW\n\n");
 
#endif
 
 
 
                fprintf(hdr, "// Parameters for testing the shift add multiply\n");
 
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_AW\t%d\n", TST_SHIFTADDMPY_AW);
 
#ifdef  TST_SHIFTADDMPY_BW
 
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\t%d\n\n", TST_SHIFTADDMPY_BW);
 
#else
 
                fprintf(hdr, "#define\tTST_SHIFTADDMPY_BW\tTST_SHIFTADDMPY_AW\n\n");
 
#endif
 
 
 
#define TST_SHIFTADDMPY_AW      16
 
#define TST_SHIFTADDMPY_BW      20      // Leave undefined to match AW
 
                fprintf(hdr, "// Parameters for testing the butterfly\n");
 
                fprintf(hdr, "#define\tTST_BUTTERFLY_IWIDTH\t%d\n", TST_BUTTERFLY_IWIDTH);
 
                fprintf(hdr, "#define\tTST_BUTTERFLY_CWIDTH\t%d\n", TST_BUTTERFLY_CWIDTH);
 
                fprintf(hdr, "#define\tTST_BUTTERFLY_OWIDTH\t%d\n", TST_BUTTERFLY_OWIDTH);
 
                fprintf(hdr, "#define\tTST_BUTTERFLY_MPYDELAY\t%d\n\n",
 
                                bflydelay(TST_BUTTERFLY_IWIDTH,
 
                                        TST_BUTTERFLY_CWIDTH-TST_BUTTERFLY_IWIDTH));
 
 
 
                fprintf(hdr, "// Parameters for testing the quarter stage\n");
 
                fprintf(hdr, "#define\tTST_QTRSTAGE_IWIDTH\t%d\n", TST_QTRSTAGE_IWIDTH);
 
                fprintf(hdr, "#define\tTST_QTRSTAGE_LGWIDTH\t%d\n\n", TST_QTRSTAGE_LGWIDTH);
 
 
 
                fprintf(hdr, "// Parameters for testing the double stage\n");
 
                fprintf(hdr, "#define\tTST_DBLSTAGE_IWIDTH\t%d\n", TST_DBLSTAGE_IWIDTH);
 
                fprintf(hdr, "#define\tTST_DBLSTAGE_SHIFT\t%d\n\n", TST_DBLSTAGE_SHIFT);
 
 
 
                fprintf(hdr, "// Parameters for testing the bit reversal stage\n");
 
                fprintf(hdr, "#define\tTST_DBLREVERSE_LGSIZE\t%d\n\n", TST_DBLREVERSE_LGSIZE);
                fprintf(hdr, "\n" "#endif\n\n");
                fprintf(hdr, "\n" "#endif\n\n");
                fclose(hdr);
                fclose(hdr);
        }
        }
 
 
        {
        {
Line 2614... Line 2927...
                if (nummpy > 0) {
                if (nummpy > 0) {
                        fname = coredir + "/hwbfly.v";
                        fname = coredir + "/hwbfly.v";
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
                }
                }
 
 
 
                {
 
                        // To make debugging easier, we build both of these
                fname = coredir + "/shiftaddmpy.v";
                fname = coredir + "/shiftaddmpy.v";
                build_multiply(fname.c_str());
                build_multiply(fname.c_str());
 
 
 
                        fname = coredir + "/longbimpy.v";
 
                        build_longbimpy(fname.c_str());
 
                        fname = coredir + "/bimpy.v";
 
                        build_bimpy(fname.c_str());
 
                }
 
 
                if ((dbg)&&(dbgstage == 4)) {
                if ((dbg)&&(dbgstage == 4)) {
                        fname = coredir + "/qtrstage_dbg.v";
                        fname = coredir + "/qtrstage_dbg.v";
                        build_quarters(fname.c_str(), rounding, true);
                        build_quarters(fname.c_str(), rounding, true);
                }
                }
                fname = coredir + "/qtrstage.v";
                fname = coredir + "/qtrstage.v";

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.