OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [sw/] [fftgen.cpp] - Diff between revs 22 and 23

Go to most recent revision | Show entire file | Details | Blame | View Log

Rev 22 Rev 23
Line 59... Line 59...
#include <ctype.h>
#include <ctype.h>
#include <assert.h>
#include <assert.h>
 
 
#define COREDIR "fft-core"
#define COREDIR "fft-core"
 
 
 
// ROUND_T
//
// Selects which bit-dropping module the stage generators instantiate in the
// Verilog they write.  Each value is mapped to a module name by the stage
// builders (build_quarters, build_dblstage).
typedef enum {
        RND_TRUNCATE,   // instantiate the "truncate" module
        RND_FROMZERO,   // instantiate the "roundfromzero" module
        RND_HALFUP,     // instantiate the "roundhalfup" module
        RND_CONVERGENT  // instantiate the "convround" module
} ROUND_T;
 
 
const char      cpyleft[] =
const char      cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"//\n"
Line 123... Line 127...
        // more storage space to look for these values, but without a 
        // more storage space to look for these values, but without a 
        // redesign that's just what we'll deal with.
        // redesign that's just what we'll deal with.
        return lgval(bflydelay(nbits, xtra)+3);
        return lgval(bflydelay(nbits, xtra)+3);
}
}
 
 
void    build_quarters(const char *fname) {
void    build_truncator(const char *fname) {
 
        printf("TRUNCATING!\n");
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"///////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   truncate.v\n"
 
"//             \n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    Truncation is one of several options that can be used\n"
 
"//             internal to the various FFT stages to drop bits from one \n"
 
"//             stage to the next.  In general, it is the simplest method\n"
 
"//             of dropping bits, since it requires only a bit selection.\n"
 
"//\n"
 
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
 
"//             since it tends to produce a DC bias in the result.  (Other\n"
 
"//             less pronounced biases may also exist.)\n"
 
"//\n"
 
"//             This particular version also registers the output with the\n"
 
"//             clock, so there will be a delay of one going through this\n"
 
"//             module.  This will keep it in line with the other forms of\n"
 
"//             rounding that can be used.\n"
 
"//\n"
 
"//\n%s"
 
"//\n",
 
                prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module truncate(i_clk, i_ce, i_val, o_val);\n"
 
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
 
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
 
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
 
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
 
"\n"
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\t\to_val <= i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
"\n"
 
"endmodule\n");
 
}
 
 
 
 
 
void    build_roundhalfup(const char *fname) {
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"///////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   roundhalfup.v\n"
 
"//             \n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    Rounding half up is the way I was always taught to round in\n"
 
"//             school.  A one half value is added to the result, and then\n"
 
"//             the result is truncated.  When used in an FFT, this produces\n"
 
"//             less bias than the truncation method, although a bias still\n"
 
"//             tends to remain.\n"
 
"//\n"
 
"//\n%s"
 
"//\n",
 
                prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module roundhalfup(i_clk, i_ce, i_val, o_val);\n"
 
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
 
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
 
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
 
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
 
"\n"
 
        "\t// Let's deal with two cases to be as general as we can be here\n"
 
        "\t//\n"
 
        "\t//   1. The desired output would lose no bits at all\n"
 
        "\t//   2. One or more bits would be dropped, so the rounding is simply\n"
 
        "\t//\t\ta matter of adding one to the bit about to be dropped,\n"
 
        "\t//\t\tmoving all halfway and above numbers up to the next\n"
 
        "\t//\t\tvalue.\n"
 
        "\tgenerate\n"
 
        "\tif (IWID-SHIFT == OWID)\n"
 
        "\tbegin // No truncation or rounding, output drops no bits\n"
 
"\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
 
"\n"
 
        "\tend else // if (IWID-SHIFT-1 >= OWID)\n"
 
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
 
                "\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
 
                "\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
 
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
 
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
 
"\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                "\t\t\tif (i_ce)\n"
 
                "\t\t\tbegin\n"
 
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
 
                        "\t\t\t\t\to_val <= truncated_value;\n"
 
                        "\t\t\t\telse\n"
 
                        "\t\t\t\t\to_val <= rounded_up; // even value\n"
 
                "\t\t\tend\n"
 
"\n"
 
        "\tend\n"
 
        "\tendgenerate\n"
 
"\n"
 
"endmodule\n");
 
}
 
 
 
void    build_roundfromzero(const char *fname) {
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"///////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   roundfromzero.v\n"
 
"//             \n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    Truncation is one of several options that can be used\n"
 
"//             internal to the various FFT stages to drop bits from one \n"
 
"//             stage to the next.  In general, it is the simplest method\n"
 
"//             of dropping bits, since it requires only a bit selection.\n"
 
"//\n"
 
"//             This form of rounding isn\'t really that great for FFT\'s,\n"
 
"//             since it tends to produce a DC bias in the result.  (Other\n"
 
"//             less pronounced biases may also exist.)\n"
 
"//\n"
 
"//             This particular version also registers the output with the\n"
 
"//             clock, so there will be a delay of one going through this\n"
 
"//             module.  This will keep it in line with the other forms of\n"
 
"//             rounding that can be used.\n"
 
"//\n"
 
"//\n%s"
 
"//\n",
 
                prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module convround(i_clk, i_ce, i_val, o_val);\n"
 
        "\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
 
        "\tinput\t\t\t\t\ti_clk, i_ce;\n"
 
        "\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
 
        "\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
 
"\n"
 
        "\t// Let's deal with three cases to be as general as we can be here\n"
 
        "\t//\n"
 
        "\t//\t1. The desired output would lose no bits at all\n"
 
        "\t//\t2. One bit would be dropped, so the rounding is simply\n"
 
        "\t//\t\tadjusting the value to be the closer to zero in\n"
 
        "\t//\t\tcases of being halfway between two.  If identically\n"
 
        "\t//\t\tequal to a number, we just leave it as is.\n"
 
        "\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
 
        "\t//\t\tnormally unless we are rounding a value of exactly\n"
 
        "\t//\t\thalfway between the two.  In the halfway case, we\n"
 
        "\t//\t\tround away from zero.\n"
 
        "\tgenerate\n"
 
        "\tif (IWID-SHIFT == OWID)\n"
 
        "\tbegin // No truncation or rounding, output drops no bits\n"
 
"\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
 
"\n"
 
        "\tend else if (IWID-SHIFT-1 == OWID)\n"
 
        "\tbegin // Output drops one bit, can only add one or ... not.\n"
 
        "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
 
        "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
 
        "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
        "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
 
        "\t\tassign\tfirst_lost_bit = i_val[0];\n"
 
        "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
 
"\n"
 
        "\t\talways @(posedge i_clk)\n"
 
                "\t\t\tif (i_ce)\n"
 
                "\t\t\tbegin\n"
 
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
 
                                "\t\t\t\t\to_val <= truncated_value;\n"
 
                        "\t\t\t\telse if (sign_bit)\n"
 
                                "\t\t\t\t\to_val <= truncated_value;\n"
 
                        "\t\t\t\telse\n"
 
                                "\t\t\t\t\to_val <= rounded_up;\n"
 
                "\t\t\tend\n"
 
"\n"
 
        "\tend else // If there's more than one bit we are dropping\n"
 
        "\tbegin\n"
 
                "\t\twire\t[(OWID-1):0]\ttruncated_value, rounded_up;\n"
 
                "\t\twire\t\t\tsign_bit, first_lost_bit;\n"
 
                "\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
                "\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
 
                "\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
 
                "\t\tassign\tsign_bit = i_val[(IWID-1)];\n"
 
"\n"
 
                "\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
 
                "\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
 
"\n"
 
                "\t\talways @(posedge i_clk)\n"
 
                        "\t\t\tif (i_ce)\n"
 
                        "\t\t\tbegin\n"
 
                        "\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
 
                                "\t\t\t\t\to_val <= truncated_value;\n"
 
                        "\t\t\t\telse if (|other_lost_bits) // Round up to\n"
 
                                "\t\t\t\t\to_val <= rounded_up; // closest value\n"
 
                        "\t\t\t\telse if (sign_bit)\n"
 
                                "\t\t\t\t\to_val <= truncated_value;\n"
 
                        "\t\t\t\telse\n"
 
                                "\t\t\t\t\to_val <= rounded_up;\n"
 
                        "\t\t\tend\n"
 
        "\tend\n"
 
        "\tendgenerate\n"
 
"\n"
 
"endmodule\n");
 
}
 
 
 
void    build_convround(const char *fname) {
 
        printf("CONVERGENT--ROUNDING!\n");
 
        FILE    *fp = fopen(fname, "w");
 
        if (NULL == fp) {
 
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
 
                perror("O/S Err was:");
 
                return;
 
        }
 
 
 
        fprintf(fp,
 
"///////////////////////////////////////////////////////////////////////////\n"
 
"//\n"
 
"// Filename:   convround.v\n"
 
"//             \n"
 
"// Project:    %s\n"
 
"//\n"
 
"// Purpose:    A convergent rounding routine, also known as banker\'s\n"
 
"//             rounding, Dutch rounding, Gaussian rounding, unbiased\n"
 
"//             rounding, or ... more, at least according to Wikipedia.\n"
 
"//\n"
 
"//             This form of rounding works by rounding, when the direction\n"
 
"//             is in question, towards the nearest even value.\n"
 
"//\n"
 
"//\n%s"
 
"//\n",
 
                prjname, creator);
 
 
 
        fprintf(fp, "%s", cpyleft);
 
        fprintf(fp,
 
"module convround(i_clk, i_ce, i_val, o_val);\n"
 
"\tparameter\tIWID=16, OWID=8, SHIFT=0;\n"
 
"\tinput\t\t\t\t\ti_clk, i_ce;\n"
 
"\tinput\t\tsigned\t[(IWID-1):0]\ti_val;\n"
 
"\toutput\treg\tsigned\t[(OWID-1):0]\to_val;\n"
 
"\n"
 
"\t// Let's deal with three cases to be as general as we can be here\n"
 
"\t//\n"
 
"\t//\t1. The desired output would lose no bits at all\n"
 
"\t//\t2. One bit would be dropped, so the rounding is simply\n"
 
"\t//\t\tadjusting the value to be the nearest even number in\n"
 
"\t//\t\tcases of being halfway between two.  If identically\n"
 
"\t//\t\tequal to a number, we just leave it as is.\n"
 
"\t//\t3. Two or more bits would be dropped.  In this case, we round\n"
 
"\t//\t\tnormally unless we are rounding a value of exactly\n"
 
"\t//\t\thalfway between the two.  In the halfway case we round\n"
 
"\t//\t\tto the nearest even number.\n"
 
"\tgenerate\n"
 
"\tif (IWID-SHIFT == OWID)\n"
 
"\tbegin // No truncation or rounding, output drops no bits\n"
 
"\n"
 
"\t\talways @(posedge i_clk)\n"
 
"\t\t\tif (i_ce)\to_val <= i_val[(IWID-SHIFT-1):0];\n"
 
"\n"
 
"\tend else if (IWID-SHIFT-1 == OWID)\n"
 
"\tbegin // Output drops one bit, can only add one or ... not.\n"
 
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
 
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
 
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
 
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
 
"\t\tassign\tfirst_lost_bit = i_val[0];\n"
 
"\n"
 
"\t\talways @(posedge i_clk)\n"
 
"\t\t\tif (i_ce)\n"
 
"\t\t\tbegin\n"
 
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
 
"\t\t\t\t\to_val <= truncated_value;\n"
 
"\t\t\t\telse if (last_valid_bit)// Round up to nearest\n"
 
"\t\t\t\t\to_val <= rounded_up; // even value\n"
 
"\t\t\t\telse // else round down to the nearest\n"
 
"\t\t\t\t\to_val <= truncated_value; // even value\n"
 
"\t\t\tend\n"
 
"\n"
 
"\tend else // If there's more than one bit we are dropping\n"
 
"\tbegin\n"
 
"\t\twire\t[(OWID-1):0] truncated_value, rounded_up;\n"
 
"\t\twire\t\t\tlast_valid_bit, first_lost_bit;\n"
 
"\t\tassign\ttruncated_value=i_val[(IWID-1-SHIFT):(IWID-SHIFT-OWID)];\n"
 
"\t\tassign\trounded_up=truncated_value + {{(OWID-1){1'b0}}, 1'b1 };\n"
 
"\t\tassign\tlast_valid_bit = truncated_value[0];\n"
 
"\t\tassign\tfirst_lost_bit = i_val[(IWID-SHIFT-OWID-1)];\n"
 
"\n"
 
"\t\twire\t[(IWID-SHIFT-OWID-2):0]\tother_lost_bits;\n"
 
"\t\tassign\tother_lost_bits = i_val[(IWID-SHIFT-OWID-2):0];\n"
 
"\n"
 
"\t\talways @(posedge i_clk)\n"
 
"\t\t\tif (i_ce)\n"
 
"\t\t\tbegin\n"
 
"\t\t\t\tif (~first_lost_bit) // Round down / truncate\n"
 
"\t\t\t\t\to_val <= truncated_value;\n"
 
"\t\t\t\telse if (|other_lost_bits) // Round up to\n"
 
"\t\t\t\t\to_val <= rounded_up; // closest value\n"
 
"\t\t\t\telse if (last_valid_bit) // Round up to\n"
 
"\t\t\t\t\to_val <= rounded_up; // nearest even\n"
 
"\t\t\t\telse   // else round down to nearest even\n"
 
"\t\t\t\t\to_val <= truncated_value;\n"
 
"\t\t\tend\n"
 
"\tend\n"
 
"\tendgenerate\n"
 
"\n"
 
"endmodule\n");
 
}
 
 
 
void    build_quarters(const char *fname, ROUND_T rounding) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                perror("O/S Err was:");
                return;
                return;
        }
        }
 
        const   char    *rnd_string;
 
        if (rounding == RND_TRUNCATE)
 
                rnd_string = "truncate";
 
        else if (rounding == RND_FROMZERO)
 
                rnd_string = "roundfromzero";
 
        else if (rounding == RND_HALFUP)
 
                rnd_string = "roundhalfup";
 
        else
 
                rnd_string = "convround";
 
 
 
 
        fprintf(fp,
        fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Filename:   qtrstage.v\n"
"// Filename:   qtrstage.v\n"
Line 154... Line 500...
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
"module\tqtrstage(i_clk, i_rst, i_ce, i_sync, i_data, o_data, o_sync);\n"
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
        "\tparameter    IWIDTH=16, OWIDTH=IWIDTH+1;\n"
        "\t// Parameters specific to the core that should be changed when this\n"
        "\t// Parameters specific to the core that should be changed when this\n"
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
        "\t// core is built ... Note that the minimum LGSPAN is 2.  Smaller \n"
        "\t// spans must use the fftdoubles stage.\n"
        "\t// spans must use the fftdoubles stage.\n"
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0,ROUND=1;\n"
        "\tparameter\tLGWIDTH=8, ODD=0, INVERSE=0,SHIFT=0;\n"
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t                              i_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
        "\tinput\t      [(2*IWIDTH-1):0]        i_data;\n"
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
        "\toutput\treg  [(2*OWIDTH-1):0]        o_data;\n"
        "\toutput\treg                          o_sync;\n"
        "\toutput\treg                          o_sync;\n"
        "\t\n");
        "\t\n");
        fprintf(fp,
        fprintf(fp,
        "\treg\t        wait_for_sync;\n"
        "\treg\t        wait_for_sync;\n"
        "\treg\t[2:0]   pipeline;\n"
        "\treg\t[3:0]   pipeline;\n"
"\n"
"\n"
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
        "\treg\t[(IWIDTH):0]    sum_r, sum_i, diff_r, diff_i;\n"
        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"
        "\twire\t[(IWIDTH):0]   n_diff_r, n_diff_i;\n"
        "\tassign n_diff_r = -diff_r;\n"
        "\tassign n_diff_r = -diff_r;\n"
        "\tassign n_diff_i = -diff_i;\n"
        "\tassign n_diff_i = -diff_i;\n"
"\n"
"\n"
        "\treg\t[(2*OWIDTH-1):0]        ob_a;\n"
        "\treg\t[(2*OWIDTH-1):0]\tob_a;\n"
        "\twire\t[(2*OWIDTH-1):0]       ob_b;\n"
        "\twire\t[(2*OWIDTH-1):0]\tob_b;\n"
        "\treg\t[(OWIDTH-1):0]          ob_b_r, ob_b_i;\n"
        "\treg\t[(OWIDTH-1):0]\t\tob_b_r, ob_b_i;\n"
        "\tassign       ob_b = { ob_b_r, ob_b_i };\n"
        "\tassign\tob_b = { ob_b_r, ob_b_i };\n"
"\n"
"\n"
        "\treg\t[(LGWIDTH-1):0]         iaddr;\n"
        "\treg\t[(LGWIDTH-1):0]\t\tiaddr;\n"
        "\treg\t[(2*IWIDTH-1):0]        imem;\n"
        "\treg\t[(2*IWIDTH-1):0]\timem;\n"
"\n"
"\n"
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
        "\twire\tsigned\t[(IWIDTH-1):0]\timem_r, imem_i;\n"
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
        "\tassign\timem_r = imem[(2*IWIDTH-1):(IWIDTH)];\n"
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
        "\tassign\timem_i = imem[(IWIDTH-1):0];\n"
"\n"
"\n"
Line 188... Line 534...
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
        "\tassign\ti_data_i = i_data[(IWIDTH-1):0];\n"
"\n"
"\n"
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
        "\treg  [(2*OWIDTH-1):0]        omem;\n"
"\n");
"\n");
        fprintf(fp,
        fprintf(fp,
        "\twire [(IWIDTH-1):0]  rnd;\n"
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_sum_r, rnd_sum_i, rnd_diff_r, rnd_diff_i,\n");
        "\tgenerate\n"
        fprintf(fp,
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
        "\t\t\t\t\tn_rnd_diff_r, n_rnd_diff_i;\n");
                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
        fprintf(fp,
        "\telse\n"
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_r(i_clk, i_ce,\n"
                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
        "\t\t\t\tsum_r, rnd_sum_r);\n\n", rnd_string);
        "\tendgenerate\n"
        fprintf(fp,
"\n"
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_sum_i(i_clk, i_ce,\n"
 
        "\t\t\t\tsum_i, rnd_sum_i);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_r(i_clk, i_ce,\n"
 
        "\t\t\t\tdiff_r, rnd_diff_r);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_diff_i(i_clk, i_ce,\n"
 
        "\t\t\t\tdiff_i, rnd_diff_i);\n\n", rnd_string);
 
        fprintf(fp, "\tassign n_rnd_diff_r = - rnd_diff_r;\n"
 
                "\tassign n_rnd_diff_i = - rnd_diff_i;\n");
 
/*
 
        fprintf(fp,
 
        "\twire [(IWIDTH-1):0]  rnd;\n"
 
        "\tgenerate\n"
 
        "\tif ((ROUND)&&((IWIDTH+1-OWIDTH-SHIFT)>0))\n"
 
                "\t\tassign rnd = { {(IWIDTH-1){1'b0}}, 1'b1 };\n"
 
        "\telse\n"
 
                "\t\tassign rnd = { {(IWIDTH){1'b0}}};\n"
 
        "\tendgenerate\n"
 
"\n"
 
*/
 
        fprintf(fp,
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_rst)\n"
                "\t\tif (i_rst)\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\twait_for_sync <= 1'b1;\n"
                        "\t\t\twait_for_sync <= 1'b1;\n"
                        "\t\t\tiaddr <= 0;\n"
                        "\t\t\tiaddr <= 0;\n"
                        "\t\t\tpipeline <= 3'b000;\n"
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
                "\t\tend\n"
 
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
 
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\t// Always\n"
 
                        "\t\t\timem <= i_data;\n"
                        "\t\t\timem <= i_data;\n"
                        "\t\t\tiaddr <= iaddr + 1;\n"
                        "\t\t\tiaddr <= iaddr + 1;\n"
                        "\t\t\twait_for_sync <= 1'b0;\n"
                        "\t\t\twait_for_sync <= 1'b0;\n"
"\n"
                "\t\tend\n\n");
                        "\t\t\t// In sequence, clock = 0\n"
        fprintf(fp,
                        "\t\t\tif (iaddr[0])\n"
        "\t// Note that we don\'t check on wait_for_sync or i_sync here.\n"
                        "\t\t\tbegin\n"
        "\t// Why not?  Because iaddr will always be zero until after the\n"
                                "\t\t\t\tsum_r  <= imem_r + i_data_r + rnd;\n"
        "\t// first i_ce, so we are safe.\n"
                                "\t\t\t\tsum_i  <= imem_i + i_data_i + rnd;\n"
        "\talways\t@(posedge i_clk)\n"
                                "\t\t\t\tdiff_r <= imem_r - i_data_r + rnd;\n"
                "\t\tif (i_rst)\n"
                                "\t\t\t\tdiff_i <= imem_i - i_data_i + rnd;\n"
                        "\t\t\tpipeline <= 4'h0;\n"
"\n"
                "\t\telse if (i_ce) // is our pipeline process full?  Which stages?\n"
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b1 };\n"
                        "\t\t\tpipeline <= { pipeline[2:0], iaddr[0] };\n\n");
                        "\t\t\tend else\n"
        fprintf(fp,
                        "\t\t\t\tpipeline[2:0] <= { pipeline[1:0], 1'b0 };\n"
        "\t// This is the pipeline[-1] stage, pipeline[0] will be set next.\n"
"\n"
        "\talways\t@(posedge i_clk)\n"
                        "\t\t\t// In sequence, clock = 1\n"
                "\t\tif ((i_ce)&&(iaddr[0]))\n"
                        "\t\t\tif (pipeline[1])\n"
                "\t\tbegin\n"
 
                        "\t\t\tsum_r  <= imem_r + i_data_r;\n"
 
                        "\t\t\tsum_i  <= imem_i + i_data_i;\n"
 
                        "\t\t\tdiff_r <= imem_r - i_data_r;\n"
 
                        "\t\t\tdiff_i <= imem_i - i_data_i;\n"
 
                "\t\tend\n\n");
 
        fprintf(fp,
 
        "\t// pipeline[1] takes sum_x and diff_x and produces rnd_x\n\n");
 
        fprintf(fp,
 
        "\t// Now for pipeline[2]\n"
 
        "\talways\t@(posedge i_clk)\n"
 
                "\t\tif ((i_ce)&&(pipeline[2]))\n"
 
                "\t\tbegin\n"
 
                        "\t\t\tob_a <= { rnd_sum_r, rnd_sum_i };\n"
 
                        "\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
 
                        "\t\t\tif (ODD == 0)\n"
                        "\t\t\tbegin\n"
                        "\t\t\tbegin\n"
"\t\t\t\tob_a <= { sum_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)],\n"
                        "\t\t\t\tob_b_r <= rnd_diff_r;\n"
        "\t\t\t\t\t\tsum_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)] };\n"
                        "\t\t\t\tob_b_i <= rnd_diff_i;\n"
                                "\t\t\t\t// on Even, W = e^{-j2pi 1/4 0} = 1\n"
                        "\t\t\tend else if (INVERSE==0) begin\n"
                                "\t\t\t\tif (ODD == 0)\n"
                        "\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
                                "\t\t\t\tbegin\n"
                        "\t\t\t\tob_b_r <=   rnd_diff_i;\n"
"\t\t\t\t\tob_b_r <= diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
                        "\t\t\t\tob_b_i <= n_rnd_diff_r;\n"
"\t\t\t\t\tob_b_i <= diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
                        "\t\t\tend else begin\n"
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
                        "\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
                        "\t\t\t\tob_b_r <= n_rnd_diff_i;\n"
                                "\t\t\t\tend else if (INVERSE==0) begin\n"
                        "\t\t\t\tob_b_i <=   rnd_diff_r;\n"
"\t\t\t\t\t// on Odd, W = e^{-j2pi 1/4} = -j\n"
 
"\t\t\t\t\tob_b_r <=   diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
"\t\t\t\t\tob_b_i <= n_diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
 
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
 
                                "\t\t\t\tend else begin\n"
 
"\t\t\t\t\t// on Odd, W = e^{j2pi 1/4} = j\n"
 
"\t\t\t\t\tob_b_r <= n_diff_i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
"\t\t\t\t\tob_b_i <=   diff_r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
// "\t\t\t\t\tob_b_r <=   { (OWIDTH) {1'b0} };\n"
 
// "\t\t\t\t\tob_b_i <=   { (OWIDTH) {1'b0} };\n"
 
 
 
                                "\t\t\t\tend\n"
 
                                "\t\t\t\t// (wire) ob_b <= { ob_b_r, ob_b_i };\n"
 
                        "\t\t\tend\n"
                        "\t\t\tend\n"
                        "\t\t\t// In sequence, clock = 2\n"
                "\t\tend\n\n");
                        "\t\t\tif (pipeline[2])\n"
        fprintf(fp,
 
        "\talways\t@(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\tbegin // In sequence, clock = 3\n"
 
                        "\t\t\tif (pipeline[3])\n"
                        "\t\t\tbegin\n"
                        "\t\t\tbegin\n"
                                "\t\t\t\tomem <= ob_b;\n"
                                "\t\t\t\tomem <= ob_b;\n"
                                "\t\t\t\to_data <= ob_a;\n"
                                "\t\t\t\to_data <= ob_a;\n"
                        "\t\t\tend else\n"
                        "\t\t\tend else\n"
                                "\t\t\t\to_data <= omem;\n"
                                "\t\t\t\to_data <= omem;\n"
                        "\t\t\t// Don\'t forget in the sync check that we are running\n"
                "\t\tend\n\n");
                        "\t\t\t// at two clocks per sample.  Thus we need to\n"
 
                        "\t\t\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
        fprintf(fp,
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b100);\n"
        "\t// Don\'t forget in the sync check that we are running\n"
                "\t\tend\n"
        "\t// at two clocks per sample.  Thus we need to\n"
"endmodule\n");
        "\t// produce a sync every 2^(LGWIDTH-1) clocks.\n"
 
        "\talways\t@(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                        "\t\t\to_sync <= &(~iaddr[(LGWIDTH-2):3]) && (iaddr[2:0] == 3'b101);\n");
 
        fprintf(fp, "endmodule\n");
}
}
 
 
void    build_dblstage(const char *fname) {
void    build_dblstage(const char *fname, ROUND_T rounding) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                perror("O/S Err was:");
                return;
                return;
        }
        }
 
 
 
        const   char    *rnd_string;
 
        if (rounding == RND_TRUNCATE)
 
                rnd_string = "truncate";
 
        else if (rounding == RND_FROMZERO)
 
                rnd_string = "roundfromzero";
 
        else if (rounding == RND_HALFUP)
 
                rnd_string = "roundhalfup";
 
        else
 
                rnd_string = "convround";
 
 
 
 
        fprintf(fp,
        fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Filename:   dblstage.v\n"
"// Filename:   dblstage.v\n"
"//\n"
"//\n"
Line 311... Line 695...
"//\n", prjname, creator);
"//\n", prjname, creator);
 
 
        fprintf(fp, "%s", cpyleft);
        fprintf(fp, "%s", cpyleft);
        fprintf(fp,
        fprintf(fp,
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
"module\tdblstage(i_clk, i_rst, i_ce, i_sync, i_left, i_right, o_left, o_right, o_sync);\n"
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0, ROUND=1;\n"
        "\tparameter\tIWIDTH=16,OWIDTH=IWIDTH+1, SHIFT=0;\n"
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t\ti_clk, i_rst, i_ce, i_sync;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
        "\toutput\treg\t\t\to_sync;\n"
        "\toutput\treg\t\t\to_sync;\n"
        "\n");
        "\n");
Line 329... Line 713...
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
                                "\t\t\t\t\to_out_1r, o_out_1i;\n"
"\n"
"\n"
"\n"
"\n"
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
        "\t// Handle a potential rounding situation, when IWIDTH>=OWIDTH.\n"
"\n"
"\n"
        "\twire\tsigned\t[(IWIDTH):0]\trnd;\n"
"\n");
"\n"
        fprintf(fp,
        "\tgenerate\n"
 
        "\tif ((ROUND==0)||(IWIDTH+1-OWIDTH-SHIFT==0))\n"
 
                "\t\tassign rnd = { {(IWIDTH+1){1'b0}} };\n"
 
        "\telse if (IWIDTH+1-OWIDTH-SHIFT==1)\n"
 
                "\t\tassign rnd = { {(IWIDTH){1'b0}}, 1'b1 };\n"
 
        "\telse if (IWIDTH+1-OWIDTH-SHIFT>1)\n"
 
                "\t\tassign rnd = { {(IWIDTH-(IWIDTH+1-OWIDTH-SHIFT-1)){1'b0}}, 1'b1, {(IWIDTH+1-OWIDTH-SHIFT-1){1'b0}} };\n"
 
        "\tendgenerate\n"
 
"\n"
 
        "\t// Don't forget that we accumulate a bit by adding two values\n"
        "\t// Don't forget that we accumulate a bit by adding two values\n"
        "\t// together. Therefore our intermediate value must have one more\n"
        "\t// together. Therefore our intermediate value must have one more\n"
        "\t// bit than the two originals.\n"
        "\t// bit than the two originals.\n"
        "\treg\t[IWIDTH:0]\tout_0r, out_0i, out_1r, out_1i;\n"
        "\twire\tsigned\t[(IWIDTH):0]\trnd_in_0r, rnd_in_0i, rnd_in_1r, rnd_in_1i;\n\n");
"\n"
        fprintf(fp,
        "\treg\twait_for_sync;\n"
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0r(i_clk, i_ce,\n"
 
        "\t\t\t\t\t\t\t\trnd_in_0r, o_out_0r);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_0i(i_clk, i_ce,\n"
 
        "\t\t\t\t\t\t\t\trnd_in_0i, o_out_0i);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1r(i_clk, i_ce,\n"
 
        "\t\t\t\t\t\t\t\trnd_in_1r, o_out_1r);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(IWIDTH+1,OWIDTH,SHIFT)\tdo_rnd_1i(i_clk, i_ce,\n"
 
        "\t\t\t\t\t\t\t\trnd_in_1i, o_out_1i);\n\n", rnd_string);
 
 
 
        fprintf(fp,
 
        "\treg\twait_for_sync, rnd_sync;\n"
"\n"
"\n"
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
                "\t\tif (i_rst)\n"
                "\t\tif (i_rst)\n"
 
                "\t\tbegin\n"
 
                        "\t\t\trnd_sync <= 1'b0;\n"
 
                        "\t\t\to_sync <= 1'b0;\n"
                        "\t\t\twait_for_sync <= 1'b1;\n"
                        "\t\t\twait_for_sync <= 1'b1;\n"
                "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
                "\t\tend else if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
                "\t\tbegin\n"
                "\t\tbegin\n"
                        "\t\t\twait_for_sync <= 1'b0;\n"
                        "\t\t\twait_for_sync <= 1'b0;\n"
                        "\t\t\t//\n"
                        "\t\t\t//\n"
                        "\t\t\tout_0r <= i_in_0r + i_in_1r + rnd;\n"
                        "\t\t\trnd_in_0r <= i_in_0r + i_in_1r;\n"
                        "\t\t\tout_0i <= i_in_0i + i_in_1i + rnd;\n"
                        "\t\t\trnd_in_0i <= i_in_0i + i_in_1i;\n"
                        "\t\t\t//\n"
                        "\t\t\t//\n"
                        "\t\t\tout_1r <= i_in_0r - i_in_1r + rnd;\n"
                        "\t\t\trnd_in_1r <= i_in_0r - i_in_1r;\n"
                        "\t\t\tout_1i <= i_in_0i - i_in_1i + rnd;\n"
                        "\t\t\trnd_in_1i <= i_in_0i - i_in_1i;\n"
                        "\t\t\t//\n"
                        "\t\t\t//\n"
                        "\t\t\to_sync <= i_sync;\n"
                        "\t\t\trnd_sync <= i_sync;\n"
 
                        "\t\t\to_sync <= rnd_sync;\n"
                "\t\tend\n"
                "\t\tend\n"
"\n"
"\n"
        "\t// Now, if the master control program doesn't want to keep all of\n"
 
        "\t// our bits, we can shift down to OWIDTH bits here.\n"
 
        "\tassign\to_out_0r = out_0r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
        "\tassign\to_out_0i = out_0i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
        "\tassign\to_out_1r = out_1r[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
        "\tassign\to_out_1i = out_1i[(IWIDTH-SHIFT):(IWIDTH+1-OWIDTH-SHIFT)];\n"
 
"\n"
 
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
        "\tassign\to_left  = { o_out_0r, o_out_0i };\n"
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
        "\tassign\to_right = { o_out_1r, o_out_1i };\n"
"\n"
"\n"
"endmodule\n");
"endmodule\n");
        fclose(fp);
        fclose(fp);
Line 611... Line 996...
"endmodule\n");
"endmodule\n");
 
 
        fclose(fp);
        fclose(fp);
}
}
 
 
void    build_butterfly(const char *fname, int xtracbits) {
void    build_butterfly(const char *fname, int xtracbits, ROUND_T rounding) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                perror("O/S Err was:");
                return;
                return;
        }
        }
 
        const   char    *rnd_string;
 
        if (rounding == RND_TRUNCATE)
 
                rnd_string = "truncate";
 
        else if (rounding == RND_FROMZERO)
 
                rnd_string = "roundfromzero";
 
        else if (rounding == RND_HALFUP)
 
                rnd_string = "roundhalfup";
 
        else
 
                rnd_string = "convround";
 
 
        fprintf(fp,
        fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Filename:   butterfly.v\n"
"// Filename:   butterfly.v\n"
Line 698... Line 1092...
                "\t\to_left, o_right, o_aux);\n"
                "\t\to_left, o_right, o_aux);\n"
        "\t// Public changeable parameters ...\n"
        "\t// Public changeable parameters ...\n"
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
        "\tparameter IWIDTH=%d,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
        "\t// Parameters specific to the core that should not be changed.\n"
        "\t// Parameters specific to the core that should not be changed.\n"
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
        "\tparameter    MPYDELAY=%d'd%d, // (IWIDTH+1 < CWIDTH)?(IWIDTH+4):(CWIDTH+3),\n"
                        "\t\t\tSHIFT=0, ROUND=1;\n"
                        "\t\t\tSHIFT=0;\n"
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
        "\t// The LGDELAY should be the base two log of the MPYDELAY.  If\n"
        "\t// this value is fractional, then round up to the nearest\n"
        "\t// this value is fractional, then round up to the nearest\n"
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
        "\t// integer: LGDELAY=ceil(log(MPYDELAY)/log(2));\n"
        "\tparameter\tLGDELAY=%d;\n"
        "\tparameter\tLGDELAY=%d;\n"
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
Line 865... Line 1259...
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tfifo_r = { {2{fifo_read[2*(IWIDTH+1)-1]}}, fifo_read[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tfifo_i = { {2{fifo_read[(IWIDTH+1)-1]}}, fifo_read[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
        "\tassign\taux = fifo_read[2*IWIDTH+2];\n"
"\n"
"\n"
"\n"
"\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
        "\treg\tsigned\t[(OWIDTH-1):0]  b_left_r, b_left_i,\n"
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
        "\treg\tsigned\t[(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
        "\twire\tsigned\t[(CWIDTH+IWIDTH+3-1):0]        rnd;\n"
 
        "\tgenerate\n"
 
        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"
 
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
 
        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"
 
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"
 
        "\telse\n"
 
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"
 
                "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"
 
        "\tendgenerate\n"
 
"\n");
 
        fprintf(fp,
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\tbegin\n"
 
                        "\t\t\t// First clock, recover all values\n"
 
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
 
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
 
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
 
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
 
                        "\t\t\t// extra bits we need to get rid of.)\n"
 
                        "\t\t\tmpy_r <= p_one - p_two;\n"
 
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
 
"\n"
 
                        "\t\t\t// Second clock, round and latch for final clock\n"
 
                        "\t\t\tb_right_r <= mpy_r + rnd;\n"
 
                        "\t\t\tb_right_i <= mpy_i + rnd;\n"
 
                        "\t\t\tb_left_r <= { {2{fifo_r[(IWIDTH+CWIDTH)]}},fifo_r } + rnd;\n"
 
                        "\t\t\tb_left_i <= { {2{fifo_i[(IWIDTH+CWIDTH)]}},fifo_i } + rnd;\n"
 
                        "\t\t\to_aux <= aux & ovalid;\n"
 
                "\t\tend\n"
 
"\n");
"\n");
        fprintf(fp,
        fprintf(fp,
        "\t// Final clock--clock and remove unnecessary bits.\n"
        "\t// Let's do some rounding and remove unnecessary bits.\n"
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
        "\t// We have (IWIDTH+CWIDTH+3) bits here, we need to drop down to\n"
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
        "\t// OWIDTH, and SHIFT by SHIFT bits in the process.  The trick is\n"
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
        "\t// that we don\'t need (IWIDTH+CWIDTH+3) bits.  We\'ve accumulated\n"
        "\t// them, but the actual values will never fill all these bits.\n"
        "\t// them, but the actual values will never fill all these bits.\n"
        "\t// In particular, we only need:\n"
        "\t// In particular, we only need:\n"
Line 925... Line 1288...
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
        "\t//\t -------- ... multiply.  (This last bit may be shifted out.)\n"
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
        "\t//\t (IWIDTH+CWIDTH) valid output bits. \n"
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
        "\t// Now, if the user wants to keep any extras of these (via OWIDTH),\n"
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
        "\t// or if he wishes to arbitrarily shift some of these off (via\n"
        "\t// SHIFT) we accomplish that here.\n"
        "\t// SHIFT) we accomplish that here.\n"
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
"\n");
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
        fprintf(fp,
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
 
        fprintf(fp,
 
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
 
        "\t\t\t\t{ {2{fifo_r[(IWIDTH+CWIDTH)]}}, fifo_r }, rnd_left_r);\n\n",
 
                rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
 
        "\t\t\t\t{ {2{fifo_i[(IWIDTH+CWIDTH)]}}, fifo_i }, rnd_left_i);\n\n",
 
                rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
 
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
 
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\talways @(posedge i_clk)\n"
 
                "\t\tif (i_ce)\n"
 
                "\t\tbegin\n"
 
                        "\t\t\t// First clock, recover all values\n"
 
                        "\t\t\tfifo_read <= fifo_left[fifo_read_addr];\n"
 
                        "\t\t\t// These values are IWIDTH+CWIDTH+3 bits wide\n"
 
                        "\t\t\t// although they only need to be (IWIDTH+1)\n"
 
                        "\t\t\t// + (CWIDTH) bits wide.  (We\'ve got two\n"
 
                        "\t\t\t// extra bits we need to get rid of.)\n"
 
                        "\t\t\tmpy_r <= p_one - p_two;\n"
 
                        "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
"\n"
"\n"
 
                        "\t\t\t// Second clock, round and latch for final clock\n"
 
                        "\t\t\tb_right_r <= rnd_right_r;\n"
 
                        "\t\t\tb_right_i <= rnd_right_i;\n"
 
                        "\t\t\tb_left_r <= rnd_left_r;\n"
 
                        "\t\t\tb_left_i <= rnd_left_i;\n"
 
                        "\t\t\to_aux <= aux & ovalid;\n"
 
                "\t\tend\n"
 
"\n");
 
        fprintf(fp,
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
        "\t// As a final step, we pack our outputs into two packed two\'s\n"
        "\t// complement numbers per output word, so that each output word\n"
        "\t// complement numbers per output word, so that each output word\n"
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
        "\t// portion and the bottom half being the imaginary portion.\n"
        "\t// portion and the bottom half being the imaginary portion.\n"
        "\tassign       o_left = { o_left_r, o_left_i };\n"
        "\tassign       o_left = { rnd_left_r, rnd_left_i };\n"
        "\tassign       o_right= { o_right_r,o_right_i};\n"
        "\tassign       o_right= { rnd_right_r,rnd_right_i};\n"
"\n"
"\n"
"endmodule\n");
"endmodule\n");
        fclose(fp);
        fclose(fp);
}
}
 
 
void    build_hwbfly(const char *fname, int xtracbits) {
void    build_hwbfly(const char *fname, int xtracbits, ROUND_T rounding) {
        FILE    *fp = fopen(fname, "w");
        FILE    *fp = fopen(fname, "w");
        if (NULL == fp) {
        if (NULL == fp) {
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                fprintf(stderr, "Could not open \'%s\' for writing\n", fname);
                perror("O/S Err was:");
                perror("O/S Err was:");
                return;
                return;
        }
        }
 
 
 
        const   char    *rnd_string;
 
        if (rounding == RND_TRUNCATE)
 
                rnd_string = "truncate";
 
        else if (rounding == RND_FROMZERO)
 
                rnd_string = "roundfromzero";
 
        else if (rounding == RND_HALFUP)
 
                rnd_string = "roundhalfup";
 
        else
 
                rnd_string = "convround";
 
 
 
 
        fprintf(fp,
        fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"//\n"
"// Filename:   hwbfly.v\n"
"// Filename:   hwbfly.v\n"
"//\n"
"//\n"
Line 973... Line 1382...
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
"module hwbfly(i_clk, i_rst, i_ce, i_coef, i_left, i_right, i_aux,\n"
                "\t\to_left, o_right, o_aux);\n"
                "\t\to_left, o_right, o_aux);\n"
        "\t// Public changeable parameters ...\n"
        "\t// Public changeable parameters ...\n"
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
        "\tparameter IWIDTH=16,CWIDTH=IWIDTH+%d,OWIDTH=IWIDTH+1;\n"
        "\t// Parameters specific to the core that should not be changed.\n"
        "\t// Parameters specific to the core that should not be changed.\n"
        "\tparameter\tSHIFT=0, ROUND=1;\n"
        "\tparameter\tSHIFT=0;\n"
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
        "\tinput\t\ti_clk, i_rst, i_ce;\n"
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
        "\tinput\t\t[(2*CWIDTH-1):0]\ti_coef;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\tinput\t\t[(2*IWIDTH-1):0]\ti_left, i_right;\n"
        "\tinput\t\ti_aux;\n"
        "\tinput\t\ti_aux;\n"
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
        "\toutput\twire\t[(2*OWIDTH-1):0]\to_left, o_right;\n"
Line 1073... Line 1482...
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
        "\t// multiply.  Here, we recover them.  During the multiply,\n"
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
        "\t// values were multiplied by 2^(CWIDTH-2)*exp{-j*2*pi*...},\n"
        "\t// therefore, the left_x values need to be right shifted by\n"
        "\t// therefore, the left_x values need to be right shifted by\n"
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
        "\t// CWIDTH-2 as well.  The additional bits come from a sign\n"
        "\t// extension.\n"
        "\t// extension.\n"
        "\twire\taux_s;\n"
        "\twire\taux_s, aux_ss;\n"
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
        "\twire\tsigned\t[(IWIDTH+CWIDTH):0]    left_si, left_sr;\n"
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
        "\treg\t\t[(2*IWIDTH+2):0]      left_saved;\n"
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tleft_sr = { {2{left_saved[2*(IWIDTH+1)-1]}}, left_saved[(2*(IWIDTH+1)-1):(IWIDTH+1)], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\tleft_si = { {2{left_saved[(IWIDTH+1)-1]}}, left_saved[((IWIDTH+1)-1):0], {(CWIDTH-2){1'b0}} };\n"
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
        "\tassign\taux_s = left_saved[2*IWIDTH+2];\n"
"\n"
"\n"
"\n"
"\n"
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] b_left_r, b_left_i,\n"
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n");
                                        "\t\t\t\t\t\tb_right_r, b_right_i;\n"
        fprintf(fp,
        "\treg  signed  [(CWIDTH+IWIDTH+3-1):0] mpy_r, mpy_i;\n"
        "\twire\tsigned\t[(OWIDTH-1):0]\trnd_left_r, rnd_left_i, rnd_right_r, rnd_right_i;\n\n");
        "\twire signed  [(CWIDTH+IWIDTH+3-1):0] rnd;\n"
 
        "\tgenerate\n"
        fprintf(fp,
        "\tif ((ROUND==0)||(CWIDTH+IWIDTH-OWIDTH-SHIFT<2))\n"
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_r(i_clk, i_ce,\n"
                "\t\tassign rnd = ({(CWIDTH+IWIDTH+3){1'b0}});\n"
        "\t\t\t\t{ {2{left_sr[(IWIDTH+CWIDTH)]}}, left_sr }, rnd_left_r);\n\n",
        "\telse if ((IWIDTH+CWIDTH)-(OWIDTH+SHIFT) == 2)\n"
                rnd_string);
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1 });\n"
        fprintf(fp,
        "\telse\n"
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_left_i(i_clk, i_ce,\n"
                "\t\tassign rnd = ({ {(OWIDTH+4+SHIFT){1'b0}},1'b1,\n"
        "\t\t\t\t{ {2{left_si[(IWIDTH+CWIDTH)]}}, left_si }, rnd_left_i);\n\n",
                        "\t\t\t\t{((IWIDTH+CWIDTH+3)-(OWIDTH+SHIFT+5)){1'b0}} });\n"
                rnd_string);
        "\tendgenerate\n"
        fprintf(fp,
"\n");
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_r(i_clk, i_ce,\n"
 
        "\t\t\t\tmpy_r, rnd_right_r);\n\n", rnd_string);
 
        fprintf(fp,
 
        "\t%s\t#(CWIDTH+IWIDTH+3,OWIDTH,SHIFT+4)\tdo_rnd_right_i(i_clk, i_ce,\n"
 
        "\t\t\t\tmpy_i, rnd_right_i);\n\n", rnd_string);
 
 
        fprintf(fp,
        fprintf(fp,
        "\talways @(posedge i_clk)\n"
        "\talways @(posedge i_clk)\n"
        "\t\tif (i_rst)\n"
        "\t\tif (i_rst)\n"
        "\t\tbegin\n"
        "\t\tbegin\n"
                "\t\t\tleft_saved <= 0;\n"
                "\t\t\tleft_saved <= 0;\n"
                "\t\t\tb_left_r <= 0;\n"
 
                "\t\t\tb_left_i <= 0;\n"
 
                "\t\t\tb_right_r <= 0;\n"
 
                "\t\t\tb_right_i <= 0;\n"
 
                "\t\t\to_aux <= 1'b0;\n"
                "\t\t\to_aux <= 1'b0;\n"
        "\t\tend else if (i_ce)\n"
        "\t\tend else if (i_ce)\n"
        "\t\tbegin\n"
        "\t\tbegin\n"
                "\t\t\t// First clock, recover all values\n"
                "\t\t\t// First clock, recover all values\n"
                "\t\t\tleft_saved <= leftvv;\n"
                "\t\t\tleft_saved <= leftvv;\n"
Line 1118... Line 1527...
                "\t\t\t// extra bits we need to get rid of.)\n"
                "\t\t\t// extra bits we need to get rid of.)\n"
                "\t\t\tmpy_r <= p_one - p_two;\n"
                "\t\t\tmpy_r <= p_one - p_two;\n"
                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
                "\t\t\tmpy_i <= p_three - p_one - p_two;\n"
"\n"
"\n"
                "\t\t\t// Second clock, round and latch for final clock\n"
                "\t\t\t// Second clock, round and latch for final clock\n"
                "\t\t\tb_right_r <= mpy_r + rnd;\n"
 
                "\t\t\tb_right_i <= mpy_i + rnd;\n"
 
                "\t\t\tb_left_r <= { {2{left_sr[(IWIDTH+CWIDTH)]}},left_sr } + rnd;\n"
 
                "\t\t\tb_left_i <= { {2{left_si[(IWIDTH+CWIDTH)]}},left_si } + rnd;\n"
 
"\n"
"\n"
                "\t\t\to_aux <= aux_s;\n"
                "\t\t\to_aux <= aux_s;\n"
        "\t\tend\n"
        "\t\tend\n"
        "\n");
        "\n");
 
 
        fprintf(fp,
        fprintf(fp,
        "\t// Final step--remove unnecessary bits.\n"
 
        "\tassign o_left_r  = b_left_r[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_left_i  = b_left_i[ (CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_right_r = b_right_r[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
        "\tassign o_right_i = b_right_i[(CWIDTH+IWIDTH-1-SHIFT-1):(CWIDTH+IWIDTH-OWIDTH-SHIFT-1)];\n"
 
"\n"
 
        "\t// As a final step, we pack our outputs into two packed two's\n"
        "\t// As a final step, we pack our outputs into two packed two's\n"
        "\t// complement numbers per output word, so that each output word\n"
        "\t// complement numbers per output word, so that each output word\n"
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
        "\t// has (2*OWIDTH) bits in it, with the top half being the real\n"
        "\t// portion and the bottom half being the imaginary portion.\n"
        "\t// portion and the bottom half being the imaginary portion.\n"
        "\tassign\to_left = { o_left_r, o_left_i };\n"
        "\tassign\to_left = { rnd_left_r, rnd_left_i };\n"
        "\tassign\to_right= { o_right_r,o_right_i};\n"
        "\tassign\to_right= { rnd_right_r,rnd_right_i};\n"
"\n"
"\n"
"endmodule\n");
"endmodule\n");
 
 
}
}
 
 
Line 1203... Line 1602...
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
"\treg  [(2*CWIDTH-1):0]        ib_c;\n"
"\treg  ib_sync;\n"
"\treg  ib_sync;\n"
"\n"
"\n"
"\treg  b_started;\n"
"\treg  b_started;\n"
"\twire ob_sync;\n"
"\twire ob_sync;\n"
"\twire [(2*OWIDTH-1):0]        ob_a, ob_b;\n");
"\twire [(2*OWIDTH-1):0]\tob_a, ob_b;\n");
        fprintf(fstage,
        fprintf(fstage,
"\n"
"\n"
"\t// %scmem is defined as an array of real and complex values,\n"
"\t// %scmem is defined as an array of real and complex values,\n"
"\t// where the top CWIDTH bits are the real value and the bottom\n"
"\t// where the top CWIDTH bits are the real value and the bottom\n"
"\t// CWIDTH bits are the imaginary value.\n"
"\t// CWIDTH bits are the imaginary value.\n"
Line 1279... Line 1678...
"\talways @(posedge i_clk)\n"
"\talways @(posedge i_clk)\n"
        "\t\tif (i_rst)\n"
        "\t\tif (i_rst)\n"
        "\t\tbegin\n"
        "\t\tbegin\n"
                "\t\t\twait_for_sync <= 1'b1;\n"
                "\t\t\twait_for_sync <= 1'b1;\n"
                "\t\t\tiaddr <= 0;\n"
                "\t\t\tiaddr <= 0;\n"
                "\t\t\toB <= 0;\n"
 
                "\t\t\tib_sync   <= 1'b0;\n"
                "\t\t\tib_sync   <= 1'b0;\n"
                "\t\t\to_sync    <= 1'b0;\n"
 
                "\t\t\tb_started <= 1'b0;\n"
 
        "\t\tend\n"
        "\t\tend\n"
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
        "\t\telse if ((i_ce)&&((~wait_for_sync)||(i_sync)))\n"
        "\t\tbegin\n"
        "\t\tbegin\n"
                "\t\t\t//\n"
                "\t\t\t//\n"
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
                "\t\t\t// First step: Record what we\'re not ready to use yet\n"
                "\t\t\t//\n"
                "\t\t\t//\n"
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
                "\t\t\timem[iaddr[(LGSPAN-1):0]] <= i_data;\n"
                "\t\t\tiaddr <= iaddr + 1;\n"
                "\t\t\tiaddr <= iaddr + 1;\n"
                "\t\t\twait_for_sync <= 1'b0;\n"
                "\t\t\twait_for_sync <= 1'b0;\n"
"\n"
        "\t\tend\n\n");
                "\t\t\t//\n"
 
                "\t\t\t// Now, we have all the inputs, so let\'s feed the\n"
        fprintf(fstage,
                "\t\t\t// butterfly\n"
        "\t//\n"
                "\t\t\t//\n"
        "\t// Now, we have all the inputs, so let\'s feed the butterfly\n"
                "\t\t\tif (iaddr[LGSPAN])\n"
        "\t//\n"
                "\t\t\tbegin\n"
        "\talways\t@(posedge i_clk)\n"
                        "\t\t\t\t// One input from memory, ...\n"
        "\tif ((i_ce)&&(iaddr[LGSPAN]))\n"
                        "\t\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
                "\t\tbegin\n"
                        "\t\t\t\t// One input clocked in from the top\n"
                        "\t\t\t// One input from memory, ...\n"
                        "\t\t\t\tib_b <= i_data;\n"
                        "\t\t\tib_a <= imem[iaddr[(LGSPAN-1):0]];\n"
                        "\t\t\t\t// Set the sync to true on the very first\n"
                        "\t\t\t// One input clocked in from the top\n"
                        "\t\t\t\t// valid input in, and hence on the very\n"
                        "\t\t\tib_b <= i_data;\n"
                        "\t\t\t\t// first valid data out per FFT.\n"
                        "\t\t\t// Set the sync to true on the very first\n"
                        "\t\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
                        "\t\t\t// valid input in, and hence on the very\n"
                        "\t\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
                        "\t\t\t// first valid data out per FFT.\n"
                "\t\t\tend else begin\n"
                        "\t\t\tib_sync <= (iaddr==(1<<(LGSPAN)));\n"
                        "\t\t\t\t// Just to make debugging easier, let\'s\n"
                        "\t\t\tib_c <= %scmem[iaddr[(LGSPAN-1):0]];\n"
                        "\t\t\t\t// clear these registers.  That\'ll make\n"
                "\t\tend\n\n", (inv)?"i":"");
                        "\t\t\t\t// the transition easier to watch.\n"
 
                        "\t\t\t\tib_a <= {(2*IWIDTH){1'b0}};\n"
        if (hwmpy) {
                        "\t\t\t\tib_b <= {(2*IWIDTH){1'b0}};\n"
                fprintf(fstage,
                        "\t\t\t\tib_sync <= 1'b0;\n"
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
                "\t\t\tend\n"
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
"\n"
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
                "\t\t\t//\n"
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
                "\t\t\t// Next step: recover the outputs from the butterfly\n"
        } else {
                "\t\t\t//\n"
        fprintf(fstage,
 
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
 
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
 
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
 
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
 
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
 
        }
 
 
 
        fprintf(fstage,
 
        "\t//\n"
 
        "\t// Next step: recover the outputs from the butterfly\n"
 
        "\t//\n"
 
        "\talways\t@(posedge i_clk)\n"
 
        "\t\tif (i_rst)\n"
 
        "\t\tbegin\n"
 
                "\t\t\toB <= 0;\n"
 
                "\t\t\to_sync <= 0;\n"
 
                "\t\t\tb_started <= 0;\n"
 
        "\t\tend else if (i_ce)\n"
 
        "\t\tbegin\n"
                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
                "\t\t\tif ((ob_sync||b_started)&&(~oB[LGSPAN]))\n"
                "\t\t\tbegin // A butterfly output is available\n"
                "\t\t\tbegin // A butterfly output is available\n"
                        "\t\t\t\tb_started <= 1'b1;\n"
                        "\t\t\t\tb_started <= 1'b1;\n"
                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
                        "\t\t\t\tomem[oB[(LGSPAN-1):0]] <= ob_b;\n"
                        "\t\t\t\toB <= oB+1;\n"
                        "\t\t\t\toB <= oB+1;\n"
Line 1336... Line 1751...
                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
                        "\t\t\t\to_data <= omem[oB[(LGSPAN-1):0]];\n"
                        "\t\t\t\toB <= oB + 1;\n"
                        "\t\t\t\toB <= oB + 1;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                "\t\t\tend else\n"
                "\t\t\tend else\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
                        "\t\t\t\to_sync <= 1'b0;\n"
        "\t\tend\n"
        "\t\tend\n\n");
"\n", (inv)?"i":"");
 
        if (hwmpy) {
 
                fprintf(fstage,
 
        "\thwbfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
 
                        "\t\t\t.SHIFT(BFLYSHIFT))\n"
 
                "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
 
                        "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n");
 
        } else {
 
        fprintf(fstage,
 
        "\tbutterfly #(.IWIDTH(IWIDTH),.CWIDTH(CWIDTH),.OWIDTH(OWIDTH),\n"
 
                "\t\t\t.MPYDELAY(%d\'d%d),.LGDELAY(LGBDLY),.SHIFT(BFLYSHIFT))\n"
 
        "\t\tbfly(i_clk, i_rst, i_ce, ib_c,\n"
 
                "\t\t\tib_a, ib_b, ib_sync, ob_a, ob_b, ob_sync);\n",
 
                        lgdelay(nbits, xtra), bflydelay(nbits, xtra));
 
        }
 
        fprintf(fstage, "endmodule\n");
        fprintf(fstage, "endmodule\n");
}
}
 
 
void    usage(void) {
void    usage(void) {
        fprintf(stderr,
        fprintf(stderr,
Line 1403... Line 1803...
        int     nbitsout, maxbitsout = -1, xtrapbits=0;
        int     nbitsout, maxbitsout = -1, xtrapbits=0;
        bool    bitreverse = true, inverse=false, interactive = false,
        bool    bitreverse = true, inverse=false, interactive = false,
                verbose_flag = false;
                verbose_flag = false;
        FILE    *vmain;
        FILE    *vmain;
        std::string     coredir = "fft-core", cmdline = "";
        std::string     coredir = "fft-core", cmdline = "";
 
        ROUND_T rounding = RND_CONVERGENT;
 
        // ROUND_T      rounding = RND_HALFUP;
 
 
        if (argc <= 1)
        if (argc <= 1)
                usage();
                usage();
 
 
        cmdline = argv[0];
        cmdline = argv[0];
Line 1849... Line 2251...
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
                fprintf(vmain, "\tassign\tbr_right = w_o2;\n");
                fprintf(vmain, "\n");
                fprintf(vmain, "\n");
                if (bitreverse) {
                if (bitreverse) {
                        fprintf(vmain, "\twire\tbr_start;\n");
                        fprintf(vmain, "\twire\tbr_start;\n");
                        fprintf(vmain, "\treg\tr_br_started;\n");
                        fprintf(vmain, "\treg\tr_br_started;\n");
                        fprintf(vmain, "\t// A delay of one clock here is perfect, as it matches the delay in\n");
 
                        fprintf(vmain, "\t// our dblstage.\n");
 
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
                        fprintf(vmain, "\talways @(posedge i_clk)\n");
                        fprintf(vmain, "\t\tif (i_rst)\n");
                        fprintf(vmain, "\t\tif (i_rst)\n");
                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
                        fprintf(vmain, "\t\t\tr_br_started <= 1'b0;\n");
                        fprintf(vmain, "\t\telse\n");
                        fprintf(vmain, "\t\telse\n");
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s4;\n");
                        fprintf(vmain, "\t\t\tr_br_started <= r_br_started || w_s2;\n");
                        fprintf(vmain, "\tassign\tbr_start = r_br_started;\n");
                        fprintf(vmain, "\tassign\tbr_start = r_br_started || w_s2;\n");
                }
                }
        }
        }
 
 
        fprintf(vmain, "\n");
        fprintf(vmain, "\n");
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
        fprintf(vmain, "\t// Now for the bit-reversal stage.\n");
Line 1890... Line 2290...
 
 
        {
        {
                std::string     fname;
                std::string     fname;
 
 
                fname = coredir + "/butterfly.v";
                fname = coredir + "/butterfly.v";
                build_butterfly(fname.c_str(), xtracbits);
                build_butterfly(fname.c_str(), xtracbits, rounding);
 
 
                if (nummpy > 0) {
                if (nummpy > 0) {
                        fname = coredir + "/hwbfly.v";
                        fname = coredir + "/hwbfly.v";
                        build_hwbfly(fname.c_str(), xtracbits);
                        build_hwbfly(fname.c_str(), xtracbits, rounding);
                }
                }
 
 
                fname = coredir + "/shiftaddmpy.v";
                fname = coredir + "/shiftaddmpy.v";
                build_multiply(fname.c_str());
                build_multiply(fname.c_str());
 
 
                fname = coredir + "/qtrstage.v";
                fname = coredir + "/qtrstage.v";
                build_quarters(fname.c_str());
                build_quarters(fname.c_str(), rounding);
 
 
                fname = coredir + "/dblstage.v";
                fname = coredir + "/dblstage.v";
                build_dblstage(fname.c_str());
                build_dblstage(fname.c_str(), rounding);
 
 
                if (bitreverse) {
                if (bitreverse) {
                        fname = coredir + "/dblreverse.v";
                        fname = coredir + "/dblreverse.v";
                        build_dblreverse(fname.c_str());
                        build_dblreverse(fname.c_str());
                }
                }
 
 
 
                const   char    *rnd_string = "";
 
                switch(rounding) {
 
                        case RND_TRUNCATE:      rnd_string = "/truncate.v"; break;
 
                        case RND_FROMZERO:      rnd_string = "/roundfromzero.v"; break;
 
                        case RND_HALFUP:        rnd_string = "/roundhalfup.v"; break;
 
                        default:
 
                                rnd_string = "/convround.v"; break;
 
                } fname = coredir + rnd_string;
 
                switch(rounding) {
 
                        case RND_TRUNCATE: build_truncator(fname.c_str()); break;
 
                        case RND_FROMZERO: build_roundfromzero(fname.c_str()); break;
 
                        case RND_HALFUP: build_roundhalfup(fname.c_str()); break;
 
                        default:
 
                                build_convround(fname.c_str()); break;
 
                }
 
 
        }
        }
}
}
 
 
 
 
 
 
 No newline at end of file
 No newline at end of file

powered by: WebSVN 2.1.0

© Copyright 1999-2024 OpenCores.org, equivalent to Oliscience. All rights reserved. OpenCores® is a registered trademark.