URL
https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk
Subversion Repositories dblclockfft
[/] [dblclockfft/] [trunk/] [sw/] [softmpy.cpp] - Rev 41
Go to most recent revision | Compare with Previous | Blame | View Log
//////////////////////////////////////////////////////////////////////////////// // // Filename: softmpy.cpp // // Project: A General Purpose Pipelined FFT Implementation // // Purpose: If the chip doesn't have any hardware multiplies, you'll need // a soft-multiply implementation. This provides that // implementation. // // Creator: Dan Gisselquist, Ph.D. // Gisselquist Technology, LLC // //////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2015-2018, Gisselquist Technology, LLC // // This program is free software (firmware): you can redistribute it and/or // modify it under the terms of the GNU General Public License as published // by the Free Software Foundation, either version 3 of the License, or (at // your option) any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with this program. (It's in the $(ROOT)/doc directory. Run make with no // target there if the PDF file isn't present.) If not, see // <http://www.gnu.org/licenses/> for a copy. // // License: GPL, v3, as defined and found on www.gnu.org, // http://www.gnu.org/licenses/gpl.html // // //////////////////////////////////////////////////////////////////////////////// // // #define _CRT_SECURE_NO_WARNINGS // ms vs 2012 doesn't like fopen #include <stdio.h> #include <stdlib.h> #ifdef _MSC_VER // added for ms vs compatibility #include <io.h> #include <direct.h> #define _USE_MATH_DEFINES #endif #include <string.h> #include <string> #include <math.h> #include <ctype.h> #include <assert.h> #include "defaults.h" #include "legal.h" #include "softmpy.h" void build_multiply(const char *fname) { FILE *fp = fopen(fname, "w"); if (NULL == fp) { fprintf(stderr, "Could not open \'%s\' for writing\n", fname); perror("O/S Err was:"); return; } fprintf(fp, SLASHLINE "//\n" "// Filename:\tshiftaddmpy.v\n" "//\n" "// Project:\t%s\n" "//\n" "// Purpose:\tA portable shift and add multiply.\n" "//\n" "// While both Xilinx and Altera will offer single clock multiplies, this\n" "// simple approach will multiply two numbers on any architecture. The\n" "// result maintains the full width of the multiply, there are no extra\n" "// stuff bits, no rounding, no shifted bits, etc.\n" "//\n" "// Further, for those applications that can support it, this multiply\n" "// is pipelined and will produce one answer per clock.\n" "//\n" "// For minimal processing delay, make the first parameter the one with\n" "// the least bits, so that AWIDTH <= BWIDTH.\n" "//\n" "// The processing delay in this multiply is (AWIDTH+1) cycles. That is,\n" "// if the data is present on the input at clock t=0, the result will be\n" "// present on the output at time t=AWIDTH+1;\n" "//\n" "//\n%s" "//\n", prjname, creator); fprintf(fp, "%s", cpyleft); fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n"); fprintf(fp, "module shiftaddmpy(i_clk, i_ce, i_a, i_b, o_r);\n" "\tparameter\tAWIDTH=%d,BWIDTH=", TST_SHIFTADDMPY_AW); #ifdef TST_SHIFTADDMPY_BW fprintf(fp, "%d;\n", TST_SHIFTADDMPY_BW); #else fprintf(fp, "AWIDTH;\n"); #endif fprintf(fp, "\tinput\twire\t\t\t\ti_clk, i_ce;\n" "\tinput\twire\t[(AWIDTH-1):0]\t\ti_a;\n" "\tinput\twire\t[(BWIDTH-1):0]\t\ti_b;\n" "\toutput\treg\t[(AWIDTH+BWIDTH-1):0]\to_r;\n" "\n" "\treg\t[(AWIDTH-1):0]\tu_a;\n" "\treg\t[(BWIDTH-1):0]\tu_b;\n" "\treg\t\t\tsgn;\n" "\n" "\treg\t[(AWIDTH-2):0]\t\tr_a[0:(AWIDTH-1)];\n" "\treg\t[(AWIDTH+BWIDTH-2):0]\tr_b[0:(AWIDTH-1)];\n" "\treg\t\t\t\tr_s[0:(AWIDTH-1)];\n" "\treg\t[(AWIDTH+BWIDTH-1):0]\tacc[0:(AWIDTH-1)];\n" "\tgenvar k;\n" "\n" "\t// If we were forced to stay within two\'s complement arithmetic,\n" "\t// taking the absolute value here would require an additional bit.\n" "\t// However, because our results are now unsigned, we can stay\n" "\t// within the number of bits given (for now).\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tu_a <= (i_a[AWIDTH-1])?(-i_a):(i_a);\n" "\t\t\tu_b <= (i_b[BWIDTH-1])?(-i_b):(i_b);\n" "\t\t\tsgn <= i_a[AWIDTH-1] ^ i_b[BWIDTH-1];\n" "\t\tend\n" "\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tacc[0] <= (u_a[0]) ? { {(AWIDTH){1\'b0}}, u_b }\n" "\t\t\t\t\t: {(AWIDTH+BWIDTH){1\'b0}};\n" "\t\t\tr_a[0] <= { u_a[(AWIDTH-1):1] };\n" "\t\t\tr_b[0] <= { {(AWIDTH-1){1\'b0}}, u_b };\n" "\t\t\tr_s[0] <= sgn; // The final sign, needs to be preserved\n" "\t\tend\n" "\n" "\tgenerate\n" "\tfor(k=0; k<AWIDTH-1; k=k+1)\n" "\tbegin : genstages\n" "\t\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tacc[k+1] <= acc[k] + ((r_a[k][0]) ? {r_b[k],1\'b0}:0);\n" "\t\t\tr_a[k+1] <= { 1\'b0, r_a[k][(AWIDTH-2):1] };\n" "\t\t\tr_b[k+1] <= { r_b[k][(AWIDTH+BWIDTH-3):0], 1\'b0};\n" "\t\t\tr_s[k+1] <= r_s[k];\n" "\t\tend\n" "\tend\n" "\tendgenerate\n" "\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\t\to_r <= (r_s[AWIDTH-1]) ? (-acc[AWIDTH-1]) : acc[AWIDTH-1];\n" "\n" "endmodule\n"); fclose(fp); } void build_bimpy(const char *fname) { FILE *fp = fopen(fname, "w"); if (NULL == fp) { fprintf(stderr, "Could not open \'%s\' for writing\n", fname); perror("O/S Err was:"); return; } fprintf(fp, SLASHLINE "//\n" "// Filename:\t%s\n" "//\n" "// Project:\t%s\n" "//\n" "// Purpose:\tA simple 2-bit multiply based upon the fact that LUT's allow\n" "// 6-bits of input. In other words, I could build a 3-bit\n" "// multiply from 6 LUTs (5 actually, since the first could have two\n" "// outputs). This would allow multiplication of three bit digits, save\n" "// only for the fact that you would need two bits of carry. The bimpy\n" "// approach throttles back a bit and does a 2x2 bit multiply in a LUT,\n" "// guaranteeing that it will never carry more than one bit. While this\n" "// multiply is hardware independent (and can still run under Verilator\n" "// therefore), it is really motivated by trying to optimize for a\n" "// specific piece of hardware (Xilinx-7 series ...) that has at least\n" "// 4-input LUT's with carry chains.\n" "//\n" "//\n" "//\n%s" "//\n", fname, prjname, creator); fprintf(fp, "%s", cpyleft); fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n"); fprintf(fp, "module bimpy(i_clk, i_ce, i_a, i_b, o_r);\n" "\tparameter\tBW=18; // Number of bits in i_b\n" "\tlocalparam\tLUTB=2; // Number of bits in i_a for our LUT multiply\n" "\tinput\twire\t\t\ti_clk, i_ce;\n" "\tinput\twire\t[(LUTB-1):0]\ti_a;\n" "\tinput\twire\t[(BW-1):0]\ti_b;\n" "\toutput\treg\t[(BW+LUTB-1):0] o_r;\n" "\n" "\twire [(BW+LUTB-2):0] w_r;\n" "\twire [(BW+LUTB-3):1] c;\n" "\n" "\tassign\tw_r = { ((i_a[1])?i_b:{(BW){1\'b0}}), 1\'b0 }\n" "\t\t\t\t^ { 1\'b0, ((i_a[0])?i_b:{(BW){1\'b0}}) };\n" "\tassign\tc = { ((i_a[1])?i_b[(BW-2):0]:{(BW-1){1\'b0}}) }\n" "\t\t\t& ((i_a[0])?i_b[(BW-1):1]:{(BW-1){1\'b0}});\n" "\n" "\tinitial o_r = 0;\n" "\talways @(posedge i_clk)\n" "\tif (i_ce)\n" "\t\to_r <= w_r + { c, 2'b0 };\n" "\n"); // Formal properties // // This module is formally verified as part of longbimpy.v // Hence, we'll use an ifdef LONGBIMPY to capture if it is being verified as part of // LONGBIMPY, or placed within another module. fprintf(fp, "`ifdef FORMAL\n"); if (formal_property_flag) { fprintf(fp, "\treg\tf_past_valid;\n" "\n" "\tinitial\tf_past_valid = 1'b0;\n" "\talways @(posedge i_clk)\n" "\tf_past_valid <= 1'b1;\n" "\n" "`define\tASSERT\tassert\n" "\n"); // Now for our module specific assertions // These properties will be assumed if this proof is not part of LONGBIMPY's proof fprintf(fp, "\talways @(posedge i_clk)\n" "\tif ((f_past_valid)&&($past(i_ce)))\n" "\tbegin\n" "\t\tif ($past(i_a)==0)\n" "\t\t\t`ASSERT(o_r == 0);\n" "\t\telse if ($past(i_a) == 1)\n" "\t\t\t`ASSERT(o_r == $past(i_b));\n" "\n" "\t\tif ($past(i_b)==0)\n" "\t\t\t`ASSERT(o_r == 0);\n" "\t\telse if ($past(i_b) == 1)\n" "\t\t\t`ASSERT(o_r[(LUTB-1):0] == $past(i_a));\n" "\tend\n"); } else { fprintf(fp, "// Enable the formal_property_flag to include formal properties\n"); } fprintf(fp, "`endif\n"); // And then the end of the module fprintf(fp, "endmodule\n"); fclose(fp); } void build_longbimpy(const char *fname) { FILE *fp = fopen(fname, "w"); if (NULL == fp) { fprintf(stderr, "Could not open \'%s\' for writing\n", fname); perror("O/S Err was:"); return; } fprintf(fp, SLASHLINE "//\n" "// Filename: %s\n" "//\n" "// Project: %s\n" "//\n" "// Purpose: A portable shift and add multiply, built with the knowledge\n" "// of the existence of a six bit LUT and carry chain. That knowledge\n" "// allows us to multiply two bits from one value at a time against all\n" "// of the bits of the other value. This sub multiply is called the\n" "// bimpy.\n" "//\n" "// For minimal processing delay, make the first parameter the one with\n" "// the least bits, so that AWIDTH <= BWIDTH.\n" "//\n" "//\n" "//\n%s" "//\n", fname, prjname, creator); fprintf(fp, "%s", cpyleft); fprintf(fp, "//\n//\n`default_nettype\tnone\n//\n"); fprintf(fp, "module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r\n"); if (formal_property_flag) fprintf(fp, "`ifdef FORMAL\n" "\t, f_past_a_unsorted, f_past_b_unsorted\n" "`endif\n\t\t"); fprintf(fp, ");\n" "\tparameter IAW=%d, // The width of i_a, min width is 5\n" "\t\t\tIBW=", TST_LONGBIMPY_AW); #ifdef TST_LONGBIMPY_BW fprintf(fp, "%d", TST_LONGBIMPY_BW); #else fprintf(fp, "IAW"); #endif fprintf(fp, ", // The width of i_b, can be anything\n" "\t\t\t// The following three parameters should not be changed\n" "\t\t\t// by any implementation, but are based upon hardware\n" "\t\t\t// and the above values:\n" "\t\t\tOW=IAW+IBW; // The output width\n"); fprintf(fp, "\tlocalparam AW = (IAW<IBW) ? IAW : IBW,\n" "\t\t\tBW = (IAW<IBW) ? IBW : IAW,\n" "\t\t\tIW=(AW+1)&(-2), // Internal width of A\n" "\t\t\tLUTB=2, // How many bits we can multiply by at once\n" "\t\t\tTLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau\n" "\tinput\twire\t\t\ti_clk, i_ce;\n" "\tinput\twire\t[(IAW-1):0]\ti_a_unsorted;\n" "\tinput\twire\t[(IBW-1):0]\ti_b_unsorted;\n" "\toutput\treg\t[(AW+BW-1):0]\to_r;\n" "\n"); if (formal_property_flag) fprintf(fp, "`ifdef FORMAL\n" "\toutput\twire\t[(IAW-1):0]\tf_past_a_unsorted;\n" "\toutput\twire\t[(IBW-1):0]\tf_past_b_unsorted;\n" "`endif\n"); fprintf(fp, "\n" "\t//\n" "\t// Swap parameter order, so that AW <= BW -- for performance\n" "\t// reasons\n" "\twire [AW-1:0] i_a;\n" "\twire [BW-1:0] i_b;\n" "\tgenerate if (IAW <= IBW)\n" "\tbegin : NO_PARAM_CHANGE\n" "\t\tassign i_a = i_a_unsorted;\n" "\t\tassign i_b = i_b_unsorted;\n" "\tend else begin : SWAP_PARAMETERS\n" "\t\tassign i_a = i_b_unsorted;\n" "\t\tassign i_b = i_a_unsorted;\n" "\tend endgenerate\n" "\n" "\treg\t[(IW-1):0]\tu_a;\n" "\treg\t[(BW-1):0]\tu_b;\n" "\treg\t\t\tsgn;\n" "\n" "\treg\t[(IW-1-2*(LUTB)):0]\tr_a[0:(TLEN-3)];\n" "\treg\t[(BW-1):0]\t\tr_b[0:(TLEN-3)];\n" "\treg\t[(TLEN-1):0]\t\tr_s;\n" "\treg\t[(IW+BW-1):0]\t\tacc[0:(TLEN-2)];\n" "\tgenvar k;\n" "\n" "\t// First step:\n" "\t// Switch to unsigned arithmetic for our multiply, keeping track\n" "\t// of the along the way. We'll then add the sign again later at\n" "\t// the end.\n" "\t//\n" "\t// If we were forced to stay within two's complement arithmetic,\n" "\t// taking the absolute value here would require an additional bit.\n" "\t// However, because our results are now unsigned, we can stay\n" "\t// within the number of bits given (for now).\n" "\tinitial u_a = 0;\n" "\tgenerate if (IW > AW)\n" "\tbegin : ABS_AND_ADD_BIT_TO_A\n" "\t\talways @(posedge i_clk)\n" "\t\t\tif (i_ce)\n" "\t\t\t\tu_a <= { 1\'b0, (i_a[AW-1])?(-i_a):(i_a) };\n" "\tend else begin : ABS_A\n" "\t\talways @(posedge i_clk)\n" "\t\t\tif (i_ce)\n" "\t\t\t\tu_a <= (i_a[AW-1])?(-i_a):(i_a);\n" "\tend endgenerate\n" "\n" "\tinitial sgn = 0;\n" "\tinitial u_b = 0;\n" "\talways @(posedge i_clk)\n" "\tif (i_ce)\n" "\tbegin : ABS_B\n" "\t\tu_b <= (i_b[BW-1])?(-i_b):(i_b);\n" "\t\tsgn <= i_a[AW-1] ^ i_b[BW-1];\n" "\tend\n" "\n" "\twire [(BW+LUTB-1):0] pr_a, pr_b;\n" "\n" "\t//\n" "\t// Second step: First two 2xN products.\n" "\t//\n" "\t// Since we have no tableau of additions (yet), we can do both\n" "\t// of the first two rows at the same time and add them together.\n" "\t// For the next round, we'll then have a previous sum to accumulate\n" "\t// with new and subsequent product, and so only do one product at\n" "\t// a time can follow this--but the first clock can do two at a time.\n" "\tbimpy\t#(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a);\n" "\tbimpy\t#(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);\n" "\n" "\tinitial r_s = 0;\n" "\tinitial r_a[0] = 0;\n" "\tinitial r_b[0] = 0;\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce) r_b[0] <= u_b;\n" "\talways @(posedge i_clk)\n" "\t\tif (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };\n" "\n" "\tinitial acc[0] = 0;\n" "\talways @(posedge i_clk) // One clk after p[0],p[1] become valid\n" "\tif (i_ce) acc[0] <= { {(IW-LUTB){1\'b0}}, pr_a}\n" "\t\t +{ {(IW-(2*LUTB)){1\'b0}}, pr_b, {(LUTB){1\'b0}} };\n" "\n" "\tgenerate // Keep track of intermediate values, before multiplying them\n" "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n" "\tbegin : GENCOPIES\n" "\n" "\t\tinitial r_a[k+1] = 0;\n" "\t\tinitial r_b[k+1] = 0;\n" "\t\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tr_a[k+1] <= { {(LUTB){1\'b0}},\n" "\t\t\t\tr_a[k][(IW-1-(2*LUTB)):LUTB] };\n" "\t\t\tr_b[k+1] <= r_b[k];\n" "\t\tend\n" "\tend endgenerate\n" "\n" "\tgenerate // The actual multiply and accumulate stage\n" "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n" "\tbegin : GENSTAGES\n" "\t\twire\t[(BW+LUTB-1):0] genp;\n" "\n" "\t\t// First, the multiply: 2-bits times BW bits\n" "\t\tbimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);\n" "\n" "\t\t// Then the accumulate step -- on the next clock\n" "\t\tinitial acc[k+1] = 0;\n" "\t\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\t\tacc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1\'b0}},\n" "\t\t\t\tgenp, {(LUTB*(k+2)){1\'b0}} };\n" "\tend endgenerate\n" "\n" "\twire [(IW+BW-1):0] w_r;\n" "\tassign\tw_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];\n" "\n" "\tinitial o_r = 0;\n" "\talways @(posedge i_clk)\n" "\tif (i_ce)\n" "\t\to_r <= w_r[(AW+BW-1):0];\n" "\n"); // Make Verilator happy fprintf(fp, "\tgenerate if (IW > AW)\n" "\tbegin : VUNUSED\n" "\t\t// verilator lint_off UNUSED\n" "\t\twire\t[(IW-AW)-1:0]\tunused;\n" "\t\tassign\tunused = w_r[(IW+BW-1):(AW+BW)];\n" "\t\t// verilator lint_on UNUSED\n" "\tend endgenerate\n" "\n"); // The formal property section // Starting with properties specific to this component's proof, and not // any parent modules fprintf(fp, "`ifdef FORMAL\n"); if (formal_property_flag) { fprintf(fp, "\treg f_past_valid;\n" "\tinitial\tf_past_valid = 1'b0;\n" "\talways @(posedge i_clk)\n" "\t\tf_past_valid <= 1'b1;\n" "\n" "`define\tASSERT assert\n" "`ifdef LONGBIMPY\n" "\n" "\talways @(posedge i_clk)\n" "\tif (!$past(i_ce))\n" "\t\tassume(i_ce);\n" "\n" "`endif\n" "\n"); // Now for properties specific to this core fprintf(fp, "\treg [AW-1:0] f_past_a [0:TLEN];\n" "\treg [BW-1:0] f_past_b [0:TLEN];\n" "\treg [TLEN+1:0] f_sgn_a, f_sgn_b;\n" "\n"); fprintf(fp, "\tinitial\tf_past_a[0] = 0;\n" "\tinitial\tf_past_b[0] = 0;\n" "\tinitial\tf_sgn_a = 0;\n" "\tinitial\tf_sgn_b = 0;\n" "\talways @(posedge i_clk)\n" "\tif (i_ce)\n" "\tbegin\n" "\t\tf_past_a[0] <= u_a;\n" "\t\tf_past_b[0] <= u_b;\n" "\t\tf_sgn_a[0] <= i_a[AW-1];\n" "\t\tf_sgn_b[0] <= i_b[BW-1];\n" "\tend\n" "\n"); fprintf(fp, "\tgenerate for(k=0; k<TLEN; k=k+1)\n" "\tbegin\n" "\t\tinitial\tf_past_a[k+1] = 0;\n" "\t\tinitial\tf_past_b[k+1] = 0;\n" "\t\tinitial\tf_sgn_a[k+1] = 0;\n" "\t\tinitial\tf_sgn_b[k+1] = 0;\n" "\t\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tf_past_a[k+1] <= f_past_a[k];\n" "\t\t\tf_past_b[k+1] <= f_past_b[k];\n" "\n" "\t\t\tf_sgn_a[k+1] <= f_sgn_a[k];\n" "\t\t\tf_sgn_b[k+1] <= f_sgn_b[k];\n" "\t\tend\n" "\tend endgenerate\n" "\n"); fprintf(fp, "\talways @(posedge i_clk)\n" "\tif (i_ce)\n" "\tbegin\n" "\t\tf_sgn_a[TLEN+1] <= f_sgn_a[TLEN];\n" "\t\tf_sgn_b[TLEN+1] <= f_sgn_b[TLEN];\n" "\tend\n" "\n"); fprintf(fp, "\talways @(posedge i_clk)\n" "\tbegin\n" "\t\tassert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));\n" "\t\tassert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));\n" "\t\tassert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));\n" "\tend\n" "\n"); fprintf(fp, "\talways @(posedge i_clk)\n" "\tif ((f_past_valid)&&($past(i_ce)))\n" "\tbegin\n" "\t\tif ($past(i_a)==0)\n" "\t\t\t`ASSERT(u_a == 0);\n" "\t\telse if ($past(i_a[AW-1]) == 1'b0)\n" "\t\t\t`ASSERT(u_a == $past(i_a));\n" "\n" "\t\tif ($past(i_b)==0)\n" "\t\t\t`ASSERT(u_b == 0);\n" "\t\telse if ($past(i_b[BW-1]) == 1'b0)\n" "\t\t\t`ASSERT(u_b == $past(i_b));\n" "\tend\n" "\n"); fprintf(fp, "\tgenerate // Keep track of intermediate values, before multiplying them\n" "\tif (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)\n" "\tbegin : ASSERT_GENCOPY\n" "\t\talways @(posedge i_clk)\n" "\t\tif (i_ce)\n" "\t\tbegin\n" "\t\t\tif (f_past_a[k]==0)\n" "\t\t\t\t`ASSERT(r_a[k] == 0);\n" "\t\t\telse if (f_past_a[k]==1)\n" "\t\t\t\t`ASSERT(r_a[k] == 0);\n" "\t\t\t`ASSERT(r_b[k] == f_past_b[k]);\n" "\t\tend\n" "\tend endgenerate\n" "\n"); fprintf(fp, "\tgenerate // The actual multiply and accumulate stage\n" "\tif (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)\n" "\tbegin : ASSERT_GENSTAGE\n" "\t\talways @(posedge i_clk)\n" "\t\tif ((f_past_valid)&&($past(i_ce)))\n" "\t\tbegin\n" "\t\t\tif (f_past_a[k+1]==0)\n" "\t\t\t\t`ASSERT(acc[k] == 0);\n" "\t\t\tif (f_past_a[k+1]==1)\n" "\t\t\t\t`ASSERT(acc[k] == f_past_b[k+1]);\n" "\t\t\tif (f_past_b[k+1]==0)\n" "\t\t\t\t`ASSERT(acc[k] == 0);\n" "\t\t\tif (f_past_b[k+1]==1)\n" "\t\t\tbegin\n" "\t\t\t\t`ASSERT(acc[k][(2*k)+3:0]\n" "\t\t\t\t == f_past_a[k+1][(2*k)+3:0]);\n" "\t\t\t\t`ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);\n" "\t\t\tend\n" "\t\tend\n" "\tend endgenerate\n" "\n"); fprintf(fp, "\twire [AW-1:0]\tf_past_a_neg = - f_past_a[TLEN];\n" "\twire [BW-1:0]\tf_past_b_neg = - f_past_b[TLEN];\n" "\n" "\twire [AW-1:0]\tf_past_a_pos = f_past_a[TLEN][AW-1]\n" "\t\t\t\t\t? f_past_a_neg : f_past_a[TLEN];\n" "\twire [BW-1:0]\tf_past_b_pos = f_past_b[TLEN][BW-1]\n" "\t\t\t\t\t? f_past_b_neg : f_past_b[TLEN];\n\n"); fprintf(fp, "\talways @(posedge i_clk)\n" "\tif ((f_past_valid)&&($past(i_ce)))\n" "\tbegin\n" "\t\tif ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0))\n" "\t\t\t`ASSERT(o_r == 0);\n" "\t\telse if (f_past_a[TLEN]==1)\n" "\t\tbegin\n" "\t\t\tif ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)\n" "\t\t\tbegin\n" "\t\t\t\t`ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);\n" "\t\t\t\t`ASSERT(o_r[AW+BW-1:BW] == 0);\n" "\t\t\tend else begin // if (f_sgn_b[TLEN+1]) begin\n" "\t\t\t\t`ASSERT(o_r[BW-1:0] == f_past_b_neg);\n" "\t\t\t\t`ASSERT(o_r[AW+BW-1:BW]\n" "\t\t\t\t\t== {(AW){f_past_b_neg[BW-1]}});\n" "\t\t\tend\n" "\t\tend else if (f_past_b[TLEN]==1)\n" "\t\tbegin\n" "\t\t\tif ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)\n" "\t\t\tbegin\n" "\t\t\t\t`ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);\n" "\t\t\t\t`ASSERT(o_r[AW+BW-1:AW] == 0);\n" "\t\t\tend else begin\n" "\t\t\t\t`ASSERT(o_r[AW-1:0] == f_past_a_neg);\n" "\t\t\t\t`ASSERT(o_r[AW+BW-1:AW]\n" "\t\t\t\t\t== {(BW){f_past_a_neg[AW-1]}});\n" "\t\t\tend\n" "\t\tend else begin\n" "\t\t\t`ASSERT(o_r != 0);\n" "\t\t\tif (!o_r[AW+BW-1:0])\n" "\t\t\tbegin\n" "\t\t\t\t`ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])\n" "\t\t\t\t\t||(o_r[AW+BW-1:AW]!=0));\n" "\t\t\t\t`ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])\n" "\t\t\t\t\t||(o_r[AW+BW-1:BW]!=0));\n" "\t\t\tend else begin\n" "\t\t\t\t`ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])\n" "\t\t\t\t\t||(! (&o_r[AW+BW-1:AW])));\n" "\t\t\t\t`ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])\n" "\t\t\t\t\t||(! (&o_r[AW+BW-1:BW]!=0)));\n" "\t\t\tend\n" "\t\tend\n" "\tend\n" "\n"); fprintf(fp, "\tgenerate if (IAW <= IBW)\n" "\tbegin : NO_PARAM_CHANGE\n" "\t\tassign f_past_a_unsorted = (!f_sgn_a[TLEN+1])\n" "\t\t\t\t\t? f_past_a[TLEN] : f_past_a_neg;\n" "\t\tassign f_past_b_unsorted = (!f_sgn_b[TLEN+1])\n" "\t\t\t\t\t? f_past_b[TLEN] : f_past_b_neg;\n" "\tend else begin : SWAP_PARAMETERS\n" "\t\tassign f_past_a_unsorted = (!f_sgn_b[TLEN+1])\n" "\t\t\t\t\t? f_past_b[TLEN] : f_past_b_neg;\n" "\t\tassign f_past_b_unsorted = (!f_sgn_a[TLEN+1])\n" "\t\t\t\t\t? f_past_a[TLEN] : f_past_a_neg;\n" "\tend endgenerate\n"); fprintf(fp, "`ifdef BUTTERFLY\n" "\t// The following properties artificially restrict the inputs\n" "\t// to this long binary multiplier to only those values whose\n" "\t// absolute value is 0..7. It is used by the formal proof of\n" "\t// the BUTTERFLY to artificially limit the scope of the proof.\n" "\t// By the time the butterfly sees this code, it will be known\n" "\t// that the long binary multiply works. At issue will no longer\n" "\t// be whether or not this works, but rather whether it works in\n" "\t// context. For that purpose, we'll need to check timing, not\n" "\t// results. Checking against inputs of +/- 1 and 0 are perfect\n" "\t// for that task. The below assumptions (yes they are assumptions\n" "\t// just go a little bit further.\n" "\t//\n" "\t// THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT\n" "\t// THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.\n" "\t//\n" "\t// This just limits the proof for the butterfly, the parent.\n" "\t// module that calls this one\n" "\t//\n" "\t// Start by assuming that all inputs have an absolute value less\n" "\t// than eight.\n" "\talways @(*)\n" "\t\tassume(u_a[AW-1:3] == 0);\n" "\talways @(*)\n" "\t\tassume(u_b[BW-1:3] == 0);\n" "\n" "\t// If the inputs have these properties, then so too do many of\n" "\t// our internal values. ASSERT therefore that we never get out\n" "\t// of bounds\n" "\tgenerate for(k=0; k<TLEN; k=k+1)\n" "\tbegin\n" "\t\talways @(*)\n" "\t\tbegin\n" "\t\t\tassert(f_past_a[k][AW-1:3] == 0);\n" "\t\t\tassert(f_past_b[k][BW-1:3] == 0);\n" "\t\tend\n" "\tend endgenerate\n" "\n" "\tgenerate for(k=0; k<TLEN-1; k=k+1)\n" "\tbegin\n" "\t\talways @(*)\n" "\t\t\tassert(acc[k][IW+BW-1:6] == 0);\n" "\tend endgenerate\n" "\n" "\tgenerate for(k=0; k<TLEN-2; k=k+1)\n" "\tbegin\n" "\t\talways @(*)\n" "\t\t\tassert(r_b[k][BW-1:3] == 0);\n" "\tend endgenerate\n" "`endif // BUTTERFLY\n"); } else { fprintf(fp, "// Formal properties have not been enabled\n"); } fprintf(fp, "`endif\t// FORMAL\n" "endmodule\n"); fclose(fp); }
Go to most recent revision | Compare with Previous | Blame | View Log