URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Blame information for rev 39

Details | Compare with Previous | View Log


////////////////////////////////////////////////////////////////////////////////
//
// Filename:    ../rtl/longbimpy.v
//
// Project:     A General Purpose Pipelined FFT Implementation
//
// Purpose:     A portable shift and add multiply, built with the knowledge
//      of the existence of a six bit LUT and carry chain.  That knowledge
//      allows us to multiply two bits from one value at a time against all
//      of the bits of the other value.  This sub multiply is called the
//      bimpy.
//
//      For minimal processing delay, make the first parameter the one with
//      the least bits, so that AWIDTH <= BWIDTH.
//
//
//
// Creator:     Dan Gisselquist, Ph.D.
//              Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
//
// This file is part of the general purpose pipelined FFT project.
//
// The pipelined FFT project is free software (firmware): you can redistribute
// it and/or modify it under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// The pipelined FFT project is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this program.  (It's in the $(ROOT)/doc directory.  Run make
// with no target there if the PDF file isn't present.)  If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License:     LGPL, v3, as defined and found on www.gnu.org,
//              http://www.gnu.org/licenses/lgpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
//
`default_nettype        none
//
module  longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r
`ifdef  FORMAL
        , f_past_a_unsorted, f_past_b_unsorted
`endif
                );
        parameter       IAW=8,  // The width of i_a, min width is 5
                        IBW=12, // The width of i_b, can be anything
                        // The following three parameters should not be changed
                        // by any implementation, but are based upon hardware
                        // and the above values:
                        OW=IAW+IBW;     // The output width
        localparam      AW = (IAW<IBW) ? IAW : IBW,
                        BW = (IAW<IBW) ? IBW : IAW,
                        IW=(AW+1)&(-2), // Internal width of A
                        LUTB=2, // How many bits we can multiply by at once
                        TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau
        input   wire                    i_clk, i_ce;
        input   wire    [(IAW-1):0]      i_a_unsorted;
        input   wire    [(IBW-1):0]      i_b_unsorted;
        output  reg     [(AW+BW-1):0]    o_r;
 
`ifdef  FORMAL
        output  wire    [(IAW-1):0]      f_past_a_unsorted;
        output  wire    [(IBW-1):0]      f_past_b_unsorted;
`endif
 
        //
        // Swap parameter order, so that AW <= BW -- for performance
        // reasons
        wire    [AW-1:0] i_a;
        wire    [BW-1:0] i_b;
        generate if (IAW <= IBW)
        begin : NO_PARAM_CHANGE
                assign i_a = i_a_unsorted;
                assign i_b = i_b_unsorted;
        end else begin : SWAP_PARAMETERS
                assign i_a = i_b_unsorted;
                assign i_b = i_a_unsorted;
        end endgenerate
 
        reg     [(IW-1):0]       u_a;
        reg     [(BW-1):0]       u_b;
        reg                     sgn;
 
        reg     [(IW-1-2*(LUTB)):0]      r_a[0:(TLEN-3)];
        reg     [(BW-1):0]               r_b[0:(TLEN-3)];
        reg     [(TLEN-1):0]             r_s;
        reg     [(IW+BW-1):0]            acc[0:(TLEN-2)];
        genvar k;
 
        // First step:
        // Switch to unsigned arithmetic for our multiply, keeping track
        // of the along the way.  We'll then add the sign again later at
        // the end.
        //
        // If we were forced to stay within two's complement arithmetic,
        // taking the absolute value here would require an additional bit.
        // However, because our results are now unsigned, we can stay
        // within the number of bits given (for now).
        initial u_a = 0;
        generate if (IW > AW)
        begin : ABS_AND_ADD_BIT_TO_A
                always @(posedge i_clk)
                        if (i_ce)
                                u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };
        end else begin : ABS_A
                always @(posedge i_clk)
                        if (i_ce)
                                u_a <= (i_a[AW-1])?(-i_a):(i_a);
        end endgenerate
 
        initial sgn = 0;
        initial u_b = 0;
        always @(posedge i_clk)
        if (i_ce)
        begin : ABS_B
                u_b <= (i_b[BW-1])?(-i_b):(i_b);
                sgn <= i_a[AW-1] ^ i_b[BW-1];
        end
 
        wire    [(BW+LUTB-1):0]  pr_a, pr_b;
 
        //
        // Second step: First two 2xN products.
        //
        // Since we have no tableau of additions (yet), we can do both
        // of the first two rows at the same time and add them together.
        // For the next round, we'll then have a previous sum to accumulate
        // with new and subsequent product, and so only do one product at
        // a time can follow this--but the first clock can do two at a time.
        bimpy   #(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);
        bimpy   #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);
 
        initial r_s    = 0;
        initial r_a[0] = 0;
        initial r_b[0] = 0;
        always @(posedge i_clk)
                if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];
        always @(posedge i_clk)
                if (i_ce) r_b[0] <= u_b;
        always @(posedge i_clk)
                if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };
 
        initial acc[0] = 0;
        always @(posedge i_clk) // One clk after p[0],p[1] become valid
        if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
                  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };
 
        generate // Keep track of intermediate values, before multiplying them
        if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
        begin : GENCOPIES
 
                initial r_a[k+1] = 0;
                initial r_b[k+1] = 0;
                always @(posedge i_clk)
                if (i_ce)
                begin
                        r_a[k+1] <= { {(LUTB){1'b0}},
                                r_a[k][(IW-1-(2*LUTB)):LUTB] };
                        r_b[k+1] <= r_b[k];
                end
        end endgenerate
 
        generate // The actual multiply and accumulate stage
        if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
        begin : GENSTAGES
                wire    [(BW+LUTB-1):0] genp;
 
                // First, the multiply: 2-bits times BW bits
                bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);
 
                // Then the accumulate step -- on the next clock
                initial acc[k+1] = 0;
                always @(posedge i_clk)
                if (i_ce)
                        acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
                                genp, {(LUTB*(k+2)){1'b0}} };
        end endgenerate
 
        wire    [(IW+BW-1):0]    w_r;
        assign  w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];
 
        initial o_r = 0;
        always @(posedge i_clk)
        if (i_ce)
                o_r <= w_r[(AW+BW-1):0];
 
        generate if (IW > AW)
        begin : VUNUSED
                // verilator lint_off UNUSED
                wire    [(IW-AW)-1:0]    unused;
                assign  unused = w_r[(IW+BW-1):(AW+BW)];
                // verilator lint_on UNUSED
        end endgenerate
 
`ifdef  FORMAL
        reg     f_past_valid;
        initial f_past_valid = 1'b0;
        always @(posedge i_clk)
                f_past_valid <= 1'b1;
 
`define ASSERT  assert
`ifdef  LONGBIMPY
 
        always @(posedge i_clk)
        if (!$past(i_ce))
                assume(i_ce);
 
`endif
 
        reg     [AW-1:0] f_past_a        [0:TLEN];
        reg     [BW-1:0] f_past_b        [0:TLEN];
        reg     [TLEN+1:0]       f_sgn_a, f_sgn_b;
 
        initial f_past_a[0] = 0;
        initial f_past_b[0] = 0;
        initial f_sgn_a = 0;
        initial f_sgn_b = 0;
        always @(posedge i_clk)
        if (i_ce)
        begin
                f_past_a[0] <= u_a;
                f_past_b[0] <= u_b;
                f_sgn_a[0] <= i_a[AW-1];
                f_sgn_b[0] <= i_b[BW-1];
        end
 
        generate for(k=0; k<TLEN; k=k+1)
        begin
                initial f_past_a[k+1] = 0;
                initial f_past_b[k+1] = 0;
                initial f_sgn_a[k+1] = 0;
                initial f_sgn_b[k+1] = 0;
                always @(posedge i_clk)
                if (i_ce)
                begin
                        f_past_a[k+1] <= f_past_a[k];
                        f_past_b[k+1] <= f_past_b[k];
 
                        f_sgn_a[k+1]  <= f_sgn_a[k];
                        f_sgn_b[k+1]  <= f_sgn_b[k];
                end
        end endgenerate
 
        always @(posedge i_clk)
        if (i_ce)
        begin
                f_sgn_a[TLEN+1] <= f_sgn_a[TLEN];
                f_sgn_b[TLEN+1] <= f_sgn_b[TLEN];
        end
 
        always @(posedge i_clk)
        begin
                assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));
                assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
                assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
        end
 
        always @(posedge i_clk)
        if ((f_past_valid)&&($past(i_ce)))
        begin
                if ($past(i_a)==0)
                        `ASSERT(u_a == 0);
                else if ($past(i_a[AW-1]) == 1'b0)
                        `ASSERT(u_a == $past(i_a));
 
                if ($past(i_b)==0)
                        `ASSERT(u_b == 0);
                else if ($past(i_b[BW-1]) == 1'b0)
                        `ASSERT(u_b == $past(i_b));
        end
 
        generate // Keep track of intermediate values, before multiplying them
        if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
        begin : ASSERT_GENCOPY
                always @(posedge i_clk)
                if (i_ce)
                begin
                        if (f_past_a[k]==0)
                                `ASSERT(r_a[k] == 0);
                        else if (f_past_a[k]==1)
                                `ASSERT(r_a[k] == 0);
                        `ASSERT(r_b[k] == f_past_b[k]);
                end
        end endgenerate
 
        generate // The actual multiply and accumulate stage
        if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
        begin : ASSERT_GENSTAGE
                always @(posedge i_clk)
                if ((f_past_valid)&&($past(i_ce)))
                begin
                        if (f_past_a[k+1]==0)
                                `ASSERT(acc[k] == 0);
                        if (f_past_a[k+1]==1)
                                `ASSERT(acc[k] == f_past_b[k+1]);
                        if (f_past_b[k+1]==0)
                                `ASSERT(acc[k] == 0);
                        if (f_past_b[k+1]==1)
                        begin
                                `ASSERT(acc[k][(2*k)+3:0]
                                                == f_past_a[k+1][(2*k)+3:0]);
                                `ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);
                        end
                end
        end endgenerate
 
        wire    [AW-1:0] f_past_a_neg = - f_past_a[TLEN];
        wire    [BW-1:0] f_past_b_neg = - f_past_b[TLEN];
 
        wire    [AW-1:0] f_past_a_pos = f_past_a[TLEN][AW-1]
                                        ? f_past_a_neg : f_past_a[TLEN];
        wire    [BW-1:0] f_past_b_pos = f_past_b[TLEN][BW-1]
                                        ? f_past_b_neg : f_past_b[TLEN];
 
        always @(posedge i_clk)
        if ((f_past_valid)&&($past(i_ce)))
        begin
                if ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0))
                        `ASSERT(o_r == 0);
                else if (f_past_a[TLEN]==1)
                begin
                        if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)
                        begin
                                `ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);
                                `ASSERT(o_r[AW+BW-1:BW] == 0);
                        end else begin // if (f_sgn_b[TLEN+1]) begin
                                `ASSERT(o_r[BW-1:0] == f_past_b_neg);
                                `ASSERT(o_r[AW+BW-1:BW]
                                        == {(AW){f_past_b_neg[BW-1]}});
                        end
                end else if (f_past_b[TLEN]==1)
                begin
                        if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)
                        begin
                                `ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);
                                `ASSERT(o_r[AW+BW-1:AW] == 0);
                        end else begin
                                `ASSERT(o_r[AW-1:0] == f_past_a_neg);
                                `ASSERT(o_r[AW+BW-1:AW]
                                        == {(BW){f_past_a_neg[AW-1]}});
                        end
                end else begin
                        `ASSERT(o_r != 0);
                        if (!o_r[AW+BW-1:0])
                        begin
                                `ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])
                                        ||(o_r[AW+BW-1:AW]!=0));
                                `ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])
                                        ||(o_r[AW+BW-1:BW]!=0));
                        end else begin
                                `ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])
                                        ||(! (&o_r[AW+BW-1:AW])));
                                `ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])
                                        ||(! (&o_r[AW+BW-1:BW]!=0)));
                        end
                end
        end
 
        generate if (IAW <= IBW)
        begin : NO_PARAM_CHANGE
                assign f_past_a_unsorted = (!f_sgn_a[TLEN+1])
                                        ? f_past_a[TLEN] : f_past_a_neg;
                assign f_past_b_unsorted = (!f_sgn_b[TLEN+1])
                                        ? f_past_b[TLEN] : f_past_b_neg;
        end else begin : SWAP_PARAMETERS
                assign f_past_a_unsorted = (!f_sgn_b[TLEN+1])
                                        ? f_past_b[TLEN] : f_past_b_neg;
                assign f_past_b_unsorted = (!f_sgn_a[TLEN+1])
                                        ? f_past_a[TLEN] : f_past_a_neg;
        end endgenerate
`ifdef  BUTTERFLY
        // The following properties artificially restrict the inputs
        // to this long binary multiplier to only those values whose
        // absolute value is 0..7.  It is used by the formal proof of
        // the BUTTERFLY to artificially limit the scope of the proof.
        // By the time the butterfly sees this code, it will be known
        // that the long binary multiply works.  At issue will no longer
        // be whether or not this works, but rather whether it works in
        // context.  For that purpose, we'll need to check timing, not
        // results.  Checking against inputs of +/- 1 and 0 are perfect
        // for that task.  The below assumptions (yes they are assumptions
        // just go a little bit further.
        //
        // THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT
        // THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.
        //
        // This just limits the proof for the butterfly, the parent.
        // module that calls this one
        //
        // Start by assuming that all inputs have an absolute value less
        // than eight.
        always @(*)
                assume(u_a[AW-1:3] == 0);
        always @(*)
                assume(u_b[BW-1:3] == 0);
 
        // If the inputs have these properties, then so too do many of
        // our internal values.  ASSERT therefore that we never get out
        // of bounds
        generate for(k=0; k<TLEN; k=k+1)
        begin
                always @(*)
                begin
                        assert(f_past_a[k][AW-1:3] == 0);
                        assert(f_past_b[k][BW-1:3] == 0);
                end
        end endgenerate
 
        generate for(k=0; k<TLEN-1; k=k+1)
        begin
                always @(*)
                        assert(acc[k][IW+BW-1:6] == 0);
        end endgenerate
 
        generate for(k=0; k<TLEN-2; k=k+1)
        begin
                always @(*)
                        assert(r_b[k][BW-1:3] == 0);
        end endgenerate
`endif  // BUTTERFLY
`endif  // FORMAL
endmodule

Line No.	Rev	Author	Line
1	36	dgisselq	`////////////////////////////////////////////////////////////////////////////////`
2			`//`
3			`// Filename: ../rtl/longbimpy.v`
4			`//`
5			`// Project: A General Purpose Pipelined FFT Implementation`
6			`//`
7			`// Purpose: A portable shift and add multiply, built with the knowledge`
8			`// of the existence of a six bit LUT and carry chain. That knowledge`
9			`// allows us to multiply two bits from one value at a time against all`
10			`// of the bits of the other value. This sub multiply is called the`
11			`// bimpy.`
12			`//`
13			`// For minimal processing delay, make the first parameter the one with`
14			`// the least bits, so that AWIDTH <= BWIDTH.`
15			`//`
16			`//`
17			`//`
18			`// Creator: Dan Gisselquist, Ph.D.`
19			`// Gisselquist Technology, LLC`
20			`//`
21			`////////////////////////////////////////////////////////////////////////////////`
22			`//`
23			`// Copyright (C) 2015-2018, Gisselquist Technology, LLC`
24			`//`
25	39	dgisselq	`// This file is part of the general purpose pipelined FFT project.`
26	36	dgisselq	`//`
27	39	dgisselq	`// The pipelined FFT project is free software (firmware): you can redistribute`
28			`// it and/or modify it under the terms of the GNU Lesser General Public License`
29			`// as published by the Free Software Foundation, either version 3 of the`
30			`// License, or (at your option) any later version.`
31	36	dgisselq	`//`
32	39	dgisselq	`// The pipelined FFT project is distributed in the hope that it will be useful,`
33			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
34			`// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser`
35			`// General Public License for more details.`
36			`//`
37			`// You should have received a copy of the GNU Lesser General Public License`
38			`// along with this program. (It's in the $(ROOT)/doc directory. Run make`
39			`// with no target there if the PDF file isn't present.) If not, see`
40	36	dgisselq	`// <http://www.gnu.org/licenses/> for a copy.`
41			`//`
42	39	dgisselq	`// License: LGPL, v3, as defined and found on www.gnu.org,`
43			`// http://www.gnu.org/licenses/lgpl.html`
44	36	dgisselq	`//`
45			`//`
46			`////////////////////////////////////////////////////////////////////////////////`
47			`//`
48			`//`
49			`default_nettype none
50			`//`
51	39	dgisselq	`module longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r`
52			`ifdef FORMAL
53			`, f_past_a_unsorted, f_past_b_unsorted`
54			`endif
55			`);`
56	36	dgisselq	`parameter IAW=8, // The width of i_a, min width is 5`
57			`IBW=12, // The width of i_b, can be anything`
58			`// The following three parameters should not be changed`
59			`// by any implementation, but are based upon hardware`
60			`// and the above values:`
61			`OW=IAW+IBW; // The output width`
62			`localparam AW = (IAW<IBW) ? IAW : IBW,`
63			`BW = (IAW<IBW) ? IBW : IAW,`
64			`IW=(AW+1)&(-2), // Internal width of A`
65			`LUTB=2, // How many bits we can multiply by at once`
66			`TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau`
67	39	dgisselq	`input wire i_clk, i_ce;`
68			`input wire [(IAW-1):0] i_a_unsorted;`
69			`input wire [(IBW-1):0] i_b_unsorted;`
70	36	dgisselq	`output reg [(AW+BW-1):0] o_r;`
71
72	39	dgisselq	`ifdef FORMAL
73			`output wire [(IAW-1):0] f_past_a_unsorted;`
74			`output wire [(IBW-1):0] f_past_b_unsorted;`
75			`endif
76
77	36	dgisselq	`//`
78			`// Swap parameter order, so that AW <= BW -- for performance`
79			`// reasons`
80			`wire [AW-1:0] i_a;`
81			`wire [BW-1:0] i_b;`
82			`generate if (IAW <= IBW)`
83			`begin : NO_PARAM_CHANGE`
84			`assign i_a = i_a_unsorted;`
85			`assign i_b = i_b_unsorted;`
86			`end else begin : SWAP_PARAMETERS`
87			`assign i_a = i_b_unsorted;`
88			`assign i_b = i_a_unsorted;`
89			`end endgenerate`
90
91			`reg [(IW-1):0] u_a;`
92			`reg [(BW-1):0] u_b;`
93			`reg sgn;`
94
95			`reg [(IW-1-2*(LUTB)):0] r_a[0:(TLEN-3)];`
96			`reg [(BW-1):0] r_b[0:(TLEN-3)];`
97			`reg [(TLEN-1):0] r_s;`
98			`reg [(IW+BW-1):0] acc[0:(TLEN-2)];`
99			`genvar k;`
100
101			`// First step:`
102			`// Switch to unsigned arithmetic for our multiply, keeping track`
103			`// of the along the way. We'll then add the sign again later at`
104			`// the end.`
105			`//`
106			`// If we were forced to stay within two's complement arithmetic,`
107			`// taking the absolute value here would require an additional bit.`
108			`// However, because our results are now unsigned, we can stay`
109			`// within the number of bits given (for now).`
110	39	dgisselq	`initial u_a = 0;`
111	36	dgisselq	`generate if (IW > AW)`
112	39	dgisselq	`begin : ABS_AND_ADD_BIT_TO_A`
113	36	dgisselq	`always @(posedge i_clk)`
114			`if (i_ce)`
115			`u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };`
116	39	dgisselq	`end else begin : ABS_A`
117	36	dgisselq	`always @(posedge i_clk)`
118			`if (i_ce)`
119			`u_a <= (i_a[AW-1])?(-i_a):(i_a);`
120			`end endgenerate`
121
122	39	dgisselq	`initial sgn = 0;`
123			`initial u_b = 0;`
124	36	dgisselq	`always @(posedge i_clk)`
125	39	dgisselq	`if (i_ce)`
126			`begin : ABS_B`
127			`u_b <= (i_b[BW-1])?(-i_b):(i_b);`
128			`sgn <= i_a[AW-1] ^ i_b[BW-1];`
129			`end`
130	36	dgisselq
131			`wire [(BW+LUTB-1):0] pr_a, pr_b;`
132
133			`//`
134			`// Second step: First two 2xN products.`
135			`//`
136			`// Since we have no tableau of additions (yet), we can do both`
137			`// of the first two rows at the same time and add them together.`
138			`// For the next round, we'll then have a previous sum to accumulate`
139			`// with new and subsequent product, and so only do one product at`
140			`// a time can follow this--but the first clock can do two at a time.`
141			`bimpy #(BW) lmpy_0(i_clk,i_ce,u_a[( LUTB-1): 0], u_b, pr_a);`
142			`bimpy #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);`
143	39	dgisselq
144			`initial r_s = 0;`
145			`initial r_a[0] = 0;`
146			`initial r_b[0] = 0;`
147	36	dgisselq	`always @(posedge i_clk)`
148			`if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];`
149			`always @(posedge i_clk)`
150			`if (i_ce) r_b[0] <= u_b;`
151			`always @(posedge i_clk)`
152			`if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };`
153	39	dgisselq
154			`initial acc[0] = 0;`
155	36	dgisselq	`always @(posedge i_clk) // One clk after p[0],p[1] become valid`
156	39	dgisselq	`if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}`
157			`+{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };`
158	36	dgisselq
159			`generate // Keep track of intermediate values, before multiplying them`
160			`if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)`
161	39	dgisselq	`begin : GENCOPIES`
162
163			`initial r_a[k+1] = 0;`
164			`initial r_b[k+1] = 0;`
165	36	dgisselq	`always @(posedge i_clk)`
166			`if (i_ce)`
167			`begin`
168			`r_a[k+1] <= { {(LUTB){1'b0}},`
169			`r_a[k][(IW-1-(2*LUTB)):LUTB] };`
170			`r_b[k+1] <= r_b[k];`
171			`end`
172			`end endgenerate`
173
174			`generate // The actual multiply and accumulate stage`
175			`if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)`
176	39	dgisselq	`begin : GENSTAGES`
177			`wire [(BW+LUTB-1):0] genp;`
178
179	36	dgisselq	`// First, the multiply: 2-bits times BW bits`
180			`bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);`
181
182			`// Then the accumulate step -- on the next clock`
183	39	dgisselq	`initial acc[k+1] = 0;`
184	36	dgisselq	`always @(posedge i_clk)`
185	39	dgisselq	`if (i_ce)`
186			`acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},`
187			`genp, {(LUTB*(k+2)){1'b0}} };`
188	36	dgisselq	`end endgenerate`
189
190			`wire [(IW+BW-1):0] w_r;`
191			`assign w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];`
192	39	dgisselq
193			`initial o_r = 0;`
194	36	dgisselq	`always @(posedge i_clk)`
195	39	dgisselq	`if (i_ce)`
196			`o_r <= w_r[(AW+BW-1):0];`
197	36	dgisselq
198			`generate if (IW > AW)`
199			`begin : VUNUSED`
200			`// verilator lint_off UNUSED`
201			`wire [(IW-AW)-1:0] unused;`
202			`assign unused = w_r[(IW+BW-1):(AW+BW)];`
203			`// verilator lint_on UNUSED`
204			`end endgenerate`
205
206	39	dgisselq	`ifdef FORMAL
207			`reg f_past_valid;`
208			`initial f_past_valid = 1'b0;`
209			`always @(posedge i_clk)`
210			`f_past_valid <= 1'b1;`
211
212			`define ASSERT assert
213			`ifdef LONGBIMPY
214
215			`always @(posedge i_clk)`
216			`if (!$past(i_ce))`
217			`assume(i_ce);`
218
219			`endif
220
221			`reg [AW-1:0] f_past_a [0:TLEN];`
222			`reg [BW-1:0] f_past_b [0:TLEN];`
223			`reg [TLEN+1:0] f_sgn_a, f_sgn_b;`
224
225			`initial f_past_a[0] = 0;`
226			`initial f_past_b[0] = 0;`
227			`initial f_sgn_a = 0;`
228			`initial f_sgn_b = 0;`
229			`always @(posedge i_clk)`
230			`if (i_ce)`
231			`begin`
232			`f_past_a[0] <= u_a;`
233			`f_past_b[0] <= u_b;`
234			`f_sgn_a[0] <= i_a[AW-1];`
235			`f_sgn_b[0] <= i_b[BW-1];`
236			`end`
237
238			`generate for(k=0; k<TLEN; k=k+1)`
239			`begin`
240			`initial f_past_a[k+1] = 0;`
241			`initial f_past_b[k+1] = 0;`
242			`initial f_sgn_a[k+1] = 0;`
243			`initial f_sgn_b[k+1] = 0;`
244			`always @(posedge i_clk)`
245			`if (i_ce)`
246			`begin`
247			`f_past_a[k+1] <= f_past_a[k];`
248			`f_past_b[k+1] <= f_past_b[k];`
249
250			`f_sgn_a[k+1] <= f_sgn_a[k];`
251			`f_sgn_b[k+1] <= f_sgn_b[k];`
252			`end`
253			`end endgenerate`
254
255			`always @(posedge i_clk)`
256			`if (i_ce)`
257			`begin`
258			`f_sgn_a[TLEN+1] <= f_sgn_a[TLEN];`
259			`f_sgn_b[TLEN+1] <= f_sgn_b[TLEN];`
260			`end`
261
262			`always @(posedge i_clk)`
263			`begin`
264			`assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));`
265			`assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));`
266			`assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));`
267			`end`
268
269			`always @(posedge i_clk)`
270			`if ((f_past_valid)&&($past(i_ce)))`
271			`begin`
272			`if ($past(i_a)==0)`
273			`ASSERT(u_a == 0);
274			`else if ($past(i_a[AW-1]) == 1'b0)`
275			`ASSERT(u_a == $past(i_a));
276
277			`if ($past(i_b)==0)`
278			`ASSERT(u_b == 0);
279			`else if ($past(i_b[BW-1]) == 1'b0)`
280			`ASSERT(u_b == $past(i_b));
281			`end`
282
283			`generate // Keep track of intermediate values, before multiplying them`
284			`if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)`
285			`begin : ASSERT_GENCOPY`
286			`always @(posedge i_clk)`
287			`if (i_ce)`
288			`begin`
289			`if (f_past_a[k]==0)`
290			`ASSERT(r_a[k] == 0);
291			`else if (f_past_a[k]==1)`
292			`ASSERT(r_a[k] == 0);
293			`ASSERT(r_b[k] == f_past_b[k]);
294			`end`
295			`end endgenerate`
296
297			`generate // The actual multiply and accumulate stage`
298			`if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)`
299			`begin : ASSERT_GENSTAGE`
300			`always @(posedge i_clk)`
301			`if ((f_past_valid)&&($past(i_ce)))`
302			`begin`
303			`if (f_past_a[k+1]==0)`
304			`ASSERT(acc[k] == 0);
305			`if (f_past_a[k+1]==1)`
306			`ASSERT(acc[k] == f_past_b[k+1]);
307			`if (f_past_b[k+1]==0)`
308			`ASSERT(acc[k] == 0);
309			`if (f_past_b[k+1]==1)`
310			`begin`
311			`ASSERT(acc[k][(2*k)+3:0]
312			`== f_past_a[k+1][(2*k)+3:0]);`
313			`ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);
314			`end`
315			`end`
316			`end endgenerate`
317
318			`wire [AW-1:0] f_past_a_neg = - f_past_a[TLEN];`
319			`wire [BW-1:0] f_past_b_neg = - f_past_b[TLEN];`
320
321			`wire [AW-1:0] f_past_a_pos = f_past_a[TLEN][AW-1]`
322			`? f_past_a_neg : f_past_a[TLEN];`
323			`wire [BW-1:0] f_past_b_pos = f_past_b[TLEN][BW-1]`
324			`? f_past_b_neg : f_past_b[TLEN];`
325
326			`always @(posedge i_clk)`
327			`if ((f_past_valid)&&($past(i_ce)))`
328			`begin`
329			`if ((f_past_a[TLEN]==0)\|\|(f_past_b[TLEN]==0))`
330			`ASSERT(o_r == 0);
331			`else if (f_past_a[TLEN]==1)`
332			`begin`
333			`if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)`
334			`begin`
335			`ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);
336			`ASSERT(o_r[AW+BW-1:BW] == 0);
337			`end else begin // if (f_sgn_b[TLEN+1]) begin`
338			`ASSERT(o_r[BW-1:0] == f_past_b_neg);
339			`ASSERT(o_r[AW+BW-1:BW]
340			`== {(AW){f_past_b_neg[BW-1]}});`
341			`end`
342			`end else if (f_past_b[TLEN]==1)`
343			`begin`
344			`if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)`
345			`begin`
346			`ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);
347			`ASSERT(o_r[AW+BW-1:AW] == 0);
348			`end else begin`
349			`ASSERT(o_r[AW-1:0] == f_past_a_neg);
350			`ASSERT(o_r[AW+BW-1:AW]
351			`== {(BW){f_past_a_neg[AW-1]}});`
352			`end`
353			`end else begin`
354			`ASSERT(o_r != 0);
355			`if (!o_r[AW+BW-1:0])`
356			`begin`
357			`ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])
358			`\|\|(o_r[AW+BW-1:AW]!=0));`
359			`ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])
360			`\|\|(o_r[AW+BW-1:BW]!=0));`
361			`end else begin`
362			`ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])
363			`\|\|(! (&o_r[AW+BW-1:AW])));`
364			`ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])
365			`\|\|(! (&o_r[AW+BW-1:BW]!=0)));`
366			`end`
367			`end`
368			`end`
369
370			`generate if (IAW <= IBW)`
371			`begin : NO_PARAM_CHANGE`
372			`assign f_past_a_unsorted = (!f_sgn_a[TLEN+1])`
373			`? f_past_a[TLEN] : f_past_a_neg;`
374			`assign f_past_b_unsorted = (!f_sgn_b[TLEN+1])`
375			`? f_past_b[TLEN] : f_past_b_neg;`
376			`end else begin : SWAP_PARAMETERS`
377			`assign f_past_a_unsorted = (!f_sgn_b[TLEN+1])`
378			`? f_past_b[TLEN] : f_past_b_neg;`
379			`assign f_past_b_unsorted = (!f_sgn_a[TLEN+1])`
380			`? f_past_a[TLEN] : f_past_a_neg;`
381			`end endgenerate`
382			`ifdef BUTTERFLY
383			`// The following properties artificially restrict the inputs`
384			`// to this long binary multiplier to only those values whose`
385			`// absolute value is 0..7. It is used by the formal proof of`
386			`// the BUTTERFLY to artificially limit the scope of the proof.`
387			`// By the time the butterfly sees this code, it will be known`
388			`// that the long binary multiply works. At issue will no longer`
389			`// be whether or not this works, but rather whether it works in`
390			`// context. For that purpose, we'll need to check timing, not`
391			`// results. Checking against inputs of +/- 1 and 0 are perfect`
392			`// for that task. The below assumptions (yes they are assumptions`
393			`// just go a little bit further.`
394			`//`
395			`// THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT`
396			`// THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.`
397			`//`
398			`// This just limits the proof for the butterfly, the parent.`
399			`// module that calls this one`
400			`//`
401			`// Start by assuming that all inputs have an absolute value less`
402			`// than eight.`
403			`always @(*)`
404			`assume(u_a[AW-1:3] == 0);`
405			`always @(*)`
406			`assume(u_b[BW-1:3] == 0);`
407
408			`// If the inputs have these properties, then so too do many of`
409			`// our internal values. ASSERT therefore that we never get out`
410			`// of bounds`
411			`generate for(k=0; k<TLEN; k=k+1)`
412			`begin`
413			`always @(*)`
414			`begin`
415			`assert(f_past_a[k][AW-1:3] == 0);`
416			`assert(f_past_b[k][BW-1:3] == 0);`
417			`end`
418			`end endgenerate`
419
420			`generate for(k=0; k<TLEN-1; k=k+1)`
421			`begin`
422			`always @(*)`
423			`assert(acc[k][IW+BW-1:6] == 0);`
424			`end endgenerate`
425
426			`generate for(k=0; k<TLEN-2; k=k+1)`
427			`begin`
428			`always @(*)`
429			`assert(r_b[k][BW-1:3] == 0);`
430			`end endgenerate`
431			`endif // BUTTERFLY
432			`endif // FORMAL
433	36	dgisselq	`endmodule`

Browse

Tools

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Blame information for rev 39