OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Blame information for rev 36

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 36 dgisselq
////////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    ../rtl/longbimpy.v
4
//
5
// Project:     A General Purpose Pipelined FFT Implementation
6
//
7
// Purpose:     A portable shift and add multiply, built with the knowledge
8
//      of the existence of a six bit LUT and carry chain.  That knowledge
9
//      allows us to multiply two bits from one value at a time against all
10
//      of the bits of the other value.  This sub multiply is called the
11
//      bimpy.
12
//
13
//      For minimal processing delay, make the first parameter the one with
14
//      the least bits, so that AWIDTH <= BWIDTH.
15
//
16
//
17
//
18
// Creator:     Dan Gisselquist, Ph.D.
19
//              Gisselquist Technology, LLC
20
//
21
////////////////////////////////////////////////////////////////////////////////
22
//
23
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
24
//
25
// This program is free software (firmware): you can redistribute it and/or
26
// modify it under the terms of  the GNU General Public License as published
27
// by the Free Software Foundation, either version 3 of the License, or (at
28
// your option) any later version.
29
//
30
// This program is distributed in the hope that it will be useful, but WITHOUT
31
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
32
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
33
// for more details.
34
//
35
// You should have received a copy of the GNU General Public License along
36
// with this program.  (It's in the $(ROOT)/doc directory, run make with no
37
// target there if the PDF file isn't present.)  If not, see
38
// <http://www.gnu.org/licenses/> for a copy.
39
//
40
// License:     GPL, v3, as defined and found on www.gnu.org,
41
//              http://www.gnu.org/licenses/gpl.html
42
//
43
//
44
////////////////////////////////////////////////////////////////////////////////
45
//
46
//
47
`default_nettype        none
48
//
49
//
// longbimpy
//
// Signed (two's complement) shift-and-add multiplier.  The operands are first
// converted to sign + magnitude, the magnitudes are multiplied LUTB (=2) bits
// of A at a time against all of B using the external `bimpy` sub-multiplier,
// the partial products are summed in a pipelined tableau, and the sign is
// re-applied on the final clock.
//
// Ports:
//   i_clk         Clock.  Every register in this module updates on its
//                 rising edge.
//   i_ce          Clock enable.  Every register in this module (and, via the
//                 port connection, every bimpy instance) is gated by it.
//   i_a_unsorted  First operand, IAW bits, two's complement.
//   i_b_unsorted  Second operand, IBW bits, two's complement.
//   o_r           Registered product, AW+BW (== IAW+IBW) bits.
//
// Parameters:
//   IAW, IBW      Operand widths.  The narrower of the two becomes the
//                 internal "A" operand (AW bits), which sets the number of
//                 pipeline stages.  AW must be at least 5: below that the
//                 slice u_a[(IW-1):(2*LUTB)] used to load r_a[0] has a
//                 reversed range.
//   OW            Declared for the benefit of instantiating code; it is not
//                 referenced inside this module (o_r is sized from AW+BW).
//
// Latency:
//   The sign bit travels through a TLEN-deep shift register (r_s) after
//   being captured in `sgn`, and o_r adds one more register, i.e. TLEN+2
//   enabled clocks from input to output.  NOTE(review): this lines up with
//   the data path only if bimpy registers its output with exactly one clock
//   of latency -- bimpy is not visible here, confirm against its source.
//
module  longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r);
        parameter       IAW=8,  // The width of i_a, min width is 5
                        IBW=12, // The width of i_b, can be anything
                        // The following three parameters should not be changed
                        // by any implementation, but are based upon hardware
                        // and the above values:
                        OW=IAW+IBW;     // The output width
        localparam      AW = (IAW<IBW) ? IAW : IBW,     // Narrower operand
                        BW = (IAW<IBW) ? IBW : IAW,     // Wider operand
                        IW=(AW+1)&(-2), // Internal width of A: AW rounded up
                                        // to an even number of bits
                        LUTB=2, // How many bits we can multiply by at once
                        TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau
                                                 // (note IW == LUTB*TLEN)
        input                           i_clk, i_ce;
        input           [(IAW-1):0]      i_a_unsorted;
        input           [(IBW-1):0]      i_b_unsorted;
        output  reg     [(AW+BW-1):0]    o_r;

        //
        // Swap parameter order, so that AW <= BW -- for performance
        // reasons
        wire    [AW-1:0] i_a;
        wire    [BW-1:0] i_b;
        generate if (IAW <= IBW)
        begin : NO_PARAM_CHANGE
                assign i_a = i_a_unsorted;
                assign i_b = i_b_unsorted;
        end else begin : SWAP_PARAMETERS
                assign i_a = i_b_unsorted;
                assign i_b = i_a_unsorted;
        end endgenerate

        // Magnitudes of the two operands, and the sign of their product
        reg     [(IW-1):0]       u_a;
        reg     [(BW-1):0]       u_b;
        reg                     sgn;

        // Pipeline storage.  r_a[k] holds the bits of |A| not yet consumed
        // by stage k, r_b[k] the matching copy of |B|, r_s the delayed sign,
        // and acc[k] the running sum of partial products.
        reg     [(IW-1-2*(LUTB)):0]      r_a[0:(TLEN-3)];
        reg     [(BW-1):0]               r_b[0:(TLEN-3)];
        reg     [(TLEN-1):0]             r_s;
        reg     [(IW+BW-1):0]            acc[0:(TLEN-2)];
        genvar k;

        // First step:
        // Switch to unsigned arithmetic for our multiply, keeping track
        // of the sign along the way.  We'll then add the sign again later
        // at the end.
        //
        // If we were forced to stay within two's complement arithmetic,
        // taking the absolute value here would require an additional bit.
        // However, because our results are now unsigned, we can stay
        // within the number of bits given (for now).
        generate if (IW > AW)
        begin : ABS_AND_ADD_BIT_TO_A
                // AW is odd: pad |A| with one leading zero up to IW bits.
                // The negation is self-determined inside the concatenation,
                // so it is evaluated at AW bits.
                always @(posedge i_clk)
                        if (i_ce)
                                u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };
        end else begin : ABS_A
                // AW is even: IW == AW, no padding required
                always @(posedge i_clk)
                        if (i_ce)
                                u_a <= (i_a[AW-1])?(-i_a):(i_a);
        end endgenerate

        always @(posedge i_clk)
                if (i_ce)
                begin
                        u_b <= (i_b[BW-1])?(-i_b):(i_b);
                        // The product is negative iff exactly one operand is
                        sgn <= i_a[AW-1] ^ i_b[BW-1];
                end

        wire    [(BW+LUTB-1):0]  pr_a, pr_b;

        //
        // Second step: First two 2xN products.
        //
        // Since we have no tableau of additions (yet), we can do both
        // of the first two rows at the same time and add them together.
        // Every later stage already has a running sum to accumulate into,
        // and so can only fold in one new product per clock--but this
        // first stage can do two at a time.
        bimpy   #(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);
        bimpy   #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);
        // Save the bits of |A| that the two products above did not use
        always @(posedge i_clk)
                if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];
        always @(posedge i_clk)
                if (i_ce) r_b[0] <= u_b;
        // Delay line carrying the sign alongside the data
        always @(posedge i_clk)
                if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };
        // pr_a has weight 2^0, pr_b has weight 2^LUTB; both addends are
        // zero-extended to the full IW+BW accumulator width.
        always @(posedge i_clk) // One clk after p[0],p[1] become valid
                if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
                          +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };

        generate // Keep track of intermediate values, before multiplying them
        if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
        begin : gencopies
                always @(posedge i_clk)
                if (i_ce)
                begin
                        // Drop the LUTB bits stage k consumes, shifting the
                        // remainder down and zero-filling from the top
                        r_a[k+1] <= { {(LUTB){1'b0}},
                                r_a[k][(IW-1-(2*LUTB)):LUTB] };
                        r_b[k+1] <= r_b[k];
                end
        end endgenerate

        generate // The actual multiply and accumulate stage
        if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
        begin : genstages
                // First, the multiply: 2-bits times BW bits
                wire    [(BW+LUTB-1):0] genp;
                bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);

                // Then the accumulate step -- on the next clock
                //
                // genp has weight 2^(LUTB*(k+2)).  Because IW == LUTB*TLEN,
                // the leading zero pad shrinks to zero width in the last
                // stage (k == TLEN-3); that is legal only because the
                // concatenation still has other, non-empty operands.
                always @(posedge i_clk)
                        if (i_ce)
                                acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
                                        genp, {(LUTB*(k+2)){1'b0}} };
        end endgenerate

        // Final step: re-apply the sign and register the result, discarding
        // the pad bit(s) introduced when AW was rounded up to IW.
        wire    [(IW+BW-1):0]    w_r;
        assign  w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];
        always @(posedge i_clk)
                if (i_ce)
                        o_r <= w_r[(AW+BW-1):0];

        generate if (IW > AW)
        begin : VUNUSED
                // Give the discarded upper bits of w_r a named sink so the
                // linter doesn't complain about them.
                // verilator lint_off UNUSED
                wire    [(IW-AW)-1:0]    unused;
                assign  unused = w_r[(IW+BW-1):(AW+BW)];
                // verilator lint_on UNUSED
        end endgenerate

endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.