OpenCores
URL https://opencores.org/ocsvn/dblclockfft/dblclockfft/trunk

Subversion Repositories dblclockfft

[/] [dblclockfft/] [trunk/] [rtl/] [longbimpy.v] - Blame information for rev 39

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 36 dgisselq
////////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    ../rtl/longbimpy.v
4
//
5
// Project:     A General Purpose Pipelined FFT Implementation
6
//
7
// Purpose:     A portable shift and add multiply, built with the knowledge
8
//      of the existence of a six bit LUT and carry chain.  That knowledge
9
//      allows us to multiply two bits from one value at a time against all
10
//      of the bits of the other value.  This sub multiply is called the
11
//      bimpy.
12
//
13
//      For minimal processing delay, make the first parameter the one with
14
//      the least bits, so that AWIDTH <= BWIDTH.
15
//
16
//
17
//
18
// Creator:     Dan Gisselquist, Ph.D.
19
//              Gisselquist Technology, LLC
20
//
21
////////////////////////////////////////////////////////////////////////////////
22
//
23
// Copyright (C) 2015-2018, Gisselquist Technology, LLC
24
//
25 39 dgisselq
// This file is part of the general purpose pipelined FFT project.
26 36 dgisselq
//
27 39 dgisselq
// The pipelined FFT project is free software (firmware): you can redistribute
28
// it and/or modify it under the terms of the GNU Lesser General Public License
29
// as published by the Free Software Foundation, either version 3 of the
30
// License, or (at your option) any later version.
31 36 dgisselq
//
32 39 dgisselq
// The pipelined FFT project is distributed in the hope that it will be useful,
33
// but WITHOUT ANY WARRANTY; without even the implied warranty of
34
// MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
35
// General Public License for more details.
36
//
37
// You should have received a copy of the GNU Lesser General Public License
38
// along with this program.  (It's in the $(ROOT)/doc directory.  Run make
39
// with no target there if the PDF file isn't present.)  If not, see
40 36 dgisselq
// <http://www.gnu.org/licenses/> for a copy.
41
//
42 39 dgisselq
// License:     LGPL, v3, as defined and found on www.gnu.org,
43
//              http://www.gnu.org/licenses/lgpl.html
44 36 dgisselq
//
45
//
46
////////////////////////////////////////////////////////////////////////////////
47
//
48
//
49
`default_nettype        none
50
//
51 39 dgisselq
module  longbimpy(i_clk, i_ce, i_a_unsorted, i_b_unsorted, o_r
52
`ifdef  FORMAL
53
        , f_past_a_unsorted, f_past_b_unsorted
54
`endif
55
                );
56 36 dgisselq
        parameter       IAW=8,  // The width of i_a, min width is 5
57
                        IBW=12, // The width of i_b, can be anything
58
                        // The following three parameters should not be changed
59
                        // by any implementation, but are based upon hardware
60
                        // and the above values:
61
                        OW=IAW+IBW;     // The output width
62
        localparam      AW = (IAW<IBW) ? IAW : IBW,
63
                        BW = (IAW<IBW) ? IBW : IAW,
64
                        IW=(AW+1)&(-2), // Internal width of A
65
                        LUTB=2, // How many bits we can multiply by at once
66
                        TLEN=(AW+(LUTB-1))/LUTB; // Nmbr of rows in our tableau
67 39 dgisselq
        input   wire                    i_clk, i_ce;
68
        input   wire    [(IAW-1):0]      i_a_unsorted;
69
        input   wire    [(IBW-1):0]      i_b_unsorted;
70 36 dgisselq
        output  reg     [(AW+BW-1):0]    o_r;
71
 
72 39 dgisselq
`ifdef  FORMAL
73
        output  wire    [(IAW-1):0]      f_past_a_unsorted;
74
        output  wire    [(IBW-1):0]      f_past_b_unsorted;
75
`endif
76
 
77 36 dgisselq
        //
78
        // Swap parameter order, so that AW <= BW -- for performance
79
        // reasons
80
        wire    [AW-1:0] i_a;
81
        wire    [BW-1:0] i_b;
82
        generate if (IAW <= IBW)
83
        begin : NO_PARAM_CHANGE
84
                assign i_a = i_a_unsorted;
85
                assign i_b = i_b_unsorted;
86
        end else begin : SWAP_PARAMETERS
87
                assign i_a = i_b_unsorted;
88
                assign i_b = i_a_unsorted;
89
        end endgenerate
90
 
91
        reg     [(IW-1):0]       u_a;
92
        reg     [(BW-1):0]       u_b;
93
        reg                     sgn;
94
 
95
        reg     [(IW-1-2*(LUTB)):0]      r_a[0:(TLEN-3)];
96
        reg     [(BW-1):0]               r_b[0:(TLEN-3)];
97
        reg     [(TLEN-1):0]             r_s;
98
        reg     [(IW+BW-1):0]            acc[0:(TLEN-2)];
99
        genvar k;
100
 
101
        // First step:
102
        // Switch to unsigned arithmetic for our multiply, keeping track
103
        // of the along the way.  We'll then add the sign again later at
104
        // the end.
105
        //
106
        // If we were forced to stay within two's complement arithmetic,
107
        // taking the absolute value here would require an additional bit.
108
        // However, because our results are now unsigned, we can stay
109
        // within the number of bits given (for now).
110 39 dgisselq
        initial u_a = 0;
111 36 dgisselq
        generate if (IW > AW)
112 39 dgisselq
        begin : ABS_AND_ADD_BIT_TO_A
113 36 dgisselq
                always @(posedge i_clk)
114
                        if (i_ce)
115
                                u_a <= { 1'b0, (i_a[AW-1])?(-i_a):(i_a) };
116 39 dgisselq
        end else begin : ABS_A
117 36 dgisselq
                always @(posedge i_clk)
118
                        if (i_ce)
119
                                u_a <= (i_a[AW-1])?(-i_a):(i_a);
120
        end endgenerate
121
 
122 39 dgisselq
        initial sgn = 0;
123
        initial u_b = 0;
124 36 dgisselq
        always @(posedge i_clk)
125 39 dgisselq
        if (i_ce)
126
        begin : ABS_B
127
                u_b <= (i_b[BW-1])?(-i_b):(i_b);
128
                sgn <= i_a[AW-1] ^ i_b[BW-1];
129
        end
130 36 dgisselq
 
131
        wire    [(BW+LUTB-1):0]  pr_a, pr_b;
132
 
133
        //
134
        // Second step: First two 2xN products.
135
        //
136
        // Since we have no tableau of additions (yet), we can do both
137
        // of the first two rows at the same time and add them together.
138
        // For the next round, we'll then have a previous sum to accumulate
139
        // with new and subsequent product, and so only do one product at
140
        // a time can follow this--but the first clock can do two at a time.
141
        bimpy   #(BW) lmpy_0(i_clk,i_ce,u_a[(  LUTB-1):   0], u_b, pr_a);
142
        bimpy   #(BW) lmpy_1(i_clk,i_ce,u_a[(2*LUTB-1):LUTB], u_b, pr_b);
143 39 dgisselq
 
144
        initial r_s    = 0;
145
        initial r_a[0] = 0;
146
        initial r_b[0] = 0;
147 36 dgisselq
        always @(posedge i_clk)
148
                if (i_ce) r_a[0] <= u_a[(IW-1):(2*LUTB)];
149
        always @(posedge i_clk)
150
                if (i_ce) r_b[0] <= u_b;
151
        always @(posedge i_clk)
152
                if (i_ce) r_s <= { r_s[(TLEN-2):0], sgn };
153 39 dgisselq
 
154
        initial acc[0] = 0;
155 36 dgisselq
        always @(posedge i_clk) // One clk after p[0],p[1] become valid
156 39 dgisselq
        if (i_ce) acc[0] <= { {(IW-LUTB){1'b0}}, pr_a}
157
                  +{ {(IW-(2*LUTB)){1'b0}}, pr_b, {(LUTB){1'b0}} };
158 36 dgisselq
 
159
        generate // Keep track of intermediate values, before multiplying them
160
        if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
161 39 dgisselq
        begin : GENCOPIES
162
 
163
                initial r_a[k+1] = 0;
164
                initial r_b[k+1] = 0;
165 36 dgisselq
                always @(posedge i_clk)
166
                if (i_ce)
167
                begin
168
                        r_a[k+1] <= { {(LUTB){1'b0}},
169
                                r_a[k][(IW-1-(2*LUTB)):LUTB] };
170
                        r_b[k+1] <= r_b[k];
171
                end
172
        end endgenerate
173
 
174
        generate // The actual multiply and accumulate stage
175
        if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
176 39 dgisselq
        begin : GENSTAGES
177
                wire    [(BW+LUTB-1):0] genp;
178
 
179 36 dgisselq
                // First, the multiply: 2-bits times BW bits
180
                bimpy #(BW) genmpy(i_clk,i_ce,r_a[k][(LUTB-1):0],r_b[k], genp);
181
 
182
                // Then the accumulate step -- on the next clock
183 39 dgisselq
                initial acc[k+1] = 0;
184 36 dgisselq
                always @(posedge i_clk)
185 39 dgisselq
                if (i_ce)
186
                        acc[k+1] <= acc[k] + {{(IW-LUTB*(k+3)){1'b0}},
187
                                genp, {(LUTB*(k+2)){1'b0}} };
188 36 dgisselq
        end endgenerate
189
 
190
        wire    [(IW+BW-1):0]    w_r;
191
        assign  w_r = (r_s[TLEN-1]) ? (-acc[TLEN-2]) : acc[TLEN-2];
192 39 dgisselq
 
193
        initial o_r = 0;
194 36 dgisselq
        always @(posedge i_clk)
195 39 dgisselq
        if (i_ce)
196
                o_r <= w_r[(AW+BW-1):0];
197 36 dgisselq
 
198
        generate if (IW > AW)
199
        begin : VUNUSED
200
                // verilator lint_off UNUSED
201
                wire    [(IW-AW)-1:0]    unused;
202
                assign  unused = w_r[(IW+BW-1):(AW+BW)];
203
                // verilator lint_on UNUSED
204
        end endgenerate
205
 
206 39 dgisselq
`ifdef  FORMAL
207
        reg     f_past_valid;
208
        initial f_past_valid = 1'b0;
209
        always @(posedge i_clk)
210
                f_past_valid <= 1'b1;
211
 
212
`define ASSERT  assert
213
`ifdef  LONGBIMPY
214
 
215
        always @(posedge i_clk)
216
        if (!$past(i_ce))
217
                assume(i_ce);
218
 
219
`endif
220
 
221
        reg     [AW-1:0] f_past_a        [0:TLEN];
222
        reg     [BW-1:0] f_past_b        [0:TLEN];
223
        reg     [TLEN+1:0]       f_sgn_a, f_sgn_b;
224
 
225
        initial f_past_a[0] = 0;
226
        initial f_past_b[0] = 0;
227
        initial f_sgn_a = 0;
228
        initial f_sgn_b = 0;
229
        always @(posedge i_clk)
230
        if (i_ce)
231
        begin
232
                f_past_a[0] <= u_a;
233
                f_past_b[0] <= u_b;
234
                f_sgn_a[0] <= i_a[AW-1];
235
                f_sgn_b[0] <= i_b[BW-1];
236
        end
237
 
238
        generate for(k=0; k<TLEN; k=k+1)
239
        begin
240
                initial f_past_a[k+1] = 0;
241
                initial f_past_b[k+1] = 0;
242
                initial f_sgn_a[k+1] = 0;
243
                initial f_sgn_b[k+1] = 0;
244
                always @(posedge i_clk)
245
                if (i_ce)
246
                begin
247
                        f_past_a[k+1] <= f_past_a[k];
248
                        f_past_b[k+1] <= f_past_b[k];
249
 
250
                        f_sgn_a[k+1]  <= f_sgn_a[k];
251
                        f_sgn_b[k+1]  <= f_sgn_b[k];
252
                end
253
        end endgenerate
254
 
255
        always @(posedge i_clk)
256
        if (i_ce)
257
        begin
258
                f_sgn_a[TLEN+1] <= f_sgn_a[TLEN];
259
                f_sgn_b[TLEN+1] <= f_sgn_b[TLEN];
260
        end
261
 
262
        always @(posedge i_clk)
263
        begin
264
                assert(sgn == (f_sgn_a[0] ^ f_sgn_b[0]));
265
                assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
266
                assert(r_s[TLEN-1:0] == (f_sgn_a[TLEN:1] ^ f_sgn_b[TLEN:1]));
267
        end
268
 
269
        always @(posedge i_clk)
270
        if ((f_past_valid)&&($past(i_ce)))
271
        begin
272
                if ($past(i_a)==0)
273
                        `ASSERT(u_a == 0);
274
                else if ($past(i_a[AW-1]) == 1'b0)
275
                        `ASSERT(u_a == $past(i_a));
276
 
277
                if ($past(i_b)==0)
278
                        `ASSERT(u_b == 0);
279
                else if ($past(i_b[BW-1]) == 1'b0)
280
                        `ASSERT(u_b == $past(i_b));
281
        end
282
 
283
        generate // Keep track of intermediate values, before multiplying them
284
        if (TLEN > 3) for(k=0; k<TLEN-3; k=k+1)
285
        begin : ASSERT_GENCOPY
286
                always @(posedge i_clk)
287
                if (i_ce)
288
                begin
289
                        if (f_past_a[k]==0)
290
                                `ASSERT(r_a[k] == 0);
291
                        else if (f_past_a[k]==1)
292
                                `ASSERT(r_a[k] == 0);
293
                        `ASSERT(r_b[k] == f_past_b[k]);
294
                end
295
        end endgenerate
296
 
297
        generate // The actual multiply and accumulate stage
298
        if (TLEN > 2) for(k=0; k<TLEN-2; k=k+1)
299
        begin : ASSERT_GENSTAGE
300
                always @(posedge i_clk)
301
                if ((f_past_valid)&&($past(i_ce)))
302
                begin
303
                        if (f_past_a[k+1]==0)
304
                                `ASSERT(acc[k] == 0);
305
                        if (f_past_a[k+1]==1)
306
                                `ASSERT(acc[k] == f_past_b[k+1]);
307
                        if (f_past_b[k+1]==0)
308
                                `ASSERT(acc[k] == 0);
309
                        if (f_past_b[k+1]==1)
310
                        begin
311
                                `ASSERT(acc[k][(2*k)+3:0]
312
                                                == f_past_a[k+1][(2*k)+3:0]);
313
                                `ASSERT(acc[k][(IW+BW-1):(2*k)+4] == 0);
314
                        end
315
                end
316
        end endgenerate
317
 
318
        wire    [AW-1:0] f_past_a_neg = - f_past_a[TLEN];
319
        wire    [BW-1:0] f_past_b_neg = - f_past_b[TLEN];
320
 
321
        wire    [AW-1:0] f_past_a_pos = f_past_a[TLEN][AW-1]
322
                                        ? f_past_a_neg : f_past_a[TLEN];
323
        wire    [BW-1:0] f_past_b_pos = f_past_b[TLEN][BW-1]
324
                                        ? f_past_b_neg : f_past_b[TLEN];
325
 
326
        always @(posedge i_clk)
327
        if ((f_past_valid)&&($past(i_ce)))
328
        begin
329
                if ((f_past_a[TLEN]==0)||(f_past_b[TLEN]==0))
330
                        `ASSERT(o_r == 0);
331
                else if (f_past_a[TLEN]==1)
332
                begin
333
                        if ((f_sgn_a[TLEN+1]^f_sgn_b[TLEN+1])==0)
334
                        begin
335
                                `ASSERT(o_r[BW-1:0] == f_past_b_pos[BW-1:0]);
336
                                `ASSERT(o_r[AW+BW-1:BW] == 0);
337
                        end else begin // if (f_sgn_b[TLEN+1]) begin
338
                                `ASSERT(o_r[BW-1:0] == f_past_b_neg);
339
                                `ASSERT(o_r[AW+BW-1:BW]
340
                                        == {(AW){f_past_b_neg[BW-1]}});
341
                        end
342
                end else if (f_past_b[TLEN]==1)
343
                begin
344
                        if ((f_sgn_a[TLEN+1] ^ f_sgn_b[TLEN+1])==0)
345
                        begin
346
                                `ASSERT(o_r[AW-1:0] == f_past_a_pos[AW-1:0]);
347
                                `ASSERT(o_r[AW+BW-1:AW] == 0);
348
                        end else begin
349
                                `ASSERT(o_r[AW-1:0] == f_past_a_neg);
350
                                `ASSERT(o_r[AW+BW-1:AW]
351
                                        == {(BW){f_past_a_neg[AW-1]}});
352
                        end
353
                end else begin
354
                        `ASSERT(o_r != 0);
355
                        if (!o_r[AW+BW-1:0])
356
                        begin
357
                                `ASSERT((o_r[AW-1:0] != f_past_a[TLEN][AW-1:0])
358
                                        ||(o_r[AW+BW-1:AW]!=0));
359
                                `ASSERT((o_r[BW-1:0] != f_past_b[TLEN][BW-1:0])
360
                                        ||(o_r[AW+BW-1:BW]!=0));
361
                        end else begin
362
                                `ASSERT((o_r[AW-1:0] != f_past_a_neg[AW-1:0])
363
                                        ||(! (&o_r[AW+BW-1:AW])));
364
                                `ASSERT((o_r[BW-1:0] != f_past_b_neg[BW-1:0])
365
                                        ||(! (&o_r[AW+BW-1:BW]!=0)));
366
                        end
367
                end
368
        end
369
 
370
        generate if (IAW <= IBW)
371
        begin : NO_PARAM_CHANGE
372
                assign f_past_a_unsorted = (!f_sgn_a[TLEN+1])
373
                                        ? f_past_a[TLEN] : f_past_a_neg;
374
                assign f_past_b_unsorted = (!f_sgn_b[TLEN+1])
375
                                        ? f_past_b[TLEN] : f_past_b_neg;
376
        end else begin : SWAP_PARAMETERS
377
                assign f_past_a_unsorted = (!f_sgn_b[TLEN+1])
378
                                        ? f_past_b[TLEN] : f_past_b_neg;
379
                assign f_past_b_unsorted = (!f_sgn_a[TLEN+1])
380
                                        ? f_past_a[TLEN] : f_past_a_neg;
381
        end endgenerate
382
`ifdef  BUTTERFLY
383
        // The following properties artificially restrict the inputs
384
        // to this long binary multiplier to only those values whose
385
        // absolute value is 0..7.  It is used by the formal proof of
386
        // the BUTTERFLY to artificially limit the scope of the proof.
387
        // By the time the butterfly sees this code, it will be known
388
        // that the long binary multiply works.  At issue will no longer
389
        // be whether or not this works, but rather whether it works in
390
        // context.  For that purpose, we'll need to check timing, not
391
        // results.  Checking against inputs of +/- 1 and 0 are perfect
392
        // for that task.  The below assumptions (yes they are assumptions
393
        // just go a little bit further.
394
        //
395
        // THEREFORE, THESE PROPERTIES ARE NOT NECESSARY TO PROVING THAT
396
        // THIS MODULE WORKS, AND THEY WILL INTERFERE WITH THAT PROOF.
397
        //
398
        // This just limits the proof for the butterfly, the parent.
399
        // module that calls this one
400
        //
401
        // Start by assuming that all inputs have an absolute value less
402
        // than eight.
403
        always @(*)
404
                assume(u_a[AW-1:3] == 0);
405
        always @(*)
406
                assume(u_b[BW-1:3] == 0);
407
 
408
        // If the inputs have these properties, then so too do many of
409
        // our internal values.  ASSERT therefore that we never get out
410
        // of bounds
411
        generate for(k=0; k<TLEN; k=k+1)
412
        begin
413
                always @(*)
414
                begin
415
                        assert(f_past_a[k][AW-1:3] == 0);
416
                        assert(f_past_b[k][BW-1:3] == 0);
417
                end
418
        end endgenerate
419
 
420
        generate for(k=0; k<TLEN-1; k=k+1)
421
        begin
422
                always @(*)
423
                        assert(acc[k][IW+BW-1:6] == 0);
424
        end endgenerate
425
 
426
        generate for(k=0; k<TLEN-2; k=k+1)
427
        begin
428
                always @(*)
429
                        assert(r_b[k][BW-1:3] == 0);
430
        end endgenerate
431
`endif  // BUTTERFLY
432
`endif  // FORMAL
433 36 dgisselq
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.