OpenCores
URL https://opencores.org/ocsvn/ft816float/ft816float/trunk

Subversion Repositories ft816float

[/] [ft816float/] [trunk/] [rtl/] [verilog2/] [fpFMA.sv] - Blame information for rev 58

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 48 robfinch
// ============================================================================
2
//        __
3 58 robfinch
//   \\__/ o\    (C) 2019-2021  Robert Finch, Waterloo
4 48 robfinch
//    \  __ /    All rights reserved.
5
//     \/_//     robfinch@finitron.ca
6
//       ||
7
//
8
//      fpFMA.sv
9
//              - floating point fused multiplier + adder
10
//              - can issue every clock cycle
11
//              - parameterized width (FPWID)
12
//              - IEEE 754 representation
13
//
14
//
15 58 robfinch
// BSD 3-Clause License
16
// Redistribution and use in source and binary forms, with or without
17
// modification, are permitted provided that the following conditions are met:
18
//
19
// 1. Redistributions of source code must retain the above copyright notice, this
20
//    list of conditions and the following disclaimer.
21
//
22
// 2. Redistributions in binary form must reproduce the above copyright notice,
23
//    this list of conditions and the following disclaimer in the documentation
24
//    and/or other materials provided with the distribution.
25
//
26
// 3. Neither the name of the copyright holder nor the names of its
27
//    contributors may be used to endorse or promote products derived from
28
//    this software without specific prior written permission.
29
//
30
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
34
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
37
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
38
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 48 robfinch
//
41
// ============================================================================
42
 
43
import fp::*;
44
 
45
module fpFMA (clk, ce, op, rm, a, b, c, o, under, over, inf, zero);
46
input clk;
47
input ce;
48
input op;               // operation 0 = add, 1 = subtract
49
input [2:0] rm;
50
input  [MSB:0] a, b, c;
51
output [EX:0] o;
52
output under;
53
output over;
54
output inf;
55
output zero;
56
 
57
// constants
58
wire [EMSB:0] infXp = {EMSB+1{1'b1}};   // infinite / NaN - all ones
59
// The following is the value for an exponent of zero, with the offset
60
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
61
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};       //2^0 exponent
62
// The following is a template for a quiet nan. (MSB=1)
63
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};
64
 
65
// -----------------------------------------------------------
66
// Clock #1
67
// - decode the input operands
68
// - derive basic information
69
// -----------------------------------------------------------
70
 
71
wire sa1, sb1, sc1;                     // sign bit
72
wire [EMSB:0] xa1, xb1, xc1;    // exponent bits
73
wire [FMSB+1:0] fracta1, fractb1, fractc1;      // includes unhidden bit
74
wire a_dn1, b_dn1, c_dn1;                       // a/b/c is denormalized
75
wire aNan1, bNan1, cNan1;
76
wire az1, bz1, cz1;
77
wire aInf1, bInf1, cInf1;
78
reg op1;
79
 
80
fpDecompReg u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa1), .exp(xa1), .fract(fracta1), .xz(a_dn1), .vz(az1), .inf(aInf1), .nan(aNan1) );
81
fpDecompReg u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb1), .exp(xb1), .fract(fractb1), .xz(b_dn1), .vz(bz1), .inf(bInf1), .nan(bNan1) );
82
fpDecompReg u1c (.clk(clk), .ce(ce), .i(c), .sgn(sc1), .exp(xc1), .fract(fractc1), .xz(c_dn1), .vz(cz1), .inf(cInf1), .nan(cNan1) );
83
 
84
always @(posedge clk)
85
        if (ce) op1 <= op;
86
 
87
// -----------------------------------------------------------
88
// Clock #2
89
// Compute the sum of the exponents.
90
// correct the exponent for denormalized operands
91
// adjust the sum by the exponent offset (subtract 127)
92
// mul: ex1 = xa + xb,  result should always be < 1ffh
93
// Form partial products (clocks 2 to 5)
94
// -----------------------------------------------------------
95
 
96
reg abz2;
97
reg [EMSB+2:0] ex2;
98
reg [EMSB:0] xc2;
99
reg realOp2;
100
reg xcInf2;
101
 
102
always @(posedge clk)
103
        if (ce) abz2 <= az1|bz1;
104
always @(posedge clk)
105
        if (ce) ex2 <= (xa1|a_dn1) + (xb1|b_dn1) - bias;
106
always @(posedge clk)
107
        if (ce) xc2 <= (xc1|c_dn1);
108
// Stage-2 flag: exponent field of operand c is all ones (infinity or NaN).
// Uses a nonblocking assignment like every other pipeline register so that
// readers of xcInf2 clocked on the same edge see the previous value rather
// than racing with this update.
always @(posedge clk)
        if (ce) xcInf2 <= &xc1;
110
 
111
// Figure out which operation is really needed an add or
112
// subtract ?
113
// If the signs are the same, use the original op,
114
// otherwise flip the operation
115
//  a +  b = add,+
116
//  a + -b = sub, sign of larger
117
// -a +  b = sub, sign of larger
118
// -a + -b = add,-
119
//  a -  b = sub, sign of larger
120
//  a - -b = add,+
121
// -a -  b = add,-
122
// -a - -b = sub, sign of larger
123
always @(posedge clk)
124
        if (ce) realOp2 <= op1 ^ (sa1 ^ sb1) ^ sc1;
125
 
126
 
127
reg [FX:0] fract5;
128
generate
129
if (FPWID==84) begin
// Schoolbook multiply of fracta1[67:0] by fractb1[67:0] using four 17-bit
// limbs per operand. pXY holds (limb X of fractb1) * (limb Y of fracta1), so
// its weight in the full product is 2^(17*(X+Y)). Each product is 34 bits.
reg [33:0] p00,p01,p02,p03;
reg [33:0] p10,p11,p12,p13;
reg [33:0] p20,p21,p22,p23;
reg [33:0] p30,p31,p32,p33;
reg [135:0] fract3a;
reg [135:0] fract3b;
reg [135:0] fract3c;
reg [135:0] fract3d;
reg [135:0] fract4a;
reg [135:0] fract4b;

        // Form the sixteen 17x17 partial products.
        always @(posedge clk)
        if (ce) begin
                p00 <= fracta1[16: 0] * fractb1[16: 0];
                p01 <= fracta1[33:17] * fractb1[16: 0];
                p02 <= fracta1[50:34] * fractb1[16: 0];
                p03 <= fracta1[67:51] * fractb1[16: 0];

                p10 <= fracta1[16: 0] * fractb1[33:17];
                p11 <= fracta1[33:17] * fractb1[33:17];
                p12 <= fracta1[50:34] * fractb1[33:17];
                p13 <= fracta1[67:51] * fractb1[33:17];

                p20 <= fracta1[16: 0] * fractb1[50:34];
                p21 <= fracta1[33:17] * fractb1[50:34];
                p22 <= fracta1[50:34] * fractb1[50:34];
                p23 <= fracta1[67:51] * fractb1[50:34];

                // Row 3 must use the same 17-bit limb boundaries of fracta1
                // as rows 0-2 (the 16-bit boundaries belong to the 80-bit case).
                p30 <= fracta1[16: 0] * fractb1[67:51];
                p31 <= fracta1[33:17] * fractb1[67:51];
                p32 <= fracta1[50:34] * fractb1[67:51];
                p33 <= fracta1[67:51] * fractb1[67:51];
        end
        // First level of the adder tree. Products concatenated in one operand
        // are two limbs apart so their 34-bit fields never overlap.
        always @(posedge clk)
        if (ce) begin
                fract3a <= {p33,p31,p20,p00};                           // weights 6,4,2,0
                fract3b <= {p32,p12,p10,17'b0} + {p23,p03,p01,17'b0};   // weights 5,3,1
                fract3c <= {p22,p11,34'b0} + {p13,p02,34'b0};           // weights 4,2
                // Remaining two weight-3 terms. p12 and p03 are already summed
                // in fract3b; p21 and p30 are the ones not yet accounted for.
                fract3d <= {p21,51'b0} + {p30,51'b0};
        end
        // Second level of the adder tree.
        always @(posedge clk)
        if (ce) begin
                fract4a <= fract3a + fract3b;
                fract4b <= fract3c + fract3d;
        end
        // Final sum: full product of the two significands.
        always @(posedge clk)
        if (ce) begin
                fract5 <= fract4a + fract4b;
        end
end
180
else if (FPWID==80) begin
// Schoolbook multiply of fracta1[63:0] by fractb1[63:0] using four 16-bit
// limbs per operand. pXY holds (limb X of fractb1) * (limb Y of fracta1), so
// its weight in the full product is 2^(16*(X+Y)). Each product is 32 bits.
reg [31:0] p00,p01,p02,p03;
reg [31:0] p10,p11,p12,p13;
reg [31:0] p20,p21,p22,p23;
reg [31:0] p30,p31,p32,p33;
reg [127:0] fract3a;
reg [127:0] fract3b;
reg [127:0] fract3c;
reg [127:0] fract3d;
reg [127:0] fract4a;
reg [127:0] fract4b;

        // Form the sixteen 16x16 partial products.
        always @(posedge clk)
        if (ce) begin
                p00 <= fracta1[15: 0] * fractb1[15: 0];
                p01 <= fracta1[31:16] * fractb1[15: 0];
                p02 <= fracta1[47:32] * fractb1[15: 0];
                p03 <= fracta1[63:48] * fractb1[15: 0];

                p10 <= fracta1[15: 0] * fractb1[31:16];
                p11 <= fracta1[31:16] * fractb1[31:16];
                p12 <= fracta1[47:32] * fractb1[31:16];
                p13 <= fracta1[63:48] * fractb1[31:16];

                p20 <= fracta1[15: 0] * fractb1[47:32];
                p21 <= fracta1[31:16] * fractb1[47:32];
                p22 <= fracta1[47:32] * fractb1[47:32];
                p23 <= fracta1[63:48] * fractb1[47:32];

                p30 <= fracta1[15: 0] * fractb1[63:48];
                p31 <= fracta1[31:16] * fractb1[63:48];
                p32 <= fracta1[47:32] * fractb1[63:48];
                p33 <= fracta1[63:48] * fractb1[63:48];
        end
        // First level of the adder tree. Products concatenated in one operand
        // are two limbs apart so their 32-bit fields never overlap.
        always @(posedge clk)
        if (ce) begin
                fract3a <= {p33,p31,p20,p00};                           // weights 6,4,2,0
                fract3b <= {p32,p12,p10,16'b0} + {p23,p03,p01,16'b0};   // weights 5,3,1
                fract3c <= {p22,p11,32'b0} + {p13,p02,32'b0};           // weights 4,2
                // Remaining two weight-3 terms. p12 and p03 are already summed
                // in fract3b; p21 and p30 are the ones not yet accounted for.
                fract3d <= {p21,48'b0} + {p30,48'b0};
        end
        // Second level of the adder tree.
        always @(posedge clk)
        if (ce) begin
                fract4a <= fract3a + fract3b;
                fract4b <= fract3c + fract3d;
        end
        // Final sum: full product of the two significands.
        always @(posedge clk)
        if (ce) begin
                fract5 <= fract4a + fract4b;
        end
end
231
else if (FPWID==64) begin
232
reg [35:0] p00,p01,p02;
233
reg [35:0] p10,p11,p12;
234
reg [35:0] p20,p21,p22;
235
reg [71:0] fract3a;
236
reg [89:0] fract3b;
237
reg [107:0] fract3c;
238
reg [108:0] fract4a;
239
reg [108:0] fract4b;
240
 
241
        always @(posedge clk)
242
        if (ce) begin
243
                p00 <= fracta1[17: 0] * fractb1[17: 0];
244
                p01 <= fracta1[35:18] * fractb1[17: 0];
245
                p02 <= fracta1[52:36] * fractb1[17: 0];
246
                p10 <= fracta1[17: 0] * fractb1[35:18];
247
                p11 <= fracta1[35:18] * fractb1[35:18];
248
                p12 <= fracta1[52:36] * fractb1[35:18];
249
                p20 <= fracta1[17: 0] * fractb1[52:36];
250
                p21 <= fracta1[35:18] * fractb1[52:36];
251
                p22 <= fracta1[52:36] * fractb1[52:36];
252
        end
253
        always @(posedge clk)
254
        if (ce) begin
255
                fract3a <= {p02,p00};
256
                fract3b <= {p21,p10,18'b0} + {p12,p01,18'b0};
257
                fract3c <= {p22,p20,36'b0} + {p11,36'b0};
258
        end
259
        always @(posedge clk)
260
        if (ce) begin
261
                fract4a <= fract3a + fract3b;
262
                fract4b <= fract3c;
263
        end
264
        always @(posedge clk)
265
        if (ce) begin
266
                fract5 <= fract4a + fract4b;
267
        end
268
end
269
else if (FPWID==40) begin
// Schoolbook multiply of fracta1[39:0] by fractb1[39:0] using 14-bit limbs
// (the top limb, [39:28], is 12 bits). pXY holds (limb X of fractb1) *
// (limb Y of fracta1), so its weight in the full product is 2^(14*(X+Y)).
// Each product is stored in a 28-bit register.
reg [27:0] p00,p01,p02;
reg [27:0] p10,p11,p12;
reg [27:0] p20,p21,p22;
reg [79:0] fract3a;
reg [79:0] fract3b;
reg [79:0] fract3c;
reg [79:0] fract4a;
reg [79:0] fract4b;
        // Form the nine partial products.
        always @(posedge clk)
        if (ce) begin
                p00 <= fracta1[13: 0] * fractb1[13: 0];
                p01 <= fracta1[27:14] * fractb1[13: 0];
                p02 <= fracta1[39:28] * fractb1[13: 0];
                p10 <= fracta1[13: 0] * fractb1[27:14];
                p11 <= fracta1[27:14] * fractb1[27:14];
                p12 <= fracta1[39:28] * fractb1[27:14];
                p20 <= fracta1[13: 0] * fractb1[39:28];
                p21 <= fracta1[27:14] * fractb1[39:28];
                p22 <= fracta1[39:28] * fractb1[39:28];
        end
        // First level of the adder tree. The zero padding aligns each group to
        // its limb weight: one limb is 14 bits, two limbs are 28 bits (the
        // 18/36-bit padding is only correct for the 18-bit limbs of FPWID==64).
        always @(posedge clk)
        if (ce) begin
                fract3a <= {p02,p00};                                   // weights 2,0
                fract3b <= {p21,p10,14'b0} + {p12,p01,14'b0};           // weights 3,1
                fract3c <= {p22,p20,28'b0} + {p11,28'b0};               // weights 4,2
        end
        // Second level of the adder tree.
        always @(posedge clk)
        if (ce) begin
                fract4a <= fract3a + fract3b;
                fract4b <= fract3c;
        end
        // Final sum: full product of the two significands.
        always @(posedge clk)
        if (ce) begin
                fract5 <= fract4a + fract4b;
        end
end
306
else if (FPWID==32) begin
307
reg [23:0] p00,p01,p02;
308
reg [23:0] p10,p11,p12;
309
reg [23:0] p20,p21,p22;
310
reg [63:0] fract3a;
311
reg [63:0] fract3b;
312
reg [63:0] fract4;
313
 
314
        always @(posedge clk)
315
        if (ce) begin
316
                p00 <= fracta1[11: 0] * fractb1[11: 0];
317
                p01 <= fracta1[23:12] * fractb1[11: 0];
318
                p10 <= fracta1[11: 0] * fractb1[23:12];
319
                p11 <= fracta1[23:12] * fractb1[23:12];
320
        end
321
        always @(posedge clk)
322
        if (ce) begin
323
                fract3a <= {p11,p00};
324
                fract3b <= {p01,12'b0} + {p10,12'b0};
325
        end
326
        always @(posedge clk)
327
        if (ce) begin
328
                fract4 <= fract3a + fract3b;
329
        end
330
        always @(posedge clk)
331
        if (ce) begin
332
                fract5 <= fract4;
333
        end
334
end
335
else begin
336
reg [FX:0] p00;
337
reg [FX:0] fract3;
338
reg [FX:0] fract4;
339
        always @(posedge clk)
340
    if (ce) begin
341
        p00 <= fracta1 * fractb1;
342
    end
343
        always @(posedge clk)
344
    if (ce)
345
        fract3 <= p00;
346
        always @(posedge clk)
347
    if (ce)
348
        fract4 <= fract3;
349
        always @(posedge clk)
350
    if (ce)
351
        fract5 <= fract4;
352
end
353
endgenerate
354
 
355
// -----------------------------------------------------------
356
// Clock #3
357
// Select zero exponent
358
// -----------------------------------------------------------
359
 
360
reg [EMSB+2:0] ex3;
361
reg [EMSB:0] xc3;
362
always @(posedge clk)
363
        if (ce) ex3 <= abz2 ? 1'd0 : ex2;
364
always @(posedge clk)
365
        if (ce) xc3 <= xc2;
366
 
367
// -----------------------------------------------------------
368
// Clock #4
369
// Generate partial products.
370
// -----------------------------------------------------------
371
 
372
reg [EMSB+2:0] ex4;
373
reg [EMSB:0] xc4;
374
 
375
always @(posedge clk)
376
        if (ce) ex4 <= ex3;
377
always @(posedge clk)
378
        if (ce) xc4 <= xc3;
379
 
380
// -----------------------------------------------------------
381
// Clock #5
382
// Sum partial products (above)
383
// compute multiplier overflow and underflow
384
// -----------------------------------------------------------
385
 
386
// Status
387
reg under5;
388
reg over5;
389
reg [EMSB+2:0] ex5;
390
reg [EMSB:0] xc5;
391
wire aInf5, bInf5;
392
wire aNan5, bNan5;
393
wire qNaNOut5;
394
 
395
always @(posedge clk)
396
        if (ce) under5 <= ex4[EMSB+2];
397
always @(posedge clk)
398
        if (ce) over5 <= (&ex4[EMSB:0] | ex4[EMSB+1]) & !ex4[EMSB+2];
399
always @(posedge clk)
400
        if (ce) ex5 <= ex4;
401
always @(posedge clk)
402
        if (ce) xc5 <= xc4;
403
 
404
delay4 u2a (.clk(clk), .ce(ce), .i(aInf1), .o(aInf5) );
405
delay4 u2b (.clk(clk), .ce(ce), .i(bInf1), .o(bInf5) );
406
 
407
// determine when a NaN is output
408
wire [MSB:0] a5,b5;
409
delay4 u5 (.clk(clk), .ce(ce), .i((aInf1&bz1)|(bInf1&az1)), .o(qNaNOut5) );
410
delay4 u14 (.clk(clk), .ce(ce), .i(aNan1), .o(aNan5) );
411
delay4 u15 (.clk(clk), .ce(ce), .i(bNan1), .o(bNan5) );
412
delay5 #(MSB+1) u16 (.clk(clk), .ce(ce), .i(a), .o(a5) );
413
delay5 #(MSB+1) u17 (.clk(clk), .ce(ce), .i(b), .o(b5) );
414
 
415
// -----------------------------------------------------------
416
// Clock #6
417
// - figure multiplier mantissa output
418
// - figure multiplier exponent output
419
// - correct exponent and mantissa for exceptional conditions
420
// -----------------------------------------------------------
421
 
422
reg [FX:0] mo6;
423
reg [EMSB+2:0] ex6;
424
reg [EMSB:0] xc6;
425
wire [FMSB+1:0] fractc6;
426
vtdl #(FMSB+2) u61 (.clk(clk), .ce(ce), .a(4'd4), .d(fractc1), .q(fractc6) );
427
delay1 u62 (.clk(clk), .ce(ce), .i(under5), .o(under6));
428
 
429
always @(posedge clk)
430
        if (ce) xc6 <= xc5;
431
 
432
always @(posedge clk)
433
        if (ce)
434
                casez({aNan5,bNan5,qNaNOut5,aInf5,bInf5,over5})
435
                6'b1?????:  mo6 <= {1'b1,1'b1,a5[FMSB-1:0],{FMSB+1{1'b0}}};
436
    6'b01????:  mo6 <= {1'b1,1'b1,b5[FMSB-1:0],{FMSB+1{1'b0}}};
437
                6'b001???:      mo6 <= {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero
438
                6'b0001??:      mo6 <= 0;       // mul inf's
439
                6'b00001?:      mo6 <= 0;       // mul inf's
440
                6'b000001:      mo6 <= 0;       // mul overflow
441
                default:        mo6 <= fract5;
442
                endcase
443
 
444
always @(posedge clk)
445
        if (ce)
446
                casez({qNaNOut5|aNan5|bNan5,aInf5,bInf5,over5,under5})
447
                5'b1????:       ex6 <= infXp;   // qNaN - infinity * zero
448
                5'b01???:       ex6 <= infXp;   // 'a' infinite
449
                5'b001??:       ex6 <= infXp;   // 'b' infinite
450
                5'b0001?:       ex6 <= infXp;   // result overflow
451
                5'b00001:       ex6 <= ex5;             //0;            // underflow
452
                default:        ex6 <= ex5;             // situation normal
453
                endcase
454
 
455
// -----------------------------------------------------------
456
// Clock #7
457
// - prep for addition, determine greater operand
458
// -----------------------------------------------------------
459
reg ex_gt_xc7;
460
reg xeq7;
461
reg ma_gt_mc7;
462
reg meq7;
463
wire az7, bz7, cz7;
464
wire realOp7;
465
 
466
// which has greater magnitude ? Used for sign calc
467
always @(posedge clk)
468
        if (ce) ex_gt_xc7 <= $signed(ex6) > $signed({2'b0,xc6});
469
always @(posedge clk)
470
        if (ce) xeq7 <= (ex6=={2'b0,xc6});
471
always @(posedge clk)
472
        if (ce) ma_gt_mc7 <= mo6 > {fractc6,{FMSB+1{1'b0}}};
473
always @(posedge clk)
474
        if (ce) meq7 <= mo6 == {fractc6,{FMSB+1{1'b0}}};
475
vtdl #(1) u71 (.clk(clk), .ce(ce), .a(4'd5), .d(az1), .q(az7));
476
vtdl #(1) u72 (.clk(clk), .ce(ce), .a(4'd5), .d(bz1), .q(bz7));
477
vtdl #(1) u73 (.clk(clk), .ce(ce), .a(4'd5), .d(cz1), .q(cz7));
478
vtdl #(1) u74 (.clk(clk), .ce(ce), .a(4'd4), .d(realOp2), .q(realOp7));
479
 
480
// -----------------------------------------------------------
481
// Clock #8
482
// - prep for addition, determine greater operand
483
// - determine if result will be zero
484
// -----------------------------------------------------------
485
 
486
reg a_gt_b8;
487
reg resZero8;
488
reg ex_gt_xc8;
489
wire [EMSB+2:0] ex8;
490
wire [EMSB:0] xc8;
491
wire xcInf8;
492
wire [2:0] rm8;
493
wire op8;
494
wire sa8, sc8;
495
 
496
delay2 #(EMSB+3) u81 (.clk(clk), .ce(ce), .i(ex6), .o(ex8));
497
delay2 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xc6), .o(xc8));
498
vtdl #(1) u83 (.clk(clk), .ce(ce), .a(4'd5), .d(xcInf2), .q(xcInf8));
499
vtdl #(3) u84 (.clk(clk), .ce(ce), .a(4'd7), .d(rm), .q(rm8));
500
vtdl #(1) u85 (.clk(clk), .ce(ce), .a(4'd6), .d(op1), .q(op8));
501
vtdl #(1) u86 (.clk(clk), .ce(ce), .a(4'd6), .d(sa1 ^ sb1), .q(sa8));
502
vtdl #(1) u87 (.clk(clk), .ce(ce), .a(4'd6), .d(sc1), .q(sc8));
503
 
504
always @(posedge clk)
505
        if (ce) ex_gt_xc8 <= ex_gt_xc7;
506
always @(posedge clk)
507
        if (ce)
508
                a_gt_b8 <= ex_gt_xc7 || (xeq7 && ma_gt_mc7);
509
 
510
// Find out if the result will be zero.
511
always @(posedge clk)
512
        if (ce)
513
                resZero8 <= (realOp7 & xeq7 & meq7) ||  // subtract, same magnitude
514
                           ((az7 | bz7) & cz7);               // a or b zero and c zero
515
 
516
// -----------------------------------------------------------
517
// Clock #9
518
// Compute output exponent and sign
519
//
520
// The output exponent is the larger of the two exponents,
521
// unless a subtract operation is in progress and the two
522
// numbers are equal, in which case the exponent should be
523
// zero.
524
// -----------------------------------------------------------
525
 
526
reg so9;
527
reg [EMSB+2:0] ex9;
528
reg [EMSB+2:0] ex9a;
529
reg ex_gt_xc9;
530
reg [EMSB:0] xc9;
531
reg a_gt_c9;
532
wire [FX:0] mo9;
533
wire [FMSB+1:0] fractc9;
534
wire under9;
535
wire xeq9;
536
 
537
always @(posedge clk)
538
        if (ce) ex_gt_xc9 <= ex_gt_xc8;
539
always @(posedge clk)
540
        if (ce) a_gt_c9 <= a_gt_b8;
541
always @(posedge clk)
542
        if (ce) xc9 <= xc8;
543
always @(posedge clk)
544
        if (ce) ex9a <= ex8;
545
 
546
delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
547
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
548
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
549
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9));
550
 
551
always @(posedge clk)
552
        if (ce) ex9 <= resZero8 ? 1'd0 : ex_gt_xc8 ? ex8 : {2'b0,xc8};
553
 
554
// Compute output sign
555
always @(posedge clk)
556
        if (ce)
557
        case ({resZero8,sa8,op8,sc8})   // synopsys full_case parallel_case
558
        4'b0000: so9 <= 0;                      // + + + = +
559
        4'b0001: so9 <= !a_gt_b8;       // + + - = sign of larger
560
        4'b0010: so9 <= !a_gt_b8;       // + - + = sign of larger
561
        4'b0011: so9 <= 0;                      // + - - = +
562
        4'b0100: so9 <= a_gt_b8;                // - + + = sign of larger
563
        4'b0101: so9 <= 1;                      // - + - = -
564
        4'b0110: so9 <= 1;                      // - - + = -
565
        4'b0111: so9 <= a_gt_b8;                // - - - = sign of larger
566
        4'b1000: so9 <= 0;                      //  A +  B, sign = +
567
        4'b1001: so9 <= rm8==3;         //  A + -B, sign = + unless rounding down
568
        4'b1010: so9 <= rm8==3;         //  A -  B, sign = + unless rounding down
569
        4'b1011: so9 <= 0;                      // +A - -B, sign = +
570
        4'b1100: so9 <= rm8==3;         // -A +  B, sign = + unless rounding down
571
        4'b1101: so9 <= 1;                      // -A + -B, sign = -
572
        4'b1110: so9 <= 1;                      // -A - +B, sign = -
573
        4'b1111: so9 <= rm8==3;         // -A - -B, sign = + unless rounding down
574
        endcase
575
 
576
// -----------------------------------------------------------
577
// Clock #10
578
// Compute the difference in exponents, provides shift amount
579
// Note that ex9a will be negative for an underflow condition
580
// so it's added rather than subtracted from xc9 as -(-num)
581
// is the same as an add. The underflow is tracked rather than
582
// using extra bits in the exponent.
583
// -----------------------------------------------------------
584
reg [EMSB+2:0] xdiff10;
585
reg [FX:0] mfs;
586
reg ops10;
587
 
588
// If the multiplier exponent was negative (underflowed) then
589
// the mantissa needs to be shifted right even more (until
590
// the exponent is zero). The total shift would be xc9-0-
591
// amount underflows which is xc9 + -ex9a.
592
 
593
always @(posedge clk)
594
        if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
595
                                                                                : ex9a[EMSB+2] ? xc9 + (~ex9a+2'd1)
596
                                                                                : xc9 - ex9a;
597
 
598
// Determine which fraction to denormalize (the one with the
599
// smaller exponent is denormalized). If the exponents are equal
600
// denormalize the smaller fraction.
601
always @(posedge clk)
602
        if (ce) mfs <=
603
                xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9)
604
                 : ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;
605
 
606
always @(posedge clk)
607
        if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
608
                                                                                                : (ex_gt_xc9 ? 1'b1 : 1'b0);
609
 
610
// -----------------------------------------------------------
611
// Clock #11
612
// Limit the size of the shifter to only bits needed.
613
// -----------------------------------------------------------
614
reg [7:0] xdif11;
615
 
616
always @(posedge clk)
617
        if (ce) xdif11 <= xdiff10 > FX+3 ? FX+3 : xdiff10;
618
 
619
// -----------------------------------------------------------
620
// Clock #12
621
// Determine the sticky bit
622
// -----------------------------------------------------------
623
 
624
wire sticky, sticky12;
625
wire [FX:0] mfs12;
626
wire [7:0] xdif12;
627
 
628 58 robfinch
redorN #(.BSIZE(FX+1)) uredor1 (.a({1'b0,xdif11+FMSB}), .b(mfs), .o(sticky));
629
/*
630 48 robfinch
generate
631
begin
632
if (FPWID==128)
633
  redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
634
else if (FPWID==96)
635
  redor96 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
636
else if (FPWID==84)
637
  redor84 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
638
else if (FPWID==80)
639
  redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
640
else if (FPWID==64)
641
  redor64 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
642
else if (FPWID==32)
643
  redor32 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
644
else begin
645
        always @* begin
646
        $display("redor operation needed in fpFMA");
647
        $finish;
648
  end
649
end
650
end
651
endgenerate
652 58 robfinch
*/
653 48 robfinch
 
654
// register inputs to shifter and shift
655
delay1 #(1)    u122(.clk(clk), .ce(ce), .i(sticky), .o(sticky12) );
656
delay1 #(8)    u123(.clk(clk), .ce(ce), .i(xdif11),   .o(xdif12) );
657
delay2 #(FX+1) u124(.clk(clk), .ce(ce), .i(mfs), .o(mfs12) );
658
 
659
// -----------------------------------------------------------
660
// Clock #13
661
// - denormalize operand (shift right)
662
// -----------------------------------------------------------
663
reg [FX+2:0] mfs13;
664
wire [FX:0] mo13;
665
wire ex_gt_xc13;
666
wire [FMSB+1:0] fractc13;
667
wire ops13;
668
 
669
delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
670
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
671
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
672
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13));
673
 
674
always @(posedge clk)
675
        if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;
676
 
677
// -----------------------------------------------------------
678
// Clock #14
679
// Sort operands
680
// -----------------------------------------------------------
681
reg [FX+2:0] oa, ob;
682
wire a_gt_b14;
683
 
684
vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));
685
 
686
always @(posedge clk)
687
        if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13;
688
always @(posedge clk)
689
        if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};
690
 
691
// -----------------------------------------------------------
692
// Clock #15
693
// - Sort operands
694
// -----------------------------------------------------------
695
reg [FX+2:0] oaa, obb;
696
wire realOp15;
697
wire [EMSB:0] ex15;
698
wire [EMSB:0] ex9c = ex9[EMSB+1] ? infXp : ex9[EMSB:0];
699
wire overflow15;
700
vtdl #(1) u151 (.clk(clk), .ce(ce), .a(4'd7), .d(realOp7), .q(realOp15));
701
vtdl #(EMSB+1) u152 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9c), .q(ex15));
702
// Delay the 1-bit overflow indication derived from ex9 so it arrives as
// overflow15. The delay line is 1 bit wide to match both the reduction
// expression on .d and the 1-bit overflow15 wire on .q.
vtdl #(1) u153 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9[EMSB+1]| &ex9[EMSB:0]), .q(overflow15));
703
 
704
always @(posedge clk)
705
        if (ce) oaa <= a_gt_b14 ? oa : ob;
706
always @(posedge clk)
707
        if (ce) obb <= a_gt_b14 ? ob : oa;
708
 
709
// -----------------------------------------------------------
710
// Clock #16
711
// - perform add/subtract
712
// - addition can generate an extra bit, subtract can't go negative
713
// -----------------------------------------------------------
714
reg [FX+3:0] mab;
715
wire [FX:0] mo16;
716
wire [FMSB+1:0] fractc16;
717
wire Nan16;
718
wire cNan16;
719
wire aInf16, cInf16;
720
wire op16;
721
wire exinf16;
722
 
723
vtdl #(1) u161 (.clk(clk), .ce(ce), .a(4'd10), .d(qNaNOut5|aNan5|bNan5), .q(Nan16));
724
vtdl #(1) u162 (.clk(clk), .ce(ce), .a(4'd14), .d(cNan1), .q(cNan16));
725
vtdl #(1) u163 (.clk(clk), .ce(ce), .a(4'd9), .d(&ex6), .q(aInf16));
726
vtdl #(1) u164 (.clk(clk), .ce(ce), .a(4'd14), .d(cInf1), .q(cInf16));
727
vtdl #(1) u165 (.clk(clk), .ce(ce), .a(4'd14), .d(op1), .q(op16));
728
delay3 #(FX+1) u166 (.clk(clk), .ce(ce), .i(mo13), .o(mo16));
729
vtdl #(FMSB+2) u167 (.clk(clk), .ce(ce), .a(4'd6), .d(fractc9), .q(fractc16));
730
delay1 u169 (.clk(clk), .ce(ce), .i(&ex15), .o(exinf16));
731
 
732
always @(posedge clk)
733
        if (ce) mab <= realOp15 ? oaa - obb : oaa + obb;
734
 
735
// -----------------------------------------------------------
736
// Clock #17
737
// - adjust for Nans
738
// -----------------------------------------------------------
739
wire [EMSB:0] ex17;
740
reg [FX:0] mo17;
741
wire so17;
742
wire exinf17;
743
wire overflow17;
744
 
745
vtdl #(1)        u171 (.clk(clk), .ce(ce), .a(4'd7), .d(so9), .q(so17));
746
delay2 #(EMSB+1) u172 (.clk(clk), .ce(ce), .i(ex15), .o(ex17));
747
delay1 #(1) u173 (.clk(clk), .ce(ce), .i(exinf16), .o(exinf17));
748
delay2 u174 (.clk(clk), .ce(ce), .i(overflow15), .o(overflow17));
749
 
750
// Final mantissa select. Gated by ce like so17/ex17 (which come from
// ce-gated delay lines) so that sign, exponent and mantissa of the output
// word always belong to the same operation when the pipeline is stalled.
always @(posedge clk)
        if (ce)
        casez({aInf16&cInf16,Nan16,cNan16,exinf16})
        4'b1???:        mo17 <= {1'b0,op16,{FMSB-1{1'b0}},op16,{FMSB{1'b0}}};   // inf +/- inf - generate QNaN on subtract, inf on add
        4'b01??:        mo17 <= {1'b0,mo16};                                    // NaN from the multiply stage: pass its mantissa through
        4'b001?:        mo17 <= {1'b1,1'b1,fractc16[FMSB-1:0],{FMSB+1{1'b0}}};  // c is a NaN: propagate c's payload
        4'b0001:        mo17 <= 1'd0;                                           // exponent all ones: mantissa forced to zero
        default:        mo17 <= mab[FX+3:2];            // mab has two extra lead bits and two trailing bits
        endcase
758
 
759
assign o = {so17,ex17,mo17};
760
assign zero = {ex17,mo17}==1'd0;
761
assign inf = exinf17;
762
assign under = ex17==1'd0;
763
assign over = overflow17;
764
 
765
endmodule
766
 
767
 
768
// Fused multiply-add with normalization and rounding.
769
 
770
// fpFMAnr: wraps fpFMA with fpNormalize and fpRound so that the result is a
// packed floating point word of the same width as the inputs.
//
// Ports:
//   clk, ce    - clock and clock enable, passed to every pipelined submodule
//   op         - passed straight through to fpFMA's op input
//   rm         - rounding mode, passed to fpFMA and fpRound
//   a, b, c    - packed floating point operands
//   o          - packed, rounded result
//   zero, inf  - decoded combinationally from o by fpDecomp
//   overflow   - delayed fpFMA overflow flag OR'ed with inf (see below)
//   underflow  - delayed fpFMA underflow flag
//   inexact    - delayed inexact flag from fpNormalize
module fpFMAnr(clk, ce, op, rm, a, b, c, o, inf, zero, overflow, underflow, inexact);
input clk;
input ce;
input op;
input [2:0] rm;
input  [MSB:0] a, b, c;
output [MSB:0] o;
output zero;
output inf;
output overflow;
output underflow;
output inexact;

wire [EX:0] fma_o;              // unnormalized fpFMA result
wire fma_underflow;
wire fma_overflow;
wire fma_overflow_d;            // fma_overflow after the u6 delay line
wire norm_underflow;
wire norm_inexact;
wire [MSB+3:0] fpn0;            // normalized result fed to the rounder

// fpFMA takes its sizing from the imported fp package and declares no module
// parameters, so it is instantiated without a parameter override.
fpFMA u1
(
        .clk(clk),
        .ce(ce),
        .op(op),
        .rm(rm),
        .a(a),
        .b(b),
        .c(c),
        .o(fma_o),
        .under(fma_underflow),
        .over(fma_overflow),
        .zero(),
        .inf()
);
fpNormalize #(FPWID) u2
(
        .clk(clk),
        .ce(ce),
        .i(fma_o),
        .o(fpn0),
        .under_i(fma_underflow),
        .under_o(norm_underflow),
        .inexact_o(norm_inexact)
);
fpRound         #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
fpDecomp        #(FPWID) u4(.i(o), .xz(), .vz(zero), .inf(inf));
vtdl                                            u5 (.clk(clk), .ce(ce), .a(4'd11), .d(fma_underflow), .q(underflow));
vtdl                                            u6 (.clk(clk), .ce(ce), .a(4'd11), .d(fma_overflow), .q(fma_overflow_d));
delay3          #(1)    u7 (.clk(clk), .ce(ce), .i(norm_inexact), .o(inexact));
// overflow previously had two drivers (the u6 output and a continuous
// assignment from inf), which resolves to X whenever they disagree. The two
// sources are now combined into a single driver; the value is unchanged
// whenever both sources agree.
// NOTE(review): confirm OR is the intended combination and that the u6 delay
// matches the fpNormalize + fpRound latency.
assign overflow = fma_overflow_d | inf;

endmodule
824
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.