OpenCores
URL https://opencores.org/ocsvn/ft816float/ft816float/trunk

Subversion Repositories ft816float

[/] [ft816float/] [trunk/] [rtl/] [verilog2/] [fpFMA.v] - Blame information for rev 53

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 29 robfinch
// ============================================================================
2
//        __
3
//   \\__/ o\    (C) 2019  Robert Finch, Waterloo
4
//    \  __ /    All rights reserved.
5
//     \/_//     robfinch<remove>@finitron.ca
6
//       ||
7
//
8
//      fpFMA.v
9
//              - floating point fused multiplier + adder
10
//              - can issue every clock cycle
11
//              - parameterized width (FPWID)
12
//              - IEEE 754 representation
13
//
14
//
15
// This source file is free software: you can redistribute it and/or modify 
16
// it under the terms of the GNU Lesser General Public License as published 
17
// by the Free Software Foundation, either version 3 of the License, or     
18
// (at your option) any later version.                                      
19
//                                                                          
20
// This source file is distributed in the hope that it will be useful,      
21
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
22
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
23
// GNU General Public License for more details.                             
24
//                                                                          
25
// You should have received a copy of the GNU General Public License        
26
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
27
//                                                                          
28
// ============================================================================
29
 
30
`include "fpConfig.sv"
31
 
32
// ----------------------------------------------------------------------------
// fpFMA - pipelined floating point fused multiply-add core.
//
// Computes (a * b) + c when op = 0 and (a * b) - c when op = 1. The result is
// delivered in the extended (un-normalized, un-rounded) format
// {sign, exponent, double-width mantissa}; see fpFMAnr for a wrapper that
// adds normalization and rounding.
//
// Parameters:
//   FPWID        - floating point format width; selects the multiplier core
//                  and the field sizes pulled in from fpSize.sv.
//   MUL_LATENCY  - delay-line depth used to align side-band signals with
//                  the multiplier output (the original notes "16 clocks for
//                  multiply" for the wide formats).
//
// Ports:
//   clk, ce  - clock and clock enable; every pipeline register is gated by ce.
//   op       - 0 = add, 1 = subtract.
//   rm       - rounding mode; used here only to pick the sign of an exact
//              zero result (rm == 3 gives a negative zero for mixed signs).
//   a, b, c  - operands.
//   o        - {so17, ex17, mo17} extended result.
//   under    - result exponent field is zero.
//   over     - exponent overflowed or is all ones.
//   inf      - result exponent is all ones.
//   zero     - exponent and mantissa of the result are both zero.
//
// NOTE(review): the hard-coded delays in the later stages (4'd5, 4'd7, 4'd14,
// ...) appear to assume the default MUL_LATENCY of 16 - confirm before using
// a format with a different latency.
// ----------------------------------------------------------------------------
module fpFMA (clk, ce, op, rm, a, b, c, o, under, over, inf, zero);
parameter FPWID = 128;
parameter MUL_LATENCY = FPWID==128 ? 16 :
                        FPWID==80 ? 16 :
                        FPWID==64 ? 16 :
                        FPWID==32 ?  5 :
                        1;
`include "fpSize.sv"

input clk;
input ce;
input op;               // operation 0 = add, 1 = subtract
input [2:0] rm;
input  [MSB:0] a, b, c;
output [EX:0] o;
output under;
output over;
output inf;
output zero;

// constants
wire [EMSB:0] infXp = {EMSB+1{1'b1}};    // infinite / NaN - all ones
// The following is the value for an exponent of zero, with the offset
// eg. 8'h7f for eight bit exponent, 11'h7ff for eleven bit exponent, etc.
wire [EMSB:0] bias = {1'b0,{EMSB{1'b1}}};        //2^0 exponent
// The following is a template for a quiet nan. (MSB=1)
wire [FMSB:0] qNaN  = {1'b1,{FMSB{1'b0}}};

// -----------------------------------------------------------
// Clock #1
// - decode the input operands
// - derive basic information
// -----------------------------------------------------------

wire sa1, sb1, sc1;                     // sign bit
wire [EMSB:0] xa1, xb1, xc1;            // exponent bits
wire [FMSB+1:0] fracta1, fractb1, fractc1;       // includes unhidden bit
wire a_dn1, b_dn1, c_dn1;               // a/b/c is denormalized
wire aNan1, bNan1, cNan1;
wire az1, bz1, cz1;
wire aInf1, bInf1, cInf1;
reg op1;

fpDecompReg #(FPWID) u1a (.clk(clk), .ce(ce), .i(a), .sgn(sa1), .exp(xa1), .fract(fracta1), .xz(a_dn1), .vz(az1), .inf(aInf1), .nan(aNan1) );
fpDecompReg #(FPWID) u1b (.clk(clk), .ce(ce), .i(b), .sgn(sb1), .exp(xb1), .fract(fractb1), .xz(b_dn1), .vz(bz1), .inf(bInf1), .nan(bNan1) );
fpDecompReg #(FPWID) u1c (.clk(clk), .ce(ce), .i(c), .sgn(sc1), .exp(xc1), .fract(fractc1), .xz(c_dn1), .vz(cz1), .inf(cInf1), .nan(cNan1) );

always @(posedge clk)
    if (ce) op1 <= op;

// -----------------------------------------------------------
// Clock #2
// Compute the sum of the exponents.
// correct the exponent for denormalized operands
// adjust the sum by the exponent offset (subtract 127)
// mul: ex1 = xa + xb,  result should always be < 1ffh
// Form partial products (clocks 2 to 5)
// -----------------------------------------------------------

reg abz2;
reg [EMSB+2:0] ex2;
reg [EMSB:0] xc2;
reg realOp2;
reg xcInf2;

always @(posedge clk)
    if (ce) abz2 <= az1|bz1;
always @(posedge clk)
    if (ce) ex2 <= (xa1|a_dn1) + (xb1|b_dn1) - bias;
always @(posedge clk)
    if (ce) xc2 <= (xc1|c_dn1);
// Non-blocking assignment: this is a pipeline register like its neighbours,
// a blocking assignment here would create a simulation race with readers
// clocked on the same edge.
always @(posedge clk)
    if (ce) xcInf2 <= &xc1;

// Figure out which operation is really needed an add or
// subtract ?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, so of larger
// -a +  b = sub, so of larger
// -a + -b = add,-
//  a -  b = sub, so of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, so of larger
always @(posedge clk)
    if (ce) realOp2 <= op1 ^ (sa1 ^ sb1) ^ sc1;


// Mantissa product. The top bit of fract17 is tied to zero for the
// dedicated multiplier cores.
wire [FX:0] fract17;
generate begin : gMults
// 16 clocks for multiply
if (FPWID==128) begin
    mult114x114 umul1 (clk, ce, {1'b0,fracta1}, {1'b0,fractb1}, fract17[FX-1:0]);
    assign fract17[FX] = 1'b0;
end
else if (FPWID==80) begin
    mult64x64 umul2 (.CLK(clk), .CE(ce), .A(fracta1), .B(fractb1), .P(fract17[FX-1:0]));
    assign fract17[FX] = 1'b0;
end
else if (FPWID==64) begin
    mult53x53 umul3 (.CLK(clk), .CE(ce), .A(fracta1), .B(fractb1), .P(fract17[FX-1:0]));
    assign fract17[FX] = 1'b0;
end
else if (FPWID==32) begin
    mult24x24 umul4 (.CLK(clk), .CE(ce), .A(fracta1), .B(fractb1), .P(fract17[FX-1:0]));
    assign fract17[FX] = 1'b0;
end
else begin
    // Generic fallback: single registered behavioural multiply.
    reg [FX:0] fract17a;
    always @(posedge clk)
        if (ce) fract17a <= fracta1 * fractb1;
    assign fract17 = fract17a;
end
end
endgenerate

// -----------------------------------------------------------
// Clock #3
// Select zero exponent
// -----------------------------------------------------------

reg [EMSB+2:0] ex3;
reg [EMSB:0] xc3;
always @(posedge clk)
    if (ce) ex3 <= abz2 ? 1'd0 : ex2;
always @(posedge clk)
    if (ce) xc3 <= xc2;

// -----------------------------------------------------------
// Clock #4
// Generate partial products.
// -----------------------------------------------------------

reg [EMSB+2:0] ex4;
reg [EMSB:0] xc4;

always @(posedge clk)
    if (ce) ex4 <= ex3;
always @(posedge clk)
    if (ce) xc4 <= xc3;

// -----------------------------------------------------------
// Clock #5
// Sum partial products (above)
// compute multiplier overflow and underflow
// -----------------------------------------------------------

// Status
wire under5;
wire over5;
wire [EMSB+2:0] ex5;
wire [EMSB:0] xc5;
wire aInf5, bInf5;
wire aNan5, bNan5;
wire qNaNOut5;

// under: product exponent went negative (top bit set).
// over : exponent is all ones or carried into bit EMSB+1, and not negative.
vtdl u5a (.clk(clk), .ce(ce), .a(MUL_LATENCY-5), .d(ex4[EMSB+2]), .q(under5));
vtdl u5b (.clk(clk), .ce(ce), .a(MUL_LATENCY-5), .d((&ex4[EMSB:0] | ex4[EMSB+1]) & !ex4[EMSB+2]), .q(over5));
vtdl #(EMSB+3) u5c (.clk(clk), .ce(ce), .a(MUL_LATENCY-5), .d(ex4), .q(ex5));
vtdl #(EMSB+1) u5d (.clk(clk), .ce(ce), .a(MUL_LATENCY-5), .d(xc4), .q(xc5));

vtdl u2a (.clk(clk), .ce(ce), .a(MUL_LATENCY-2), .d(aInf1), .q(aInf5) );
vtdl u2b (.clk(clk), .ce(ce), .a(MUL_LATENCY-2), .d(bInf1), .q(bInf5) );

// determine when a NaN is output
wire [MSB:0] a5,b5;
vtdl u5 (.clk(clk), .ce(ce), .a(MUL_LATENCY-2), .d((aInf1&bz1)|(bInf1&az1)), .q(qNaNOut5) );
vtdl u14 (.clk(clk), .ce(ce), .a(MUL_LATENCY-2), .d(aNan1), .q(aNan5) );
vtdl u15 (.clk(clk), .ce(ce), .a(MUL_LATENCY-2), .d(bNan1), .q(bNan5) );
vtdl #(MSB+1) u16 (.clk(clk), .ce(ce), .a(MUL_LATENCY-1), .d(a), .q(a5) );
vtdl #(MSB+1) u17 (.clk(clk), .ce(ce), .a(MUL_LATENCY-1), .d(b), .q(b5) );

// -----------------------------------------------------------
// Clock #6
// - figure multiplier mantissa output
// - figure multiplier exponent output
// - correct exponent and mantissa for exceptional conditions
// -----------------------------------------------------------

reg [FX:0] mo6;
reg [EMSB+2:0] ex6;
reg [EMSB:0] xc6;
wire [FMSB+1:0] fractc6;
wire under6;
vtdl #(FMSB+2) u61 (.clk(clk), .ce(ce), .a(MUL_LATENCY-1), .d(fractc1), .q(fractc6) );
delay1 u62 (.clk(clk), .ce(ce), .i(under5), .o(under6));

always @(posedge clk)
    if (ce) xc6 <= xc5;

// Product mantissa: NaN operands propagate their payload, inf * zero yields
// a quiet NaN, infinities and overflow give a zero mantissa.
always @(posedge clk)
    if (ce)
        casez({aNan5,bNan5,qNaNOut5,aInf5,bInf5,over5})
        6'b1?????:  mo6 <= {1'b1,1'b1,a5[FMSB-1:0],{FMSB+1{1'b0}}};
        6'b01????:  mo6 <= {1'b1,1'b1,b5[FMSB-1:0],{FMSB+1{1'b0}}};
        6'b001???:  mo6 <= {1'b1,qNaN|3'd4,{FMSB+1{1'b0}}}; // multiply inf * zero
        6'b0001??:  mo6 <= 0;        // mul inf's
        6'b00001?:  mo6 <= 0;        // mul inf's
        6'b000001:  mo6 <= 0;        // mul overflow
        default:    mo6 <= fract17;
        endcase

always @(posedge clk)
    if (ce)
        casez({qNaNOut5|aNan5|bNan5,aInf5,bInf5,over5,under5})
        5'b1????:   ex6 <= infXp;   // qNaN - infinity * zero
        5'b01???:   ex6 <= infXp;   // 'a' infinite
        5'b001??:   ex6 <= infXp;   // 'b' infinite
        5'b0001?:   ex6 <= infXp;   // result overflow
        5'b00001:   ex6 <= ex5;     //0;            // underflow
        default:    ex6 <= ex5;     // situation normal
        endcase

// -----------------------------------------------------------
// Clock #7
// - prep for addition, determine greater operand
// -----------------------------------------------------------
reg ex_gt_xc7;
reg xeq7;
reg ma_gt_mc7;
reg meq7;
wire az7, bz7, cz7;
wire realOp7;

// which has greater magnitude ? Used for sign calc
always @(posedge clk)
    if (ce) ex_gt_xc7 <= $signed(ex6) > $signed({2'b0,xc6});
always @(posedge clk)
    if (ce) xeq7 <= (ex6=={2'b0,xc6});
always @(posedge clk)
    if (ce) ma_gt_mc7 <= mo6 > {fractc6,{FMSB+1{1'b0}}};
always @(posedge clk)
    if (ce) meq7 <= mo6 == {fractc6,{FMSB+1{1'b0}}};
vtdl #(1,32) u71 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(az1), .q(az7));
vtdl #(1,32) u72 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(bz1), .q(bz7));
vtdl #(1,32) u73 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(cz1), .q(cz7));
vtdl #(1,32) u74 (.clk(clk), .ce(ce), .a(MUL_LATENCY-1), .d(realOp2), .q(realOp7));

// -----------------------------------------------------------
// Clock #8
// - prep for addition, determine greater operand
// - determine if result will be zero
// -----------------------------------------------------------

reg a_gt_b8;
reg resZero8;
reg ex_gt_xc8;
wire [EMSB+2:0] ex8;
wire [EMSB:0] xc8;
wire xcInf8;
wire [2:0] rm8;
wire op8;
wire sa8, sc8;

delay2 #(EMSB+3) u81 (.clk(clk), .ce(ce), .i(ex6), .o(ex8));
delay2 #(EMSB+1) u82 (.clk(clk), .ce(ce), .i(xc6), .o(xc8));
vtdl #(1,32) u83 (.clk(clk), .ce(ce), .a(MUL_LATENCY-1), .d(xcInf2), .q(xcInf8));
vtdl #(3,32) u84 (.clk(clk), .ce(ce), .a(MUL_LATENCY+1), .d(rm), .q(rm8));
vtdl #(1,32) u85 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(op1), .q(op8));
vtdl #(1,32) u86 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(sa1 ^ sb1), .q(sa8));
vtdl #(1,32) u87 (.clk(clk), .ce(ce), .a(MUL_LATENCY), .d(sc1), .q(sc8));

always @(posedge clk)
    if (ce) ex_gt_xc8 <= ex_gt_xc7;
always @(posedge clk)
    if (ce)
        a_gt_b8 <= ex_gt_xc7 || (xeq7 && ma_gt_mc7);

// Find out if the result will be zero.
always @(posedge clk)
    if (ce)
        resZero8 <= (realOp7 & xeq7 & meq7) ||  // subtract, same magnitude
                    ((az7 | bz7) & cz7);        // a or b zero and c zero

// -----------------------------------------------------------
// Clock #9
// Compute output exponent and sign
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
// -----------------------------------------------------------

reg so9;
reg [EMSB+2:0] ex9;
reg [EMSB+2:0] ex9a;
reg ex_gt_xc9;
reg [EMSB:0] xc9;
reg a_gt_c9;
wire [FX:0] mo9;
wire [FMSB+1:0] fractc9;
wire under9;
wire xeq9;

always @(posedge clk)
    if (ce) ex_gt_xc9 <= ex_gt_xc8;
always @(posedge clk)
    if (ce) a_gt_c9 <= a_gt_b8;
always @(posedge clk)
    if (ce) xc9 <= xc8;
always @(posedge clk)
    if (ce) ex9a <= ex8;

delay3 #(FX+1) u93 (.clk(clk), .ce(ce), .i(mo6), .o(mo9));
delay3 #(FMSB+2) u94 (.clk(clk), .ce(ce), .i(fractc6), .o(fractc9));
delay3 u95 (.clk(clk), .ce(ce), .i(under6), .o(under9));
delay2 u96 (.clk(clk), .ce(ce), .i(xeq7), .o(xeq9));

always @(posedge clk)
    if (ce) ex9 <= resZero8 ? 1'd0 : ex_gt_xc8 ? ex8 : {2'b0,xc8};

// Compute output sign
// Case selector is {result zero, product sign, op, sign of c}.
always @(posedge clk)
    if (ce)
    case ({resZero8,sa8,op8,sc8})   // synopsys full_case parallel_case
    4'b0000: so9 <= 0;              // + + + = +
    4'b0001: so9 <= !a_gt_b8;       // + + - = sign of larger
    4'b0010: so9 <= !a_gt_b8;       // + - + = sign of larger
    4'b0011: so9 <= 0;              // + - - = +
    4'b0100: so9 <= a_gt_b8;        // - + + = sign of larger
    4'b0101: so9 <= 1;              // - + - = -
    4'b0110: so9 <= 1;              // - - + = -
    4'b0111: so9 <= a_gt_b8;        // - - - = sign of larger
    4'b1000: so9 <= 0;              //  A +  B, sign = +
    4'b1001: so9 <= rm8==3;         //  A + -B, sign = + unless rounding down
    4'b1010: so9 <= rm8==3;         //  A -  B, sign = + unless rounding down
    4'b1011: so9 <= 0;              // +A - -B, sign = +
    4'b1100: so9 <= rm8==3;         // -A +  B, sign = + unless rounding down
    4'b1101: so9 <= 1;              // -A + -B, sign = -
    4'b1110: so9 <= 1;              // -A - +B, sign = -
    4'b1111: so9 <= rm8==3;         // -A - -B, sign = + unless rounding down
    endcase

// -----------------------------------------------------------
// Clock #10
// Compute the difference in exponents, provides shift amount
// Note that ex9a will be negative for an underflow condition
// so it's added rather than subtracted from xc9 as -(-num)
// is the same as an add. The underflow is tracked rather than
// using extra bits in the exponent.
// -----------------------------------------------------------
reg [EMSB+2:0] xdiff10;
reg [FX:0] mfs;
reg ops10;

// If the multiplier exponent was negative (underflowed) then
// the mantissa needs to be shifted right even more (until
// the exponent is zero. The total shift would be xc9-0-
// amount underflows which is xc9 + -ex9a.

always @(posedge clk)
    if (ce) xdiff10 <= ex_gt_xc9 ? ex9a - xc9
                     : ex9a[EMSB+2] ? xc9 + (~ex9a+2'd1)
                     : xc9 - ex9a;

// Determine which fraction to denormalize (the one with the
// smaller exponent is denormalized). If the exponents are equal
// denormalize the smaller fraction.
always @(posedge clk)
    if (ce) mfs <=
        xeq9 ? (a_gt_c9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9)
             : ex_gt_xc9 ? {4'b0,fractc9,{FMSB+1{1'b0}}} : mo9;

// ops10 = 1 when the c fraction is the one being shifted (product is larger).
always @(posedge clk)
    if (ce) ops10 <= xeq9 ? (a_gt_c9 ? 1'b1 : 1'b0)
                          : (ex_gt_xc9 ? 1'b1 : 1'b0);

// -----------------------------------------------------------
// Clock #11
// Limit the size of the shifter to only bits needed.
// -----------------------------------------------------------
reg [7:0] xdif11;

always @(posedge clk)
    if (ce) xdif11 <= xdiff10 > FX+3 ? FX+3 : xdiff10;

// -----------------------------------------------------------
// Clock #12
// Determine the sticky bit
// -----------------------------------------------------------

wire sticky, sticky12;
wire [FX:0] mfs12;
wire [7:0] xdif12;

// NOTE(review): no branch exists for other FPWID values, in which case
// sticky is left undriven.
generate
begin
if (FPWID==128)
    redor128 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==80)
    redor80 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==64)
    redor64 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
else if (FPWID==32)
    redor32 u121 (.a(xdif11), .b({mfs,2'b0}), .o(sticky) );
end
endgenerate

// register inputs to shifter and shift
delay1 #(1)    u122(.clk(clk), .ce(ce), .i(sticky), .o(sticky12) );
delay1 #(8)    u123(.clk(clk), .ce(ce), .i(xdif11),   .o(xdif12) );
delay2 #(FX+1) u124(.clk(clk), .ce(ce), .i(mfs), .o(mfs12) );

// -----------------------------------------------------------
// Clock #13
// - denormalize operand (shift right)
// -----------------------------------------------------------
reg [FX+2:0] mfs13;
wire [FX:0] mo13;
wire ex_gt_xc13;
wire [FMSB+1:0] fractc13;
wire ops13;

delay4 #(FX+1) u131 (.clk(clk), .ce(ce), .i(mo9), .o(mo13));
delay4 u132 (.clk(clk), .ce(ce), .i(ex_gt_xc9), .o(ex_gt_xc13));
vtdl #(FMSB+2) u133 (.clk(clk), .ce(ce), .a(4'd3), .d(fractc9), .q(fractc13));
delay3 u134 (.clk(clk), .ce(ce), .i(ops10), .o(ops13));

// The sticky bit is OR-ed into the least significant bit of the shifted value.
always @(posedge clk)
    if (ce) mfs13 <= ({mfs12,2'b0} >> xdif12)|sticky12;

// -----------------------------------------------------------
// Clock #14
// Sort operands
// -----------------------------------------------------------
reg [FX+2:0] oa, ob;
wire a_gt_b14;

vtdl #(1) u141 (.clk(clk), .ce(ce), .a(4'd5), .d(a_gt_b8), .q(a_gt_b14));

always @(posedge clk)
    if (ce) oa <= ops13 ? {mo13,2'b00} : mfs13;
always @(posedge clk)
    if (ce) ob <= ops13 ? mfs13 : {fractc13,{FMSB+1{1'b0}},2'b00};

// -----------------------------------------------------------
// Clock #15
// - Sort operands
// -----------------------------------------------------------
reg [FX+2:0] oaa, obb;
wire realOp15;
wire [EMSB:0] ex15;
wire [EMSB:0] ex9c = ex9[EMSB+1] ? infXp : ex9[EMSB:0];   // saturate to all ones
wire overflow15;
vtdl #(1) u151 (.clk(clk), .ce(ce), .a(4'd7), .d(realOp7), .q(realOp15));
vtdl #(EMSB+1) u152 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9c), .q(ex15));
// One-bit flag, so the delay line is one bit wide.
vtdl #(1) u153 (.clk(clk), .ce(ce), .a(4'd5), .d(ex9[EMSB+1]| &ex9[EMSB:0]), .q(overflow15));

// oaa always receives the operand of greater magnitude so that the
// subtract below cannot go negative.
always @(posedge clk)
    if (ce) oaa <= a_gt_b14 ? oa : ob;
always @(posedge clk)
    if (ce) obb <= a_gt_b14 ? ob : oa;

// -----------------------------------------------------------
// Clock #16
// - perform add/subtract
// - addition can generate an extra bit, subtract can't go negative
// -----------------------------------------------------------
reg [FX+3:0] mab;
wire [FX:0] mo16;
wire [FMSB+1:0] fractc16;
wire Nan16;
wire cNan16;
wire aInf16, cInf16;
wire op16;
wire exinf16;

vtdl #(1) u161 (.clk(clk), .ce(ce), .a(4'd10), .d(qNaNOut5|aNan5|bNan5), .q(Nan16));
vtdl #(1) u162 (.clk(clk), .ce(ce), .a(4'd14), .d(cNan1), .q(cNan16));
vtdl #(1) u163 (.clk(clk), .ce(ce), .a(4'd9), .d(&ex6), .q(aInf16));
vtdl #(1) u164 (.clk(clk), .ce(ce), .a(4'd14), .d(cInf1), .q(cInf16));
vtdl #(1) u165 (.clk(clk), .ce(ce), .a(4'd14), .d(op1), .q(op16));
delay3 #(FX+1) u166 (.clk(clk), .ce(ce), .i(mo13), .o(mo16));
vtdl #(FMSB+2) u167 (.clk(clk), .ce(ce), .a(4'd6), .d(fractc9), .q(fractc16));
delay1 u169 (.clk(clk), .ce(ce), .i(&ex15), .o(exinf16));

always @(posedge clk)
    if (ce) mab <= realOp15 ? oaa - obb : oaa + obb;

// -----------------------------------------------------------
// Clock #17
// - adjust for Nans
// -----------------------------------------------------------
wire [EMSB:0] ex17;
reg [FX:0] mo17;
wire so17;
wire exinf17;
wire overflow17;

vtdl #(1)        u171 (.clk(clk), .ce(ce), .a(4'd7), .d(so9), .q(so17));
delay2 #(EMSB+1) u172 (.clk(clk), .ce(ce), .i(ex15), .o(ex17));
delay1 #(1) u173 (.clk(clk), .ce(ce), .i(exinf16), .o(exinf17));
delay2 u174 (.clk(clk), .ce(ce), .i(overflow15), .o(overflow17));

// Gated by ce like every other pipeline register; otherwise the mantissa
// would keep advancing during a stall while so17/ex17 hold, misaligning
// the fields of the output.
always @(posedge clk)
    if (ce)
        casez({aInf16&cInf16,Nan16,cNan16,exinf16})
        4'b1???:    mo17 <= {1'b0,op16,{FMSB-1{1'b0}},op16,{FMSB{1'b0}}};   // inf +/- inf - generate QNaN on subtract, inf on add
        4'b01??:    mo17 <= {1'b0,mo16};
        4'b001?:    mo17 <= {1'b1,1'b1,fractc16[FMSB-1:0],{FMSB+1{1'b0}}};
        4'b0001:    mo17 <= 1'd0;
        default:    mo17 <= mab[FX+3:2];            // mab has two extra lead bits and two trailing bits
        endcase

assign o = {so17,ex17,mo17};
assign zero = {ex17,mo17}==1'd0;
assign inf = exinf17;
assign under = ex17==1'd0;
assign over = overflow17;

endmodule
545
 
546
 
547
// Fused multiply-add with normalization and rounding.
548
 
549
// ----------------------------------------------------------------------------
// fpFMAnr - fused multiply-add with normalization and rounding.
//
// Chains fpFMA -> fpNormalize -> fpRound and decodes the rounded result to
// produce the status flags.
//
// Ports:
//   clk, ce    - clock and clock enable.
//   op         - passed to fpFMA (0 = add, 1 = subtract).
//   rm         - rounding mode, passed to fpFMA and fpRound.
//   a, b, c    - operands.
//   o          - rounded result.
//   zero, inf  - decoded combinationally from o by fpDecomp.
//   overflow   - delayed fpFMA overflow flag OR-ed with inf.
//   underflow  - delayed fpFMA underflow flag.
//   inexact    - delayed inexact flag from fpNormalize.
// ----------------------------------------------------------------------------
module fpFMAnr(clk, ce, op, rm, a, b, c, o, inf, zero, overflow, underflow, inexact);
parameter FPWID=128;
`include "fpSize.sv"

input clk;
input ce;
input op;
input [2:0] rm;
input  [MSB:0] a, b, c;
output [MSB:0] o;
output zero;
output inf;
output overflow;
output underflow;
output inexact;

wire [EX:0] fma_o;              // un-normalized result from the FMA core
wire fma_underflow;
wire fma_overflow;
wire fma_overflow_dly;          // fma_overflow after the flag delay line
wire norm_underflow;
wire norm_inexact;
wire [MSB+3:0] fpn0;            // normalized result fed to the rounder

fpFMA #(FPWID) u1
(
    .clk(clk),
    .ce(ce),
    .op(op),
    .rm(rm),
    .a(a),
    .b(b),
    .c(c),
    .o(fma_o),
    .under(fma_underflow),
    .over(fma_overflow),
    .zero(),
    .inf()
);
fpNormalize #(FPWID) u2
(
    .clk(clk),
    .ce(ce),
    .i(fma_o),
    .o(fpn0),
    .under_i(fma_underflow),
    .under_o(norm_underflow),
    .inexact_o(norm_inexact)
);
fpRound     #(FPWID) u3(.clk(clk), .ce(ce), .rm(rm), .i(fpn0), .o(o) );
fpDecomp    #(FPWID) u4(.i(o), .xz(), .vz(zero), .inf(inf));
vtdl                 u5 (.clk(clk), .ce(ce), .a(4'd11), .d(fma_underflow), .q(underflow));
vtdl                 u6 (.clk(clk), .ce(ce), .a(4'd11), .d(fma_overflow), .q(fma_overflow_dly));
delay3      #(1)     u7 (.clk(clk), .ce(ce), .i(norm_inexact), .o(inexact));

// The overflow output used to be driven twice (by the u6 delay line and by
// a continuous assign from inf), which is a multiply-driven net. The two
// sources are merged into a single driver here.
// NOTE(review): OR was chosen so the flag is raised whenever either
// original driver would have raised it - confirm this is the intent.
assign overflow = fma_overflow_dly | inf;

endmodule
606
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.