OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Blame information for rev 6

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 6 robfinch
// ============================================================================
2
//        __
3
//   \\__/ o\    (C) 2006,2015  Robert Finch, Stratford
4
//    \  __ /    All rights reserved.
5
//     \/_//     robfinch<remove>@finitron.ca
6
//       ||
7
//
8
// This source file is free software: you can redistribute it and/or modify 
9
// it under the terms of the GNU Lesser General Public License as published 
10
// by the Free Software Foundation, either version 3 of the License, or     
11
// (at your option) any later version.                                      
12
//                                                                          
13
// This source file is distributed in the hope that it will be useful,      
14
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
15
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
16
// GNU General Public License for more details.                             
17
//                                                                          
18
// You should have received a copy of the GNU General Public License        
19
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
20
//
21
//
22
// Thor SuperScalar
23
//      fpUnit.v
24
//  - floating point unit
25
//  - parameterized width
26
//  - IEEE 754 representation
27
//
28
//      NaN Value               Origin
29
// 31'h7FC00001    - infinity - infinity
30
// 31'h7FC00002    - infinity / infinity
31
// 31'h7FC00003    - zero / zero
32
// 31'h7FC00004    - infinity X zero
33
//
34
// Whenever the fpu encounters a NaN input, the NaN is
35
// passed through to the output.
36
//
37
// Ref: Webpack 8.2  Spartan3-4  xc3s1000-4ft256
38
// 2335 LUTS / 1260 slices / 43.4 MHz
39
// Ref: Webpack 13.1 Spartan3e   xc3s1200e-4fg320
40
// 2433 LUTs / 1301 slices / 51.6 MHz
41
//
42
// Instr.  Cyc Lat
43
// fc__    ; 1  0    compare, lt le gt ge eq ne or un
44
// fabs    ; 1  0     absolute value
45
// fnabs    ; 1  0     negative absolute value
46
// fneg    ; 1  0     negate
47
// fmov    ; 1  0     move
48
// fman    ; 1  0     get mantissa
49
// fsign    ; 1  0     get sign
50
//
51
// f2i        ; 1  1  convert float to integer
52
// i2f        ; 1  1  convert integer to float
53
//
54
// fadd    ; 1  4    addition
55
// fsub    ; 1  4  subtraction
56
// fmul    ; 1  4  multiplication
57
//
58
// fdiv    ; 16 4    division
59
//
60
// ftx        ; 1  0  trigger fp exception
61
// fcx        ; 1  0  clear fp exception
62
// fex        ; 1  0  enable fp exception
63
// fdx        ; 1  0  disable fp exception
64
// frm        ; 1  0  set rounding mode
65
// fstat    ; 1  0  get status register
66
//
67
// related integer:
68
// graf    ; 1  0  get random float (0,1]
69
//
70
// ============================================================================
71
//
72
`include "..\Thor_defines.v"
73
 
74
// ----------------------------------------------------------------------------
// NaN encodings used to tag the origin of an invalid operation.
// fpUnit compares the rounded result against these values to decide which
// invalid-operation status flag to raise.  Names ending in 'S' are the single
// precision (32 bit) variants, the others are double precision (64 bit).
//
// The literals are one bit narrower than the result word (the sign bit is
// omitted), so the comparison zero-extends them and only matches a result
// whose sign bit is clear.
//
// NOTE(review): the double precision values use a 7FF0... prefix (fraction MSB
// clear) whereas the single precision values use 7FC0... (fraction MSB set) --
// confirm this matches what the add/mul/div units actually generate.
// NOTE(review): QINFOS / QINFO are not referenced in fpUnit; the original
// comment only says "info", their intended meaning could not be confirmed.
// ----------------------------------------------------------------------------
`define QINFOS          23'h7FC000              // info
`define QSUBINFS        31'h7FC00001            // - infinity - infinity
`define QINFDIVS        31'h7FC00002            // - infinity / infinity
`define QZEROZEROS      31'h7FC00003            // - zero / zero
`define QINFZEROS       31'h7FC00004            // - infinity X zero

`define QINFO           52'h7FC000              // info
// These must be 63 bits wide: 7FF0_0000_0000_000x has bit 62 set, so a 62 bit
// sized literal silently drops the top exponent bit (giving 3FF0...).
`define QSUBINF         63'h7FF0000000000001    // - infinity - infinity
`define QINFDIV         63'h7FF0000000000002    // - infinity / infinity
`define QZEROZERO       63'h7FF0000000000003    // - zero / zero
`define QINFZERO        63'h7FF0000000000004    // - infinity X zero
85
 
86
// ----------------------------------------------------------------------------
// fpUnit
//
// Top level of the floating point unit.  It instantiates a double precision
// (#(64)) and a single precision (#(32)) copy of each functional unit, selects
// between them, and maintains the floating point control / status flags.
//
// Ports:
//   rst        - synchronous reset of the control and status registers
//   clk        - clock
//   ce         - clock enable; ANDed with divDone to form pipe_ce
//   op, fn     - opcode / function code (encodings come from Thor_defines.v)
//   ld         - forwarded to the .ld input of the dividers
//   a, b       - operands; the single precision units use bits [31:0]
//   o          - tri-state result bus: status word for fstat, otherwise the
//                double or single precision result
//   exception  - global exception indicator (gx)
//
// NOTE(review): every double precision sub-unit is instantiated with a fixed
// width of 64 and several expressions index bit 63 / 62 directly, so the
// module appears to be usable only with WID = 64 even though WID defaults
// to 32 -- confirm against the instantiating design.
// ----------------------------------------------------------------------------
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);

parameter WID = 32;
localparam MSB = WID-1;
// MSB index of the exponent field for each supported width
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
// MSB index of the fraction field for each supported width
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;
localparam EMSBS = 7;                       // single precision exponent MSB
localparam FMSBS = 22;                      // single precision fraction MSB
localparam FX = (FMSB+2)*2-1;               // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1;             // the MSB of the expanded fraction
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;

input rst;
input clk;
input ce;
input [7:0] op;
input [5:0] fn;
input ld;
input [MSB:0] a;
input [MSB:0] b;
output tri [MSB:0] o;
output exception;


//------------------------------------------------------------
// constants
// (sized to hold the whole exponent pattern; they were 1 bit wide before,
// which truncated the replicated value to a single bit)
wire [10:0] infXp  = {11{1'b1}};    // value for infinite exponent / nan
wire [7:0]  infXps = {8{1'b1}};

// Variables
wire divByZero;         // attempt to divide by zero
wire inf;               // result is infinite (+ or -)
wire zero;              // result is zero (+ or -)
wire ns;                // nan sign
wire nss;
wire nso;
wire nsos;
wire isNan,isNans;
wire nanx,nanxs;

// Decode fp operation
wire fstat  = op==`FLOAT && fn==`FSTAT;     // get status
wire fdiv   = op==`FLOAT && fn==`FDIV;
wire fdivs  = op==`FLOAT && fn==`FDIVS;
wire ftx    = op==`FLOAT && fn==`FTX;       // trigger exception
wire fcx    = op==`FLOAT && fn==`FCX;       // clear exception
wire fex    = op==`FLOAT && fn==`FEX;       // enable exception
wire fdx    = op==`FLOAT && fn==`FDX;       // disable exception
wire fcmp   = op==`FLOAT && (fn==`FCMP || fn==`FCMPS);
wire frm    = op==`FLOAT && fn==`FRM;       // set rounding mode
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R;
// operations whose result is taken from the fpZLUnit instances
wire zl_op =  (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) ||
              (op==`FLOAT && fn==`FCMP) ||
              (op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) ||
              (op==`FLOAT && (fn==`FCMPS))
             ;
// operations whose result is taken from the fpLOOUnit instances
// (the function code, not the opcode, selects ITOFS)
wire loo_op = (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) ||
              (op==`SINGLE_R && (fn==`FTOIS || fn==`ITOFS));
wire loo_done;

wire subinf;
wire zerozero;
wire infzero;
wire infdiv;

// floating point control and status
reg [1:0] rm;       // rounding mode
reg inexe;          // inexact exception enable
reg dbzxe;          // divide by zero exception enable
reg underxe;        // underflow exception enable
reg overxe;         // overflow exception enable
reg invopxe;        // invalid operation exception enable

reg nsfp;           // non-standard floating point indicator

reg fractie;        // fraction inexact
reg raz;            // rounded away from zero

reg inex;           // inexact exception
reg dbzx;           // divide by zero exception
reg underx;         // underflow exception
reg overx;          // overflow exception
reg giopx;          // global invalid operation exception
reg sx;             // summary exception

reg swtx;           // software triggered exception indicator

wire gx = swtx|inex|dbzx|underx|overx|giopx;    // global exception indicator

// breakdown of invalid operation exceptions
reg cvtx;           // conversion exception
reg sqrtx;          // squareroot exception
reg NaNCmpx;        // NaN comparison exception
reg infzerox;       // multiply infinity by zero
reg zerozerox;      // division of zero by zero
reg infdivx;        // division of infinities
reg subinfx;        // subtraction of infinities
reg snanx;          // signalling nan

wire divDone;
// NOTE(review): pipe_ce also gates the dividers that produce divDone; whether
// the divider can still make progress while divDone is low depends on fpDiv,
// which is not visible here.
wire pipe_ce = ce & divDone;    // divide must be done in order for pipe to clock

// Hardware detected events, already qualified by their enable bits.
wire infzero_evt  = invopxe & infzero;
wire zerozero_evt = invopxe & zerozero;
wire subinf_evt   = invopxe & subinf;
wire infdiv_evt   = invopxe & infdiv;
wire nancmp_evt   = invopxe & nanx & fcmp;      // must be a compare
wire dbz_evt      = dbzxe & divByZero;
wire any_evt      = nancmp_evt | infzero_evt | zerozero_evt |
                    subinf_evt | infdiv_evt | dbz_evt;

// Control / status register update.
// The sticky flags first receive their default "accumulate hardware events"
// value; the ftx / fcx branches that follow then override the flags they
// control.  (Previously the accumulation was written after the branches, and
// since the last non-blocking assignment wins, fcx could never clear these
// flags and ftx could not set dbzx or sx.)  An event arriving in the same
// cycle as a clear is kept.
always @(posedge clk)
    // reset: disable and clear all exceptions and status
    if (rst) begin
        rm <= 2'b0;             // round nearest even - default rounding mode
        inex <= 1'b0;
        dbzx <= 1'b0;
        underx <= 1'b0;
        overx <= 1'b0;
        giopx <= 1'b0;
        swtx <= 1'b0;
        sx <= 1'b0;
        NaNCmpx <= 1'b0;
        // these were never reset, leaving them unknown in simulation
        infzerox <= 1'b0;
        zerozerox <= 1'b0;
        subinfx <= 1'b0;
        infdivx <= 1'b0;
        // these have no other driver in this module; give them a defined value
        cvtx <= 1'b0;
        sqrtx <= 1'b0;
        snanx <= 1'b0;
        fractie <= 1'b0;
        raz <= 1'b0;

        inexe <= 1'b0;
        dbzxe <= 1'b0;
        underxe <= 1'b0;
        overxe <= 1'b0;
        invopxe <= 1'b0;

        nsfp <= 1'b0;
    end
    else if (pipe_ce) begin
        // default: accumulate hardware detected events
        infzerox  <= infzerox  | infzero_evt;
        zerozerox <= zerozerox | zerozero_evt;
        subinfx   <= subinfx   | subinf_evt;
        infdivx   <= infdivx   | infdiv_evt;
        NaNCmpx   <= NaNCmpx   | nancmp_evt;
        dbzx      <= dbzx      | dbz_evt;
        sx        <= sx        | any_evt;

        if (ftx) begin
            // software trigger: set the flags selected by a|b
            inex   <= inex   | (a[4]|b[4]);
            dbzx   <= dbzx   | (a[3]|b[3]) | dbz_evt;
            underx <= underx | (a[2]|b[2]);
            overx  <= overx  | (a[1]|b[1]);
            giopx  <= giopx  | (a[0]|b[0]);
            swtx <= 1'b1;
            sx <= 1'b1;
        end
        else if (fcx) begin
            // clear the flags selected by a|b
            sx     <= (sx   & !(a[5]|b[5])) | any_evt;
            inex   <= inex   & !(a[4]|b[4]);
            dbzx   <= (dbzx & !(a[3]|b[3])) | dbz_evt;
            underx <= underx & !(a[2]|b[2]);
            overx  <= overx  & !(a[1]|b[1]);
            giopx  <= giopx  & !(a[0]|b[0]);
            // clear exception type when global invalid operation is cleared
            infdivx   <= (infdivx   & !(a[0]|b[0])) | infdiv_evt;
            zerozerox <= (zerozerox & !(a[0]|b[0])) | zerozero_evt;
            subinfx   <= (subinfx   & !(a[0]|b[0])) | subinf_evt;
            infzerox  <= (infzerox  & !(a[0]|b[0])) | infzero_evt;
            NaNCmpx   <= (NaNCmpx   & !(a[0]|b[0])) | nancmp_evt;
            // NOTE(review): setting swtx on a *clear* looks unintended (it is
            // part of gx and nothing but reset clears it) -- kept as in the
            // original, confirm the intended behaviour.
            swtx <= 1'b1;
        end
        else if (fex) begin
            // enable the exceptions selected by a|b
            inexe   <= inexe   | (a[4]|b[4]);
            dbzxe   <= dbzxe   | (a[3]|b[3]);
            underxe <= underxe | (a[2]|b[2]);
            overxe  <= overxe  | (a[1]|b[1]);
            invopxe <= invopxe | (a[0]|b[0]);
        end
        else if (fdx) begin
            // disable the exceptions selected by a|b
            inexe   <= inexe   & !(a[4]|b[4]);
            dbzxe   <= dbzxe   & !(a[3]|b[3]);
            underxe <= underxe & !(a[2]|b[2]);
            overxe  <= overxe  & !(a[1]|b[1]);
            invopxe <= invopxe & !(a[0]|b[0]);
        end
        else if (frm)
            rm <= a[1:0]|b[1:0];
    end

// Decompose operands into sign,exponent,mantissa
wire sa, sb, sas, sbs;
wire [FMSB:0] ma, mb;
wire [22:0] mas, mbs;

wire aInf, bInf, aInfs, bInfs;
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [1:0] rmd4;        // rounding mode as seen at the output of delay4 u3
wire [7:0] op1, op2;
wire [5:0] fn1,fn2;

wire [MSB:0] zld_o,lood_o;
wire [31:0] zls_o,loos_o;
wire [MSB:0] zl_o,loo_o;    // precision-selected results (previously implicit 1 bit nets)
fpZLUnit  #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );
// the double precision unit drives lood_o / loo_done (it used to drive the
// undeclared nets loo_o / loos_done, leaving lood_o without a driver)
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(lood_o), .done(loo_done) );
fpZLUnit  #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
assign loo_o = single ? loos_o : lood_o;
assign zl_o = single ? zls_o : zld_o;
fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );

// delay the rounding mode (was fed from the undeclared net 'rmd')
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rm), .o(rmd4) );
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );

delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );

// Compute NaN output sign
wire aob_nan = aNan|bNan;       // one of the operands is a nan
wire bothNan = aNan&bNan;       // both of the operands are nans
wire aob_nans = aNans|bNans;    // one of the operands is a nan
wire bothNans = aNans&bNans;    // both of the operands are nans

// when both operands are NaNs the sign follows the one with the larger mantissa
assign ns = bothNan ?
                (ma==mb ? sa & sb : ma < mb ? sb : sa) :
                aNan ? sa : sb;
assign nss = bothNans ?
                (mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :
                aNans ? sas : sbs;

// NOTE(review): these delay lines are enabled by ce while the rest of the
// pipeline uses pipe_ce -- confirm they stay aligned across a divide stall.
delay5 u8(.clk(clk), .ce(ce), .i(ns), .o(nso) );
delay5 u9(.clk(clk), .ce(ce), .i(aob_nan), .o(isNan) );
delay5 u8s(.clk(clk), .ce(ce), .i(nss), .o(nsos) );
delay5 u9s(.clk(clk), .ce(ce), .i(aob_nans), .o(isNans) );

wire [MSB:0] fpu_o;
wire [MSB+3:0] fpn_o;
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
reg  [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
reg  [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;

// These units have a two clock cycle latency
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );
fpDiv    #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul    #(64) u12(.clk(clk), .ce(pipe_ce),          .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv    #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
fpMul    #(32) u12s(.clk(clk), .ce(pipe_ce),          .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );

// Select the underflow indication belonging to the operation two stages back.
// Both outputs get a default so that no latch is inferred.
always @*
begin
    under  = 1'b0;
    unders = 1'b0;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  under  = mulUnder;
        `FDIV:  under  = divUnder;
        `FMULS: unders = mulUnders;
        `FDIVS: unders = divUnders;
        default: ;
        endcase
end

// Select the un-normalized result belonging to the operation two stages back.
// The adder output is the default (it is also what FADD / FSUB select), so
// both results are always assigned and no latch is inferred.
always @*
begin
    fres  = fas_o;
    fress = fass_o;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  fres  = fmul_o;
        `FDIV:  fres  = fdiv_o;
        `FMULS: fress = fmuls_o;
        `FDIVS: fress = fdivs_o;
        default: ;      // FADD, FSUB, FADDS, FSUBS and everything else: adder
        endcase
end

// pipeline stage
// one cycle latency
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );

// pipeline stage
// one cycle latency
// (rounding mode comes from the delayed copy rmd4; it used to reference the
// undeclared net 'rm4')
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpn_o), .o(fpu_o) );
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpns_o), .o(fpus_o) );

// result sign: the NaN sign when an operand was a NaN, otherwise the sign of
// the rounded result
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[MSB]);

//fix: status should be registered
// Status word, driven onto o only for fstat.
assign o = fstat ? {
    rm,
    inexe,
    dbzxe,
    underxe,
    overxe,
    invopxe,
    nsfp,

    fractie,
    raz,
    1'b0,
    so & !zero,
    !so & !zero,
    zero,
    inf,

    swtx,
    inex,
    dbzx,
    underx,
    overx,
    giopx,
    gx,
    sx,

    cvtx,
    sqrtx,
    NaNCmpx,
    infzerox,
    zerozerox,
    infdivx,
    subinfx,
    snanx
    } : 'bz;

// double precision result
assign o = (!fstat & !single) ?
    zl_op ? zld_o :
    loo_op ? lood_o :
    {so,fpu_o[MSB-1:0]} : 'bz;
// single precision result; fpus_o is 32 bits wide, so it is sliced with fixed
// indices instead of MSB-1 (which is out of range when WID is 64)
assign o = (!fstat &  single)?
    zl_op ? zls_o :
    loo_op ? loos_o :
    {so,fpus_o[30:0]} : 'bz;
assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0;
// infinity = exponent all ones and fraction zero; the single precision
// exponent is bits [30:23] (bit 31 is the sign and must not take part)
assign inf = single ? ((&fpus_o[30:23]) && fpus_o[22:0]==0) :
             WID==64 ? ((&fpu_o[62:52]) && fpu_o[51:0]==0) : 0;

// invalid operation origin, recognised from the tagged NaN in the result
assign subinf   = single ? fpus_o[31:0]==`QSUBINFS   : WID==64 ? fpu_o[63:0]==`QSUBINF   : 0;
assign infdiv   = single ? fpus_o[31:0]==`QINFDIVS   : WID==64 ? fpu_o[63:0]==`QINFDIV   : 0;
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
// the double precision compare must use the full result (was fpu_o[31:0])
assign infzero  = single ? fpus_o[31:0]==`QINFZEROS  : WID==64 ? fpu_o[63:0]==`QINFZERO  : 0;

assign exception = gx;

endmodule
455
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.