URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Blame information for rev 51

Go to most recent revision | Details | Compare with Previous | View Log


// ============================================================================
//        __
//   \\__/ o\    (C) 2006,2015  Robert Finch, Stratford
//    \  __ /    All rights reserved.
//     \/_//     robfinch<remove>@finitron.ca
//       ||
//
// This source file is free software: you can redistribute it and/or modify 
// it under the terms of the GNU Lesser General Public License as published 
// by the Free Software Foundation, either version 3 of the License, or     
// (at your option) any later version.                                      
//                                                                          
// This source file is distributed in the hope that it will be useful,      
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
// GNU General Public License for more details.                             
//                                                                          
// You should have received a copy of the GNU General Public License        
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
//
//
// Thor SuperScalar
//      fpUnit.v
//  - floating point unit
//  - parameterized width
//  - IEEE 754 representation
//
//      NaN Value               Origin
// 31'h7FC00001    - infinity - infinity
// 31'h7FC00002    - infinity / infinity
// 31'h7FC00003    - zero / zero
// 31'h7FC00004    - infinity X zero
//
// Whenever the fpu encounters a NaN input, the NaN is
// passed through to the output.
//
// Ref: Webpack 8.2  Spartan3-4  xc3s1000-4ft256
// 2335 LUTS / 1260 slices / 43.4 MHz
// Ref: Webpack 13.1 Spartan3e   xc3s1200e-4fg320
// 2433 LUTs / 1301 slices / 51.6 MHz
//
// Instr.  Cyc Lat
// fc__    ; 1  0    compare, lt le gt ge eq ne or un
// fabs    ; 1  0     absolute value
// fnabs    ; 1  0     negative absolute value
// fneg    ; 1  0     negate
// fmov    ; 1  0     move
// fman    ; 1  0     get mantissa
// fsign    ; 1  0     get sign
//
// f2i        ; 1  1  convert float to integer
// i2f        ; 1  1  convert integer to float
//
// fadd    ; 1  4    addition
// fsub    ; 1  4  subtraction
// fmul    ; 1  4  multiplication
//
// fdiv    ; 16 4    division
//
// ftx        ; 1  0  trigger fp exception
// fcx        ; 1  0  clear fp exception
// fex        ; 1  0  enable fp exception
// fdx        ; 1  0  disable fp exception
// frm        ; 1  0  set rounding mode
// fstat    ; 1  0  get status register
//
// related integer:
// graf    ; 1  0  get random float (0,1]
//
// ============================================================================
//
`include "..\Thor_defines.v"
 
// Canned quiet-NaN codes used to identify the origin of an invalid operation.
// The single-precision codes are 31-bit (sign excluded) patterns; the
// double-precision codes need 63 bits: 7FF0_0000_0000_000x has bit 62 set, so
// a 62-bit sized literal would silently drop it and the constant would become
// 3FF0_0000_0000_000x, which no NaN result can ever equal.
`define QINFOS          23'h7FC000              // info
`define QSUBINFS        31'h7FC00001    // - infinity - infinity
`define QINFDIVS        31'h7FC00002    // - infinity / infinity
`define QZEROZEROS      31'h7FC00003    // - zero / zero
`define QINFZEROS       31'h7FC00004    // - infinity X zero
 
// NOTE(review): QINFO reuses the single-precision pattern; it is not referenced
// in this file, so it is left unchanged - confirm before using it.
`define QINFO           52'h7FC000              // info
`define QSUBINF         63'h7FF0000000000001    // - infinity - infinity
`define QINFDIV         63'h7FF0000000000002    // - infinity / infinity
`define QZEROZERO   63'h7FF0000000000003        // - zero / zero
`define QINFZERO        63'h7FF0000000000004    // - infinity X zero
 
// ----------------------------------------------------------------------------
// fpUnit - top level of the floating point unit.
//
// Ports:
//   rst        synchronous reset of the control/status registers
//   clk        clock
//   ce         clock enable
//   op, fn     major opcode / function code selecting the operation
//   ld         load strobe passed to the dividers
//   a, b       operands (the low 32 bits are used for single precision)
//   o          result, or the status word when fn selects FSTAT
//   exception  global exception indicator (gx)
//
// A double-precision (64-bit) and a single-precision (32-bit) datapath are
// instantiated side by side; 'single' selects which one drives 'o'.
//
// NOTE(review): the double-precision sub-units are instantiated at a fixed
// width of 64 and 'zero'/'inf'/NaN-origin detection is only implemented for
// WID==64, so WID=64 appears to be the only fully supported setting even
// though the parameter defaults to 32 - confirm against the instantiating
// module.
// ----------------------------------------------------------------------------
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);
 
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                                  WID==52 ? 10 :
                                  WID==48 ? 10 :
                                  WID==44 ? 10 :
                                  WID==42 ? 10 :
                                  WID==40 ?  9 :
                                  WID==32 ?  7 :
                                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                                  WID==52 ? 39 :
                                  WID==48 ? 35 :
                                  WID==44 ? 31 :
                                  WID==42 ? 29 :
                                  WID==40 ? 28 :
                                  WID==32 ? 22 :
                                  WID==24 ? 15 : 9;
localparam EMSBS = 7;
localparam FMSBS = 22;
localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;
 
input rst;
input clk;
input ce;
input [7:0] op;
input [5:0] fn;
input ld;
input [MSB:0] a;
input [MSB:0] b;
output tri [MSB:0] o;
output exception;
 
 
//------------------------------------------------------------
// constants
// (declared with their full width; a scalar wire would keep only bit 0)
wire [10:0] infXp = {11{1'b1}}; // value for infinite exponent / nan
wire [7:0] infXps = {8{1'b1}};
 
// Variables
wire divByZero;                 // attempt to divide by zero
wire inf;                               // result is infinite (+ or -)
wire zero;                              // result is zero (+ or -)
wire ns;                // nan sign
wire nss;
wire nso;
wire nsos;
wire isNan,isNans;
wire nanx,nanxs;
 
// Decode fp operation
wire fstat      = op==`FLOAT && fn==`FSTAT;     // get status
wire fdiv       = op==`FLOAT && fn==`FDIV;
wire fdivs      = op==`FLOAT && fn==`FDIVS;
wire ftx        = op==`FLOAT && fn==`FTX;               // trigger exception
wire fcx        = op==`FLOAT && fn==`FCX;               // clear exception
wire fex        = op==`FLOAT && fn==`FEX;               // enable exception
wire fdx        = op==`FLOAT && fn==`FDX;               // disable exception
wire fcmp       = op==`FLOAT && (fn==`FCMP || fn==`FCMPS);
wire frm        = op==`FLOAT && fn==`FRM;               // set rounding mode
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R;
// zero-latency operations (handled by the fpZLUnit instances)
wire zl_op =  (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) ||
              (op==`FLOAT && fn==`FCMP) ||
              (op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) ||
              (op==`FLOAT && (fn==`FCMPS))
             ;
// latency-of-one operations (conversions, handled by the fpLOOUnit instances).
// ITOFS is a function code, so it is compared against fn like its siblings.
wire loo_op = (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) ||
              (op==`SINGLE_R && (fn==`FTOIS || fn==`ITOFS));
wire loo_done;
 
wire subinf;
wire zerozero;
wire infzero;
wire infdiv;
 
// floating point control and status
reg [1:0] rm;    // rounding mode
reg inexe;              // inexact exception enable
reg dbzxe;              // divide by zero exception enable
reg underxe;    // underflow exception enable
reg overxe;             // overflow exception enable
reg invopxe;    // invalid operation exception enable
 
reg nsfp;               // non-standard floating point indicator
 
reg fractie;    // fraction inexact
reg raz;                // rounded away from zero
 
reg inex;               // inexact exception
reg dbzx;               // divide by zero exception
reg underx;             // underflow exception
reg overx;              // overflow exception
reg giopx;              // global invalid operation exception
reg sx;                 // summary exception
 
reg swtx;               // software triggered exception indicator
 
wire gx = swtx|inex|dbzx|underx|overx|giopx;    // global exception indicator
 
// breakdown of invalid operation exceptions
reg cvtx;               // conversion exception
reg sqrtx;              // squareroot exception
reg NaNCmpx;    // NaN comparison exception
reg infzerox;   // multiply infinity by zero
reg zerozerox;  // division of zero by zero
reg infdivx;    // division of infinities
reg subinfx;    // subtraction of infinities
reg snanx;              // signalling nan
 
// NOTE(review): pipe_ce also gates the divider that produces divDone; whether
// the divider can make progress while divDone is low depends on fpDiv, which
// is not visible here - confirm it cannot stall itself.
wire divDone;
wire pipe_ce = ce & divDone;    // divide must be done in order for pipe to clock
 
// Exception events detected by hardware on the result currently at the end of
// the pipeline, already qualified by their enable bits.
// NOTE(review): nanx comes from the 64-bit zero-latency unit even when the
// compare is single precision (nanxs is left unused) - confirm intent.
wire hwInfZero  = invopxe & infzero;
wire hwZeroZero = invopxe & zerozero;
wire hwSubInf   = invopxe & subinf;
wire hwInfDiv   = invopxe & infdiv;
wire hwDbz      = dbzxe & divByZero;
wire hwNanCmp   = invopxe & nanx & fcmp;        // must be a compare
wire hwAny      = hwNanCmp | hwInfZero | hwZeroZero | hwSubInf | hwInfDiv | hwDbz;
 
// Bit mask supplied by software for ftx/fcx/fex/fdx (either operand may carry it).
wire [5:0] xmask = a[5:0] | b[5:0];
 
// Control / status register update.
// The sticky hardware accumulation is written first so that the explicit
// ftx/fcx assignments below take precedence (the last non-blocking assignment
// to a register wins). Each overriding assignment still ORs in the hardware
// event so an exception arriving in the same cycle is not lost.
always @(posedge clk)
        // reset: disable and clear all exceptions and status
        if (rst) begin
                rm <= 2'b0;                     // round nearest even - default rounding mode
                inex <= 1'b0;
                dbzx <= 1'b0;
                underx <= 1'b0;
                overx <= 1'b0;
                giopx <= 1'b0;
                swtx <= 1'b0;
                sx <= 1'b0;
 
                // invalid operation breakdown
                cvtx <= 1'b0;
                sqrtx <= 1'b0;
                NaNCmpx <= 1'b0;
                infzerox <= 1'b0;
                zerozerox <= 1'b0;
                infdivx <= 1'b0;
                subinfx <= 1'b0;
                snanx <= 1'b0;
 
                inexe <= 1'b0;
                dbzxe <= 1'b0;
                underxe <= 1'b0;
                overxe <= 1'b0;
                invopxe <= 1'b0;
 
                nsfp <= 1'b0;
 
                // not updated anywhere in this module; cleared so the status
                // word does not read as unknown
                fractie <= 1'b0;
                raz <= 1'b0;
        end
        else if (pipe_ce) begin
                // default: accumulate hardware-detected exceptions
                infzerox  <= infzerox  | hwInfZero;
                zerozerox <= zerozerox | hwZeroZero;
                subinfx   <= subinfx   | hwSubInf;
                infdivx   <= infdivx   | hwInfDiv;
                dbzx      <= dbzx      | hwDbz;
                NaNCmpx   <= NaNCmpx   | hwNanCmp;
                sx        <= sx        | hwAny;
 
                if (ftx) begin
                        // software trigger: set the selected exception flags
                        inex   <= inex   | xmask[4];
                        dbzx   <= dbzx   | xmask[3] | hwDbz;
                        underx <= underx | xmask[2];
                        overx  <= overx  | xmask[1];
                        giopx  <= giopx  | xmask[0];
                        swtx <= 1'b1;
                        sx <= 1'b1;
                end
                else if (fcx) begin
                        // software clear: clear the selected exception flags
                        sx     <= (sx & !xmask[5]) | hwAny;
                        inex   <= inex   & !xmask[4];
                        // bit 3 clears divide by zero; bit 0 clears it as well,
                        // as in the original invalid-operation clear list
                        dbzx   <= (dbzx & !(xmask[3]|xmask[0])) | hwDbz;
                        underx <= underx & !xmask[2];
                        overx  <= overx  & !xmask[1];
                        giopx  <= giopx  & !xmask[0];
                        // clear exception type when global invalid operation is cleared
                        infdivx   <= (infdivx   & !xmask[0]) | hwInfDiv;
                        zerozerox <= (zerozerox & !xmask[0]) | hwZeroZero;
                        subinfx   <= (subinfx   & !xmask[0]) | hwSubInf;
                        infzerox  <= (infzerox  & !xmask[0]) | hwInfZero;
                        NaNCmpx   <= (NaNCmpx   & !xmask[0]) | hwNanCmp;
                        // clearing must drop the software-trigger indicator,
                        // otherwise gx (and 'exception') could never be cleared
                        swtx <= 1'b0;
                end
                else if (fex) begin
                        inexe   <= inexe   | xmask[4];
                        dbzxe   <= dbzxe   | xmask[3];
                        underxe <= underxe | xmask[2];
                        overxe  <= overxe  | xmask[1];
                        invopxe <= invopxe | xmask[0];
                end
                else if (fdx) begin
                        inexe   <= inexe   & !xmask[4];
                        dbzxe   <= dbzxe   & !xmask[3];
                        underxe <= underxe & !xmask[2];
                        overxe  <= overxe  & !xmask[1];
                        invopxe <= invopxe & !xmask[0];
                end
                else if (frm)
                        rm <= a[1:0]|b[1:0];
        end
 
// Decompose operands into sign,exponent,mantissa
wire sa, sb, sas, sbs;
wire [FMSB:0] ma, mb;
wire [22:0] mas, mbs;
 
wire aInf, bInf, aInfs, bInfs;
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [1:0] rmd4; // rounding mode delayed by four pipeline clocks
wire [7:0] op1, op2;
wire [5:0] fn1,fn2;
 
wire [MSB:0] zld_o,lood_o;
wire [31:0] zls_o,loos_o;
wire [MSB:0] zl_o,loo_o;        // precision-selected zero-latency / latency-of-one results
fpZLUnit  #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );
// the double-precision converter drives lood_o, which the output mux reads
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(lood_o), .done(loo_done) );
fpZLUnit  #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
assign loo_o = single ? loos_o : lood_o;
assign zl_o = single ? zls_o : zld_o;
fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );
 
// The rounding mode register is carried down the pipe to the rounding stage.
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rm), .o(rmd4) );
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );
 
delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );
 
// Compute NaN output sign
wire aob_nan = aNan|bNan;       // one of the operands is a nan
wire bothNan = aNan&bNan;       // both of the operands are nans
wire aob_nans = aNans|bNans;    // one of the operands is a nan
wire bothNans = aNans&bNans;    // both of the operands are nans
 
// When both operands are NaNs the sign follows the one with the larger
// mantissa (signs are ANDed when the mantissas are equal).
assign ns = bothNan ?
                                (ma==mb ? sa & sb : ma < mb ? sb : sa) :
                                aNan ? sa : sb;
assign nss = bothNans ?
                                 (mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :
                                  aNans ? sas : sbs;
 
// The NaN tracking advances with the same enable as the rest of the pipeline
// so it stays aligned with the result it describes.
delay5 u8(.clk(clk), .ce(pipe_ce), .i(ns), .o(nso) );
delay5 u9(.clk(clk), .ce(pipe_ce), .i(aob_nan), .o(isNan) );
delay5 u8s(.clk(clk), .ce(pipe_ce), .i(nss), .o(nsos) );
delay5 u9s(.clk(clk), .ce(pipe_ce), .i(aob_nans), .o(isNans) );
 
wire [MSB:0] fpu_o;
wire [MSB+3:0] fpn_o;
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
reg  [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
reg  [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;
 
// These units have a two clock cycle latency
// NOTE(review): the add/sub select is taken from op[0], yet FADD and FSUB are
// decoded from fn elsewhere in this module - confirm op[0] really
// distinguishes add from subtract.
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );
fpDiv    #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul    #(64) u12(.clk(clk), .ce(pipe_ce),          .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv    #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
fpMul    #(32) u12s(.clk(clk), .ce(pipe_ce),          .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );
 
// Select the underflow indication matching the operation two stages down the
// pipe. Both outputs get a default so no latch is inferred.
always @*
begin
        under = 1'b0;
        unders = 1'b0;
        if (op2==`FLOAT)
                case (fn2)
                `FMUL:  under = mulUnder;
                `FDIV:  under = divUnder;
                `FMULS: unders = mulUnders;
                `FDIVS: unders = divUnders;
                default: ;
                endcase
end
 
// Select the raw result matching the operation two stages down the pipe.
// Add/subtract is the default for both precisions, so FADD/FSUB/FADDS/FSUBS
// need no explicit case item and neither output can latch.
always @*
begin
        fres = fas_o;
        fress = fass_o;
        if (op2==`FLOAT)
                case (fn2)
                `FMUL:  fres = fmul_o;
                `FDIV:  fres = fdiv_o;
                `FMULS: fress = fmuls_o;
                `FDIVS: fress = fdivs_o;
                default: ;
                endcase
end
 
// pipeline stage
// one cycle latency
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );
 
// pipeline stage
// one cycle latency
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpn_o), .o(fpu_o) );
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpns_o), .o(fpus_o) );
 
// Result sign: a NaN operand forces the computed NaN sign.
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[MSB]);
 
//fix: status should be registered
// 'o' has three mutually exclusive tri-state drivers: status word,
// double-precision result and single-precision result.
assign o = fstat ? {
        rm,
        inexe,
        dbzxe,
        underxe,
        overxe,
        invopxe,
        nsfp,
 
        fractie,
        raz,
        1'b0,
        so & !zero,
        !so & !zero,
        zero,
        inf,
 
        swtx,
        inex,
        dbzx,
        underx,
        overx,
        giopx,
        gx,
        sx,
 
        cvtx,
        sqrtx,
        NaNCmpx,
        infzerox,
        zerozerox,
        infdivx,
        subinfx,
        snanx
        } : 'bz;
 
assign o = (!fstat & !single) ?
    zl_op ? zld_o :
    loo_op ? lood_o :
    {so,fpu_o[MSB-1:0]} : 'bz;
// The single-precision result is 32 bits wide; it is zero extended to the
// width of 'o' by the assignment.
assign o = (!fstat &  single)?
    zl_op ? zls_o :
    loo_op ? loos_o :
    {so,fpus_o[30:0]} : 'bz;
assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0;
assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 : WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0;
 
// Recognise the canned NaN codes that identify the invalid operation.
assign subinf   = single ? fpus_o[31:0]==`QSUBINFS : WID==64 ? fpu_o[63:0]==`QSUBINF : 0;
assign infdiv   = single ? fpus_o[31:0]==`QINFDIVS : WID==64 ? fpu_o[63:0]==`QINFDIV : 0;
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
assign infzero  = single ? fpus_o[31:0]==`QINFZEROS : WID==64 ? fpu_o[63:0]==`QINFZERO : 0;
 
assign exception = gx;
 
endmodule
 

Browse

Tools

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Blame information for rev 51

Line No.	Rev	Author	Line
1	6	robfinch	`// ============================================================================`
2			`// __`
3			`// \\__/ o\ (C) 2006,2015 Robert Finch, Stratford`
4			`// \ __ / All rights reserved.`
5			`// \/_// robfinch<remove>@finitron.ca`
6			`// \|\|`
7			`//`
8			`// This source file is free software: you can redistribute it and/or modify`
9			`// it under the terms of the GNU Lesser General Public License as published`
10			`// by the Free Software Foundation, either version 3 of the License, or`
11			`// (at your option) any later version.`
12			`//`
13			`// This source file is distributed in the hope that it will be useful,`
14			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
15			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
16			`// GNU General Public License for more details.`
17			`//`
18			`// You should have received a copy of the GNU General Public License`
19			`// along with this program. If not, see <http://www.gnu.org/licenses/>.`
20			`//`
21			`//`
22			`// Thor SuperScalar`
23			`// fpUnit.v`
24			`// - floating point unit`
25			`// - parameterized width`
26			`// - IEEE 754 representation`
27			`//`
28			`// NaN Value Origin`
29			`// 31'h7FC00001 - infinity - infinity`
30			`// 31'h7FC00002 - infinity / infinity`
31			`// 31'h7FC00003 - zero / zero`
32			`// 31'h7FC00004 - infinity X zero`
33			`//`
34			`// Whenever the fpu encounters a NaN input, the NaN is`
35			`// passed through to the output.`
36			`//`
37			`// Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256`
38			`// 2335 LUTS / 1260 slices / 43.4 MHz`
39			`// Ref: Webpack 13.1 Spartan3e xc3s1200e-4fg320`
40			`// 2433 LUTs / 1301 slices / 51.6 MHz`
41			`//`
42			`// Instr. Cyc Lat`
43			`// fc__ ; 1 0 compare, lt le gt ge eq ne or un`
44			`// fabs ; 1 0 absolute value`
45			`// fnabs ; 1 0 negative absolute value`
46			`// fneg ; 1 0 negate`
47			`// fmov ; 1 0 move`
48			`// fman ; 1 0 get mantissa`
49			`// fsign ; 1 0 get sign`
50			`//`
51			`// f2i ; 1 1 convert float to integer`
52			`// i2f ; 1 1 convert integer to float`
53			`//`
54			`// fadd ; 1 4 addition`
55			`// fsub ; 1 4 subtraction`
56			`// fmul ; 1 4 multiplication`
57			`//`
58			`// fdiv ; 16 4 division`
59			`//`
60			`// ftx ; 1 0 trigger fp exception`
61			`// fcx ; 1 0 clear fp exception`
62			`// fex ; 1 0 enable fp exception`
63			`// fdx ; 1 0 disable fp exception`
64			`// frm ; 1 0 set rounding mode`
65			`// fstat ; 1 0 get status register`
66			`//`
67			`// related integer:`
68			`// graf ; 1 0 get random float (0,1]`
69			`//`
70			`// ============================================================================`
71			`//`
72			`include "..\Thor_defines.v"
73
74			`define QINFOS 23'h7FC000 // info
75			`define QSUBINFS 31'h7FC00001 // - infinity - infinity
76			`define QINFDIVS 31'h7FC00002 // - infinity / infinity
77			`define QZEROZEROS 31'h7FC00003 // - zero / zero
78			`define QINFZEROS 31'h7FC00004 // - infinity X zero
79
80			`define QINFO 52'h7FC000 // info
81			`define QSUBINF 62'h7FF0000000000001 // - infinity - infinity
82			`define QINFDIV 62'h7FF0000000000002 // - infinity / infinity
83			`define QZEROZERO 62'h7FF0000000000003 // - zero / zero
84			`define QINFZERO 62'h7FF0000000000004 // - infinity X zero
85
86			`module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);`
87
88			`parameter WID = 32;`
89			`localparam MSB = WID-1;`
90			`localparam EMSB = WID==80 ? 14 :`
91			`WID==64 ? 10 :`
92			`WID==52 ? 10 :`
93			`WID==48 ? 10 :`
94			`WID==44 ? 10 :`
95			`WID==42 ? 10 :`
96			`WID==40 ? 9 :`
97			`WID==32 ? 7 :`
98			`WID==24 ? 6 : 4;`
99			`localparam FMSB = WID==80 ? 63 :`
100			`WID==64 ? 51 :`
101			`WID==52 ? 39 :`
102			`WID==48 ? 35 :`
103			`WID==44 ? 31 :`
104			`WID==42 ? 29 :`
105			`WID==40 ? 28 :`
106			`WID==32 ? 22 :`
107			`WID==24 ? 15 : 9;`
108			`localparam EMSBS = 7;`
109			`localparam FMSBS = 22;`
110			`localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction`
111			`localparam EX = FX + 1 + EMSB + 1 + 1 - 1;`
112			`localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction`
113			`localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;`
114
115			`input rst;`
116			`input clk;`
117			`input ce;`
118			`input [7:0] op;`
119			`input [5:0] fn;`
120			`input ld;`
121			`input [MSB:0] a;`
122			`input [MSB:0] b;`
123			`output tri [MSB:0] o;`
124			`output exception;`
125
126
127			`//------------------------------------------------------------`
128			`// constants`
129			`wire infXp = {11{1'b1}}; // value for infinite exponent / nan`
130			`wire infXps = {8{1'b1}};`
131
132			`// Variables`
133			`wire divByZero; // attempt to divide by zero`
134			`wire inf; // result is infinite (+ or -)`
135			`wire zero; // result is zero (+ or -)`
136			`wire ns; // nan sign`
137			`wire nss;`
138			`wire nso;`
139			`wire nsos;`
140			`wire isNan,isNans;`
141			`wire nanx,nanxs;`
142
143			`// Decode fp operation`
144			wire fstat = op==`FLOAT && fn==`FSTAT; // get status
145			wire fdiv = op==`FLOAT && fn==`FDIV;
146			wire fdivs = op==`FLOAT && fn==`FDIVS;
147			wire ftx = op==`FLOAT && fn==`FTX; // trigger exception
148			wire fcx = op==`FLOAT && fn==`FCX; // clear exception
149			wire fex = op==`FLOAT && fn==`FEX; // enable exception
150			wire fdx = op==`FLOAT && fn==`FDX; // disable exception
151			wire fcmp = op==`FLOAT && (fn==`FCMP \|\| fn==`FCMPS);
152			wire frm = op==`FLOAT && fn==`FRM; // set rounding mode
153			wire single = (op==`FLOAT && fn[5:4]==2'b01) \|\| op==`SINGLE_R;
154			wire zl_op = (op==`DOUBLE_R && (fn==`FABS \|\| fn==`FNABS \|\| fn==`FMOV \|\| fn==`FNEG \|\| fn==`FSIGN \|\| fn==`FMAN)) \|\|
155			(op==`FLOAT && fn==`FCMP) \|\|
156			(op==`SINGLE_R && (fn==`FABSS \|\| fn==`FNABSS \|\| fn==`FMOVS \|\| fn==`FNEGS \|\| fn==`FSIGNS \|\| fn==`FMANS)) \|\|
157			(op==`FLOAT && (fn==`FCMPS))
158			`;`
159			wire loo_op = (op==`DOUBLE_R && (fn==`ITOF \|\| fn==`FTOI)) \|\|
160			(op==`SINGLE_R && (fn==`FTOIS \|\| op==`ITOFS));
161			`wire loo_done;`
162
163			`wire subinf;`
164			`wire zerozero;`
165			`wire infzero;`
166			`wire infdiv;`
167
168			`// floating point control and status`
169			`reg [1:0] rm; // rounding mode`
170			`reg inexe; // inexact exception enable`
171			`reg dbzxe; // divide by zero exception enable`
172			`reg underxe; // underflow exception enable`
173			`reg overxe; // overflow exception enable`
174			`reg invopxe; // invalid operation exception enable`
175
176			`reg nsfp; // non-standard floating point indicator`
177
178			`reg fractie; // fraction inexact`
179			`reg raz; // rounded away from zero`
180
181			`reg inex; // inexact exception`
182			`reg dbzx; // divide by zero exception`
183			`reg underx; // underflow exception`
184			`reg overx; // overflow exception`
185			`reg giopx; // global invalid operation exception`
186			`reg sx; // summary exception`
187
188			`reg swtx; // software triggered exception indicator`
189
190			`wire gx = swtx\|inex\|dbzx\|underx\|overx\|giopx; // global exception indicator`
191
192			`// breakdown of invalid operation exceptions`
193			`reg cvtx; // conversion exception`
194			`reg sqrtx; // squareroot exception`
195			`reg NaNCmpx; // NaN comparison exception`
196			`reg infzerox; // multiply infinity by zero`
197			`reg zerozerox; // division of zero by zero`
198			`reg infdivx; // division of infinities`
199			`reg subinfx; // subtraction of infinities`
200			`reg snanx; // signalling nan`
201
202			`wire divDone;`
203			`wire pipe_ce = ce & divDone; // divide must be done in order for pipe to clock`
204
205			`always @(posedge clk)`
206			`// reset: disable and clear all exceptions and status`
207			`if (rst) begin`
208			`rm <= 2'b0; // round nearest even - default rounding mode`
209			`inex <= 1'b0;`
210			`dbzx <= 1'b0;`
211			`underx <= 1'b0;`
212			`overx <= 1'b0;`
213			`giopx <= 1'b0;`
214			`swtx <= 1'b0;`
215			`sx <= 1'b0;`
216			`NaNCmpx <= 1'b0;`
217
218			`inexe <= 1'b0;`
219			`dbzxe <= 1'b0;`
220			`underxe <= 1'b0;`
221			`overxe <= 1'b0;`
222			`invopxe <= 1'b0;`
223
224			`nsfp <= 1'b0;`
225
226			`end`
227			`else if (pipe_ce) begin`
228			`if (ftx) begin`
229			`inex <= inex \| (a[4]\|b[4]);`
230			`dbzx <= dbzx \| (a[3]\|b[3]);`
231			`underx <= underx \| (a[2]\|b[2]);`
232			`overx <= overx \| (a[1]\|b[1]);`
233			`giopx <= giopx \| (a[0]\|b[0]);`
234			`swtx <= 1'b1;`
235			`sx <= 1'b1;`
236			`end`
237			`else if (fcx) begin`
238			`sx <= sx & !(a[5]\|b[5]);`
239			`inex <= inex & !(a[4]\|b[4]);`
240			`dbzx <= dbzx & !(a[3]\|b[3]);`
241			`underx <= underx & !(a[2]\|b[2]);`
242			`overx <= overx & !(a[1]\|b[1]);`
243			`giopx <= giopx & !(a[0]\|b[0]);`
244			`// clear exception type when global invalid operation is cleared`
245			`infdivx <= infdivx & !(a[0]\|b[0]);`
246			`zerozerox <= zerozerox & !(a[0]\|b[0]);`
247			`subinfx <= subinfx & !(a[0]\|b[0]);`
248			`infzerox <= infzerox & !(a[0]\|b[0]);`
249			`NaNCmpx <= NaNCmpx & !(a[0]\|b[0]);`
250			`dbzx <= dbzx & !(a[0]\|b[0]);`
251			`swtx <= 1'b1;`
252			`end`
253			`else if (fex) begin`
254			`inexe <= inexe \| (a[4]\|b[4]);`
255			`dbzxe <= dbzxe \| (a[3]\|b[3]);`
256			`underxe <= underxe \| (a[2]\|b[2]);`
257			`overxe <= overxe \| (a[1]\|b[1]);`
258			`invopxe <= invopxe \| (a[0]\|b[0]);`
259			`end`
260			`else if (fdx) begin`
261			`inexe <= inexe & !(a[4]\|b[4]);`
262			`dbzxe <= dbzxe & !(a[3]\|b[3]);`
263			`underxe <= underxe & !(a[2]\|b[2]);`
264			`overxe <= overxe & !(a[1]\|b[1]);`
265			`invopxe <= invopxe & !(a[0]\|b[0]);`
266			`end`
267			`else if (frm)`
268			`rm <= a[1:0]\|b[1:0];`
269
270			`infzerox <= infzerox \| (invopxe & infzero);`
271			`zerozerox <= zerozerox \| (invopxe & zerozero);`
272			`subinfx <= subinfx \| (invopxe & subinf);`
273			`infdivx <= infdivx \| (invopxe & infdiv);`
274			`dbzx <= dbzx \| (dbzxe & divByZero);`
275			`NaNCmpx <= NaNCmpx \| (invopxe & nanx & fcmp); // must be a compare`
276			`sx <= sx \|`
277			`(invopxe & nanx & fcmp) \|`
278			`(invopxe & (infzero\|zerozero\|subinf\|infdiv)) \|`
279			`(dbzxe & divByZero);`
280			`end`
281
282			`// Decompose operands into sign,exponent,mantissa`
283			`wire sa, sb, sas, sbs;`
284			`wire [FMSB:0] ma, mb;`
285			`wire [22:0] mas, mbs;`
286
287			`wire aInf, bInf, aInfs, bInfs;`
288			`wire aNan, bNan, aNans, bNans;`
289			`wire az, bz, azs, bzs;`
290			`wire [1:0] rmd4; // 1st stage delayed`
291			`wire [7:0] op1, op2;`
292			`wire [5:0] fn1,fn2;`
293
294			`wire [MSB:0] zld_o,lood_o;`
295			`wire [31:0] zls_o,loos_o;`
296			`fpZLUnit #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );`
297			`fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(loo_o), .done(loos_done) );`
298			`fpZLUnit #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );`
299			`fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );`
300			`assign loo_o = single ? loos_o : lood_o;`
301			`assign zl_o = single ? zls_o : zld_o;`
302			`fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );`
303			`fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );`
304			`fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );`
305			`fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );`
306
307			`delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rmd), .o(rmd4) );`
308			`delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );`
309			`delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );`
310			`delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );`
311			`delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );`
312
313			`delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)\|(bzs & !aNans & fdivs)), .o(divByZero) );`
314
315			`// Compute NaN output sign`
316			`wire aob_nan = aNan\|bNan; // one of the operands is a nan`
317			`wire bothNan = aNan&bNan; // both of the operands are nans`
318			`wire aob_nans = aNans\|bNans; // one of the operands is a nan`
319			`wire bothNans = aNans&bNans; // both of the operands are nans`
320
321			`assign ns = bothNan ?`
322			`(ma==mb ? sa & sb : ma < mb ? sb : sa) :`
323			`aNan ? sa : sb;`
324			`assign nss = bothNans ?`
325			`(mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :`
326			`aNans ? sas : sbs;`
327
328			`delay5 u8(.clk(clk), .ce(ce), .i(ns), .o(nso) );`
329			`delay5 u9(.clk(clk), .ce(ce), .i(aob_nan), .o(isNan) );`
330			`delay5 u8s(.clk(clk), .ce(ce), .i(nss), .o(nsos) );`
331			`delay5 u9s(.clk(clk), .ce(ce), .i(aob_nans), .o(isNans) );`
332
333			`wire [MSB:0] fpu_o;`
334			`wire [MSB+3:0] fpn_o;`
335			`wire [EX:0] fdiv_o;`
336			`wire [EX:0] fmul_o;`
337			`wire [EX:0] fas_o;`
338			`reg [EX:0] fres;`
339			`wire [31:0] fpus_o;`
340			`wire [31+3:0] fpns_o;`
341			`wire [EXS:0] fdivs_o;`
342			`wire [EXS:0] fmuls_o;`
343			`wire [EXS:0] fass_o;`
344			`reg [EXS:0] fress;`
345			`wire divUnder,divUnders;`
346			`wire mulUnder,mulUnders;`
347			`reg under,unders;`
348
349			`// These units have a two clock cycle latency`
350			`fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );`
351			`fpDiv #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );`
352			`fpMul #(64) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );`
353			`fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );`
354			`fpDiv #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );`
355			`fpMul #(32) u12s(.clk(clk), .ce(pipe_ce), .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );`
356
357			`always @(op2,fn2,mulUnder,divUnder,mulUnders,divUnders)`
358			`case (op2)`
359			`FLOAT:
360			`case (fn2)`
361			`FMUL: under = mulUnder;
362			`FDIV: under = divUnder;
363			`FMULS: unders = mulUnders;
364			`FDIVS: unders = divUnders;
365			`default: begin under = 0; unders = 0; end`
366			`endcase`
367			`default: begin under = 0; unders = 0; end`
368			`endcase`
369
370			`always @(op2,fn2,fas_o,fmul_o,fdiv_o,fass_o,fmuls_o,fdivs_o)`
371			`case (op2)`
372			`FLOAT:
373			`case(fn2)`
374			`FADD: fres <= fas_o;
375			`FSUB: fres <= fas_o;
376			`FMUL: fres <= fmul_o;
377			`FDIV: fres <= fdiv_o;
378			`FADDS: fress <= fass_o;
379			`FSUBS: fress <= fass_o;
380			`FMULS: fress <= fmuls_o;
381			`FDIVS: fress <= fdivs_o;
382			`default: begin fres <= fas_o; fress <= fass_o; end`
383			`endcase`
384			`default: begin fres <= fas_o; fress <= fass_o; end`
385			`endcase`
386
387			`// pipeline stage`
388			`// one cycle latency`
389			`fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );`
390			`fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );`
391
392			`// pipeline stage`
393			`// one cycle latency`
394			`fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rm4), .i(fpn_o), .o(fpu_o) );`
395			`fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rm4), .i(fpns_o), .o(fpus_o) );`
396
397			`wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[63]);`
398
399			`//fix: status should be registered`
400			`assign o = fstat ? {`
401			`rm,`
402			`inexe,`
403			`dbzxe,`
404			`underxe,`
405			`overxe,`
406			`invopxe,`
407			`nsfp,`
408
409			`fractie,`
410			`raz,`
411			`1'b0,`
412			`so & !zero,`
413			`!so & !zero,`
414			`zero,`
415			`inf,`
416
417			`swtx,`
418			`inex,`
419			`dbzx,`
420			`underx,`
421			`overx,`
422			`giopx,`
423			`gx,`
424			`sx,`
425
426			`cvtx,`
427			`sqrtx,`
428			`NaNCmpx,`
429			`infzerox,`
430			`zerozerox,`
431			`infdivx,`
432			`subinfx,`
433			`snanx`
434			`} : 'bz;`
435
436			`assign o = (!fstat & !single) ?`
437			`zl_op ? zld_o :`
438			`loo_op ? lood_o :`
439			`{so,fpu_o[MSB-1:0]} : 'bz;`
440			`assign o = (!fstat & single)?`
441			`zl_op ? zls_o :`
442			`loo_op ? loos_o :`
443			`{so,fpus_o[MSB-1:0]} : 'bz;`
444			`assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0;`
445			`assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 : WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0;`
446
447			assign subinf = single ? fpus_o[31:0]==`QSUBINFS : WID==64 ? fpu_o[63:0]==`QSUBINF : 0;
448			assign infdiv = single ? fpus_o[31:0]==`QINFDIVS : WID==64 ? fpu_o[63:0]==`QINFDIV : 0;
449			assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
450			assign infzero = single ? fpus_o[31:0]==`QINFZEROS : WID==64 ? fpu_o[31:0]==`QINFZERO : 0;
451
452			`assign exception = gx;`
453
454			`endmodule`
455