URL
https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor
[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Rev 25
Go to most recent revision | Compare with Previous | Blame | View Log
// ============================================================================
//        __
//   \\__/ o\    (C) 2006,2015  Robert Finch, Stratford
//    \  __ /    All rights reserved.
//     \/_//     robfinch<remove>@finitron.ca
//       ||
//
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
//
// Thor SuperScalar
// fpUnit.v
//  - floating point unit
//  - parameterized width
//  - IEEE 754 representation
//
// NaN Value        Origin
// 31'h7FC00001     - infinity - infinity
// 31'h7FC00002     - infinity / infinity
// 31'h7FC00003     - zero / zero
// 31'h7FC00004     - infinity X zero
//
// Whenever the fpu encounters a NaN input, the NaN is
// passed through to the output.
//
// Ref: Webpack 8.2  Spartan3-4  xc3s1000-4ft256
// 2335 LUTS / 1260 slices / 43.4 MHz
// Ref: Webpack 13.1 Spartan3e   xc3s1200e-4fg320
// 2433 LUTs / 1301 slices / 51.6 MHz
//
// Instr.   Cyc Lat
// fc__   ; 1   0   compare, lt le gt ge eq ne or un
// fabs   ; 1   0   absolute value
// fnabs  ; 1   0   negative absolute value
// fneg   ; 1   0   negate
// fmov   ; 1   0   move
// fman   ; 1   0   get mantissa
// fsign  ; 1   0   get sign
//
// f2i    ; 1   1   convert float to integer
// i2f    ; 1   1   convert integer to float
//
// fadd   ; 1   4   addition
// fsub   ; 1   4   subtraction
// fmul   ; 1   4   multiplication
//
// fdiv   ; 16  4   division
//
// ftx    ; 1   0   trigger fp exception
// fcx    ; 1   0   clear fp exception
// fex    ; 1   0   enable fp exception
// fdx    ; 1   0   disable fp exception
// frm    ; 1   0   set rounding mode
// fstat  ; 1   0   get status register
//
// related integer:
// graf   ; 1   0   get random float (0,1]
//
// ============================================================================
//
`include "..\Thor_defines.v"

// Single precision NaN codes (see the "NaN Value / Origin" table above).
`define QINFOS      23'h7FC000      // info
`define QSUBINFS    31'h7FC00001    // - infinity - infinity
`define QINFDIVS    31'h7FC00002    // - infinity / infinity
`define QZEROZEROS  31'h7FC00003    // - zero / zero
`define QINFZEROS   31'h7FC00004    // - infinity X zero

// Double precision NaN codes.
// NOTE(review): these literals are sized 62 bits but 'h7FF0_0000_0000_000x
// needs 63 bits, so the top bit is truncated by the sizing rules and the
// effective value is 'h3FF0_0000_0000_000x.  The macros are left untouched
// here because other files may depend on their exact width - confirm against
// the arithmetic units that generate these codes and widen to 63'h if
// appropriate.
`define QINFO       52'h7FC000      // info
`define QSUBINF     62'h7FF0000000000001    // - infinity - infinity
`define QINFDIV     62'h7FF0000000000002    // - infinity / infinity
`define QZEROZERO   62'h7FF0000000000003    // - zero / zero
`define QINFZERO    62'h7FF0000000000004    // - infinity X zero

// ----------------------------------------------------------------------------
// fpUnit
//
// Top level of the floating point unit.  Decodes the operation from op/fn,
// maintains the floating point control/status register, and multiplexes the
// results of the zero-latency unit (fpZLUnit), the convert unit (fpLOOUnit)
// and the add/sub, multiply and divide pipelines onto the tri-state output o.
// A double (64 bit) and a single (32 bit, low half of a/b) datapath are
// instantiated side by side; "single" selects between them.
//
// Ports:
//   rst        synchronous reset of the control/status register
//   clk        clock
//   ce         clock enable
//   op         opcode
//   fn         function code
//   ld         load strobe passed to the dividers
//   a, b       operands
//   o          result (or status register image for fstat)
//   exception  global exception indicator (gx)
//
// NOTE(review): the sub-units are instantiated with fixed widths of 64 and 32
// and the result logic indexes fpu_o[63], so the module presumably is meant
// to be used with WID = 64 even though the default is 32 - confirm.
// ----------------------------------------------------------------------------
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);

parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;
localparam EMSBS = 7;
localparam FMSBS = 22;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1; // the MSB of the expanded fraction
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;

input rst;
input clk;
input ce;
input [7:0] op;
input [5:0] fn;
input ld;
input [MSB:0] a;
input [MSB:0] b;
output tri [MSB:0] o;
output exception;

//------------------------------------------------------------
// constants
// (Declared with their full width; the original declared these as one bit
// wires, which truncated the replicated value.)
wire [10:0] infXp = {11{1'b1}};     // value for infinite exponent / nan
wire [7:0] infXps = {8{1'b1}};

// Variables
wire divByZero;     // attempt to divide by zero
wire inf;           // result is infinite (+ or -)
wire zero;          // result is zero (+ or -)
wire ns;            // nan sign
wire nss;
wire nso;
wire nsos;
wire isNan,isNans;
wire nanx,nanxs;

// Decode fp operation
wire fstat  = op==`FLOAT && fn==`FSTAT;     // get status
wire fdiv   = op==`FLOAT && fn==`FDIV;
wire fdivs  = op==`FLOAT && fn==`FDIVS;
wire ftx    = op==`FLOAT && fn==`FTX;       // trigger exception
wire fcx    = op==`FLOAT && fn==`FCX;       // clear exception
wire fex    = op==`FLOAT && fn==`FEX;       // enable exception
wire fdx    = op==`FLOAT && fn==`FDX;       // disable exception
wire fcmp   = op==`FLOAT && (fn==`FCMP || fn==`FCMPS);
wire frm    = op==`FLOAT && fn==`FRM;       // set rounding mode
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R;

// Operations answered by the fpZLUnit instances.
wire zl_op =
    (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) ||
    (op==`FLOAT && fn==`FCMP) ||
    (op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) ||
    (op==`FLOAT && (fn==`FCMPS));

// Operations answered by the fpLOOUnit instances (conversions).
// Fixed: the single precision int-to-float test compared op (not fn)
// against `ITOFS, so it could never decode alongside op==`SINGLE_R.
wire loo_op =
    (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) ||
    (op==`SINGLE_R && (fn==`FTOIS || fn==`ITOFS));
wire loo_done;

wire subinf;
wire zerozero;
wire infzero;
wire infdiv;

// floating point control and status
reg [1:0] rm;   // rounding mode
reg inexe;      // inexact exception enable
reg dbzxe;      // divide by zero exception enable
reg underxe;    // underflow exception enable
reg overxe;     // overflow exception enable
reg invopxe;    // invalid operation exception enable

reg nsfp;       // non-standard floating point indicator

reg fractie;    // fraction inexact
reg raz;        // rounded away from zero

reg inex;       // inexact exception
reg dbzx;       // divide by zero exception
reg underx;     // underflow exception
reg overx;      // overflow exception
reg giopx;      // global invalid operation exception
reg sx;         // summary exception

reg swtx;       // software triggered exception indicator

wire gx = swtx|inex|dbzx|underx|overx|giopx;    // global exception indicator

// breakdown of invalid operation exceptions
reg cvtx;       // conversion exception
reg sqrtx;      // squareroot exception
reg NaNCmpx;    // NaN comparison exception
reg infzerox;   // multiply infinity by zero
reg zerozerox;  // division of zero by zero
reg infdivx;    // division of infinities
reg subinfx;    // subtraction of infinities
reg snanx;      // signalling nan

wire divDone;
wire pipe_ce = ce & divDone;    // divide must be done in order for pipe to clock

// Exception events raised by the datapath, qualified by their enables.
// The NaN compare event uses the NaN flag of the unit matching the operand
// size (the original always used the double precision flag and left nanxs
// unconnected to anything).
wire cmpNan     = single ? nanxs : nanx;
wire nancmp_ev  = invopxe & cmpNan & fcmp;      // must be a compare
wire infzero_ev = invopxe & infzero;
wire zerozero_ev= invopxe & zerozero;
wire subinf_ev  = invopxe & subinf;
wire infdiv_ev  = invopxe & infdiv;
wire dbz_ev     = dbzxe & divByZero;
wire any_ev     = nancmp_ev | infzero_ev | zerozero_ev | subinf_ev | infdiv_ev | dbz_ev;

// Control / status register.
//
// The datapath events are applied first as default (sticky OR) assignments;
// the ftx/fcx branches then override the same registers with the event
// folded in.  In the original the sticky assignments came last, and since the
// last non-blocking assignment to a register wins, fcx could never clear
// sx, dbzx, NaNCmpx or the invalid operation subtype flags, and ftx could not
// set dbzx or sx.
always @(posedge clk)
    // reset: disable and clear all exceptions and status
    if (rst) begin
        rm <= 2'b0;         // round nearest even - default rounding mode
        inex <= 1'b0;
        dbzx <= 1'b0;
        underx <= 1'b0;
        overx <= 1'b0;
        giopx <= 1'b0;
        swtx <= 1'b0;
        sx <= 1'b0;
        NaNCmpx <= 1'b0;
        // The following flags appear in the fstat image but were not reset
        // in the original, so the status read back X after reset.
        infzerox <= 1'b0;
        zerozerox <= 1'b0;
        infdivx <= 1'b0;
        subinfx <= 1'b0;
        cvtx <= 1'b0;       // never set elsewhere in this module
        sqrtx <= 1'b0;      // never set elsewhere in this module
        snanx <= 1'b0;      // never set elsewhere in this module
        fractie <= 1'b0;    // never set elsewhere in this module
        raz <= 1'b0;        // never set elsewhere in this module

        inexe <= 1'b0;
        dbzxe <= 1'b0;
        underxe <= 1'b0;
        overxe <= 1'b0;
        invopxe <= 1'b0;

        nsfp <= 1'b0;
    end
    else if (pipe_ce) begin
        // Default: accumulate datapath events into the sticky flags.
        infzerox <= infzerox | infzero_ev;
        zerozerox <= zerozerox | zerozero_ev;
        subinfx <= subinfx | subinf_ev;
        infdivx <= infdivx | infdiv_ev;
        dbzx <= dbzx | dbz_ev;
        NaNCmpx <= NaNCmpx | nancmp_ev;
        sx <= sx | any_ev;

        if (ftx) begin
            // software trigger: set the flags selected by a/b bits 4..0
            inex <= inex | (a[4]|b[4]);
            dbzx <= dbzx | dbz_ev | (a[3]|b[3]);
            underx <= underx | (a[2]|b[2]);
            overx <= overx | (a[1]|b[1]);
            giopx <= giopx | (a[0]|b[0]);
            swtx <= 1'b1;
            sx <= 1'b1;
        end
        else if (fcx) begin
            // clear the flags selected by a/b bits 5..0; an event arriving
            // in the same cycle is still recorded
            sx <= (sx & !(a[5]|b[5])) | any_ev;
            inex <= inex & !(a[4]|b[4]);
            // Fixed: the original assigned dbzx twice here (bit 3, then
            // bit 0), so bit 3 had no effect.
            dbzx <= (dbzx & !(a[3]|b[3])) | dbz_ev;
            underx <= underx & !(a[2]|b[2]);
            overx <= overx & !(a[1]|b[1]);
            giopx <= giopx & !(a[0]|b[0]);
            // clear exception type when global invalid operation is cleared
            infdivx <= (infdivx & !(a[0]|b[0])) | infdiv_ev;
            zerozerox <= (zerozerox & !(a[0]|b[0])) | zerozero_ev;
            subinfx <= (subinfx & !(a[0]|b[0])) | subinf_ev;
            infzerox <= (infzerox & !(a[0]|b[0])) | infzero_ev;
            NaNCmpx <= (NaNCmpx & !(a[0]|b[0])) | nancmp_ev;
            // Fixed: the original set swtx to 1 here, which raised the
            // global exception indicator on every clear with no way to
            // lower it short of reset.
            // NOTE(review): cleared unconditionally because no selector bit
            // is defined for it in this file - confirm intended behaviour.
            swtx <= 1'b0;
        end
        else if (fex) begin
            inexe <= inexe | (a[4]|b[4]);
            dbzxe <= dbzxe | (a[3]|b[3]);
            underxe <= underxe | (a[2]|b[2]);
            overxe <= overxe | (a[1]|b[1]);
            invopxe <= invopxe | (a[0]|b[0]);
        end
        else if (fdx) begin
            inexe <= inexe & !(a[4]|b[4]);
            dbzxe <= dbzxe & !(a[3]|b[3]);
            underxe <= underxe & !(a[2]|b[2]);
            overxe <= overxe & !(a[1]|b[1]);
            invopxe <= invopxe & !(a[0]|b[0]);
        end
        else if (frm)
            rm <= a[1:0]|b[1:0];
    end

// Decompose operands into sign,exponent,mantissa
wire sa, sb, sas, sbs;
wire [FMSB:0] ma, mb;
wire [22:0] mas, mbs;

wire aInf, bInf, aInfs, bInfs;
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [1:0] rmd4;    // rounding mode delayed to line up with the rounding stage
wire [7:0] op1, op2;
wire [5:0] fn1,fn2;

wire [MSB:0] zld_o,lood_o;
wire [31:0] zls_o,loos_o;

// Zero-latency and convert units, double and single precision.
// Fixed: u7 drove "loo_o" and "loos_done", which were undeclared (implicit
// one bit) nets; lood_o was left undriven and loo_done unconnected.
fpZLUnit #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(lood_o), .done(loo_done) );
fpZLUnit #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
// The original also had "assign loo_o = ..." and "assign zl_o = ..." on
// implicit one bit nets that nothing read; they are dropped because the
// output multiplexer below selects between the double and single results
// directly.

fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );

// Fixed: the delay input was the undeclared net "rmd"; the rounding mode
// register is rm.
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rm), .o(rmd4) );
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );

delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );

// Compute NaN output sign
wire aob_nan = aNan|bNan;       // one of the operands is a nan
wire bothNan = aNan&bNan;       // both of the operands are nans
wire aob_nans = aNans|bNans;    // one of the operands is a nan
wire bothNans = aNans&bNans;    // both of the operands are nans

assign ns = bothNan ?
                (ma==mb ? sa & sb : ma < mb ? sb : sa) :
                aNan ? sa : sb;
assign nss = bothNans ?
                (mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :
                aNans ? sas : sbs;

// NOTE(review): these four delays are enabled by ce while the rest of the
// pipeline uses pipe_ce - confirm this is intentional.
delay5 u8(.clk(clk), .ce(ce), .i(ns), .o(nso) );
delay5 u9(.clk(clk), .ce(ce), .i(aob_nan), .o(isNan) );
delay5 u8s(.clk(clk), .ce(ce), .i(nss), .o(nsos) );
delay5 u9s(.clk(clk), .ce(ce), .i(aob_nans), .o(isNans) );

wire [MSB:0] fpu_o;
wire [MSB+3:0] fpn_o;
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
reg  [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
reg  [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;

// These units have a two clock cycle latency
// NOTE(review): the add/subtract select is taken from op[0]; the other
// decodes in this file distinguish operations by fn - confirm against the
// opcode definitions.
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );
fpDiv    #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul    #(64) u12(.clk(clk), .ce(pipe_ce), .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv    #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
fpMul    #(32) u12s(.clk(clk), .ce(pipe_ce), .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );

// Select the underflow indication for the operation in the second stage.
// Both outputs get a default first so that no latch is inferred (the
// original left one of under/unders unassigned in every matching branch).
always @*
begin
    under = 1'b0;
    unders = 1'b0;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  under = mulUnder;
        `FDIV:  under = divUnder;
        `FMULS: unders = mulUnders;
        `FDIVS: unders = divUnders;
        default: ;
        endcase
end

// Select the raw result for the operation in the second stage.
// The add/subtract result is the default, as in the original; blocking
// assignments and full defaults are used so the block is purely
// combinational.
always @*
begin
    fres = fas_o;
    fress = fass_o;
    if (op2==`FLOAT)
        case (fn2)
        `FMUL:  fres = fmul_o;
        `FDIV:  fres = fdiv_o;
        `FMULS: fress = fmuls_o;
        `FDIVS: fress = fdivs_o;
        default: ;  // FADD/FSUB/FADDS/FSUBS and everything else: add/sub result
        endcase
end

// pipeline stage
// one cycle latency
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );

// pipeline stage
// one cycle latency
// Fixed: the rounding mode input was the undeclared net "rm4"; the delayed
// rounding mode is rmd4.
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpn_o), .o(fpu_o) );
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpns_o), .o(fpus_o) );

// Result sign: NaN sign when an operand was a NaN, otherwise the sign of
// the rounded result.
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[63]);

//fix: status should be registered
assign o = fstat ? {
    rm, inexe, dbzxe, underxe, overxe, invopxe, nsfp,
    fractie, raz, 1'b0, so & !zero, !so & !zero, zero, inf,
    swtx, inex, dbzx, underx, overx, giopx, gx, sx,
    cvtx, sqrtx, NaNCmpx, infzerox, zerozerox, infdivx, subinfx, snanx
    } : 'bz;

assign o = (!fstat & !single) ?
    zl_op ? zld_o :
    loo_op ? lood_o :
    {so,fpu_o[MSB-1:0]} : 'bz;

// Fixed: the single precision result used fpus_o[MSB-1:0], which indexes
// past the 32 bit vector and places the sign at bit MSB instead of bit 31.
assign o = (!fstat & single) ?
    zl_op ? zls_o :
    loo_op ? loos_o :
    {so,fpus_o[30:0]} : 'bz;

assign zero = single ? fpus_o[30:0]==0 :
              WID==64 ? fpu_o[62:0]==0 : 0;
assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 :
             WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0;

assign subinf = single ? fpus_o[31:0]==`QSUBINFS :
                WID==64 ? fpu_o[63:0]==`QSUBINF : 0;
assign infdiv = single ? fpus_o[31:0]==`QINFDIVS :
                WID==64 ? fpu_o[63:0]==`QINFDIV : 0;
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS :
                  WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
// Fixed: the double precision compare looked at fpu_o[31:0] only, unlike
// the three sibling compares above.
assign infzero = single ? fpus_o[31:0]==`QINFZEROS :
                 WID==64 ? fpu_o[63:0]==`QINFZERO : 0;

assign exception = gx;

endmodule
Go to most recent revision | Compare with Previous | Blame | View Log