URL https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Rev 25

Go to most recent revision | Compare with Previous | Blame | View Log
// ============================================================================
//        __
//   \\__/ o\    (C) 2006,2015  Robert Finch, Stratford
//    \  __ /    All rights reserved.
//     \/_//     robfinch<remove>@finitron.ca
//       ||
//
// This source file is free software: you can redistribute it and/or modify 
// it under the terms of the GNU Lesser General Public License as published 
// by the Free Software Foundation, either version 3 of the License, or     
// (at your option) any later version.                                      
//                                                                          
// This source file is distributed in the hope that it will be useful,      
// but WITHOUT ANY WARRANTY; without even the implied warranty of           
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            
// GNU General Public License for more details.                             
//                                                                          
// You should have received a copy of the GNU General Public License        
// along with this program.  If not, see <http://www.gnu.org/licenses/>.    
//
//
// Thor SuperScalar
//	fpUnit.v
//  - floating point unit
//  - parameterized width
//  - IEEE 754 representation
//
//	NaN Value		Origin
// 31'h7FC00001    - infinity - infinity
// 31'h7FC00002    - infinity / infinity
// 31'h7FC00003    - zero / zero
// 31'h7FC00004    - infinity X zero
//
// Whenever the fpu encounters a NaN input, the NaN is
// passed through to the output.
//
// Ref: Webpack 8.2  Spartan3-4  xc3s1000-4ft256
// 2335 LUTS / 1260 slices / 43.4 MHz
// Ref: Webpack 13.1 Spartan3e   xc3s1200e-4fg320
// 2433 LUTs / 1301 slices / 51.6 MHz
//
// Instr.  Cyc Lat
// fc__    ; 1  0    compare, lt le gt ge eq ne or un
// fabs    ; 1  0     absolute value
// fnabs    ; 1  0     negative absolute value
// fneg    ; 1  0     negate
// fmov    ; 1  0     move
// fman    ; 1  0     get mantissa
// fsign    ; 1  0     get sign
//
// f2i        ; 1  1  convert float to integer
// i2f        ; 1  1  convert integer to float
//
// fadd    ; 1  4    addition
// fsub    ; 1  4  subtraction
// fmul    ; 1  4  multiplication
//
// fdiv    ; 16 4    division
//
// ftx        ; 1  0  trigger fp exception
// fcx        ; 1  0  clear fp exception
// fex        ; 1  0  enable fp exception
// fdx        ; 1  0  disable fp exception
// frm        ; 1  0  set rounding mode
// fstat    ; 1  0  get status register
//
// related integer:
// graf    ; 1  0  get random float (0,1]
//
// ============================================================================
//
`include "..\Thor_defines.v"
 
// Signature values used to tag the origin of an invalid-operation result.
// Each code omits the sign bit: single-precision codes are 31 bits wide,
// double-precision codes are 63 bits wide.  The literal size must be large
// enough to hold the value; an undersized literal is truncated from the left,
// which would turn 7FF0... into 3FF0... (an ordinary finite number).

// Single precision
`define	QINFOS		23'h7FC000		// info
`define	QSUBINFS	31'h7FC00001	// - infinity - infinity
`define QINFDIVS	31'h7FC00002	// - infinity / infinity
`define QZEROZEROS	31'h7FC00003	// - zero / zero
`define QINFZEROS	31'h7FC00004	// - infinity X zero
 
// Double precision
`define	QINFO		52'h7FC000		// info
`define	QSUBINF 	63'h7FF0000000000001	// - infinity - infinity
`define QINFDIV 	63'h7FF0000000000002	// - infinity / infinity
`define QZEROZERO   63'h7FF0000000000003	// - zero / zero
`define QINFZERO	63'h7FF0000000000004	// - infinity X zero
 
// ----------------------------------------------------------------------------
// fpUnit
//
// Top level of the floating point unit.  It instantiates a double precision
// (64 bit) and a single precision (32 bit) copy of each functional unit
// (zero-latency ops, convert ops, add/sub, multiply, divide, normalize, round)
// and selects between them with the decoded 'single' signal.  It also holds
// the floating point control / status register, which is updated by the
// ftx / fcx / fex / fdx / frm operations and read back with fstat.
//
// Ports:
//   rst        synchronous reset of the control / status register
//   clk        clock
//   ce         clock enable; the pipeline only advances when the divider
//              also reports done (see pipe_ce)
//   op, fn     opcode and function code of the operation
//   ld         load strobe passed to the dividers
//   a, b       operands (single precision ops use bits [31:0])
//   o          tri-state result bus
//   exception  global exception indicator (gx)
//
// NOTE(review): the double precision datapath is instantiated with a fixed
// width of 64 and bit 63 of its result is referenced directly, so the module
// only appears coherent when WID is overridden to 64 even though the default
// is 32 -- confirm with the instantiating module.
// ----------------------------------------------------------------------------
module fpUnit(rst, clk, ce, op, fn, ld, a, b, o, exception);
 
parameter WID = 32;
localparam MSB = WID-1;
// MSB of the exponent field for the selected width
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
				  WID==52 ? 10 :
				  WID==48 ? 10 :
				  WID==44 ? 10 :
				  WID==42 ? 10 :
				  WID==40 ?  9 :
				  WID==32 ?  7 :
				  WID==24 ?  6 : 4;
// MSB of the fraction field for the selected width
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
				  WID==52 ? 39 :
				  WID==48 ? 35 :
				  WID==44 ? 31 :
				  WID==42 ? 29 :
				  WID==40 ? 28 :
				  WID==32 ? 22 :
				  WID==24 ? 15 : 9;
// single precision equivalents
localparam EMSBS = 7;
localparam FMSBS = 22;
localparam FX = (FMSB+2)*2-1;	// the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
localparam FXS = (FMSBS+2)*2-1;	// the MSB of the expanded fraction
localparam EXS = FXS + 1 + EMSBS + 1 + 1 - 1;
 
input rst;
input clk;
input ce;
input [7:0] op;
input [5:0] fn;
input ld;
input [MSB:0] a;
input [MSB:0] b;
output tri [MSB:0] o;
output exception;
 
 
//------------------------------------------------------------
// constants
// Declared with their full width so the replication is not truncated to a
// single bit.
wire [10:0] infXp = {11{1'b1}};	// value for infinite exponent / nan
wire [7:0] infXps = {8{1'b1}};
 
// Variables
wire divByZero;			// attempt to divide by zero
wire inf;				// result is infinite (+ or -)
wire zero;				// result is zero (+ or -)
wire ns;		// nan sign
wire nss;
wire nso;
wire nsos;
wire isNan,isNans;
wire nanx,nanxs;
 
// Decode fp operation
wire fstat 	= op==`FLOAT && fn==`FSTAT;	// get status
wire fdiv	= op==`FLOAT && fn==`FDIV;
wire fdivs	= op==`FLOAT && fn==`FDIVS;
wire ftx   	= op==`FLOAT && fn==`FTX;		// trigger exception
wire fcx   	= op==`FLOAT && fn==`FCX;		// clear exception
wire fex	= op==`FLOAT && fn==`FEX;		// enable exception
wire fdx	= op==`FLOAT && fn==`FDX;		// disable exception
wire fcmp	= op==`FLOAT && (fn==`FCMP || fn==`FCMPS);
wire frm	= op==`FLOAT && fn==`FRM;		// set rounding mode
wire single = (op==`FLOAT && fn[5:4]==2'b01) || op==`SINGLE_R;
// zero latency operations: result comes from the fpZLUnit instances
wire zl_op =  (op==`DOUBLE_R && (fn==`FABS || fn==`FNABS || fn==`FMOV || fn==`FNEG || fn==`FSIGN || fn==`FMAN)) ||
              (op==`FLOAT && fn==`FCMP) ||     
              (op==`SINGLE_R && (fn==`FABSS || fn==`FNABSS || fn==`FMOVS || fn==`FNEGS || fn==`FSIGNS || fn==`FMANS)) ||
              (op==`FLOAT && (fn==`FCMPS))
             ;
// conversion operations: result comes from the fpLOOUnit instances.
// ITOFS is a function code, so it is compared against fn like the others.
wire loo_op = (op==`DOUBLE_R && (fn==`ITOF || fn==`FTOI)) ||
              (op==`SINGLE_R && (fn==`FTOIS || fn==`ITOFS));
wire loo_done;
 
wire subinf;
wire zerozero;
wire infzero;
wire infdiv;
 
// floating point control and status
reg [1:0] rm;	// rounding mode
reg inexe;		// inexact exception enable
reg dbzxe;		// divide by zero exception enable
reg underxe;	// underflow exception enable
reg overxe;		// overflow exception enable
reg invopxe;	// invalid operation exception enable
 
reg nsfp;		// non-standard floating point indicator
 
reg fractie;	// fraction inexact
reg raz;		// rounded away from zero
 
reg inex;		// inexact exception
reg dbzx;		// divide by zero exception
reg underx;		// underflow exception
reg overx;		// overflow exception
reg giopx;		// global invalid operation exception
reg sx;			// summary exception
 
reg swtx;		// software triggered exception indicator
 
wire gx = swtx|inex|dbzx|underx|overx|giopx;	// global exception indicator
 
// breakdown of invalid operation exceptions
reg cvtx;		// conversion exception
reg sqrtx;		// squareroot exception
reg NaNCmpx;	// NaN comparison exception
reg infzerox;	// multiply infinity by zero
reg zerozerox;	// division of zero by zero
reg infdivx;	// division of infinities
reg subinfx;	// subtraction of infinities
reg snanx;		// signalling nan
 
wire divDone;
wire pipe_ce = ce & divDone;	// divide must be done in order for pipe to clock
 
// Exception events produced by the datapath, gated by their enables.
// NOTE(review): nanx comes from the 64 bit fpZLUnit only; nanxs from the
// 32 bit unit is never consumed -- confirm whether FCMPS should use it.
wire nanCmpEv = invopxe & nanx & fcmp;	// must be a compare
wire invOpEv  = invopxe & (infzero|zerozero|subinf|infdiv);
wire dbzEv    = dbzxe & divByZero;
 
// Control / status register.
// The sticky accumulation of datapath events is written first and the
// ftx / fcx commands afterwards.  When one register receives more than one
// nonblocking assignment in the same block the last one takes effect, so this
// order lets an explicit trigger or clear override the accumulation instead of
// being overwritten by it.
always @(posedge clk)
	// reset: disable and clear all exceptions and status
	if (rst) begin
		rm <= 2'b0;			// round nearest even - default rounding mode
		inex <= 1'b0;
		dbzx <= 1'b0;
		underx <= 1'b0;
		overx <= 1'b0;
		giopx <= 1'b0;
		swtx <= 1'b0;
		sx <= 1'b0;
 
		// invalid operation breakdown
		cvtx <= 1'b0;
		sqrtx <= 1'b0;
		NaNCmpx <= 1'b0;
		infzerox <= 1'b0;
		zerozerox <= 1'b0;
		infdivx <= 1'b0;
		subinfx <= 1'b0;
		snanx <= 1'b0;
 
		inexe <= 1'b0;
		dbzxe <= 1'b0;
		underxe <= 1'b0;
		overxe <= 1'b0;
		invopxe <= 1'b0;
 
		nsfp <= 1'b0;
		fractie <= 1'b0;
		raz <= 1'b0;
 
	end
	else if (pipe_ce) begin
		// sticky accumulation of exception events
		infzerox  <= infzerox  | (invopxe & infzero);
		zerozerox <= zerozerox | (invopxe & zerozero);
		subinfx   <= subinfx   | (invopxe & subinf);
		infdivx   <= infdivx   | (invopxe & infdiv);
		dbzx      <= dbzx      | dbzEv;
		NaNCmpx   <= NaNCmpx   | nanCmpEv;
		sx        <= sx | nanCmpEv | invOpEv | dbzEv;
 
		// software trigger: set the exceptions selected by the operand bits
		if (ftx) begin
			inex <= inex     | (a[4]|b[4]);
			dbzx <= dbzx     | dbzEv | (a[3]|b[3]);
			underx <= underx | (a[2]|b[2]);
			overx <= overx   | (a[1]|b[1]);
			giopx <= giopx   | (a[0]|b[0]);
			swtx <= 1'b1;
			sx <= 1'b1;
		end
		// clear the exceptions selected by the operand bits; a clear takes
		// priority over an event arriving in the same cycle
		else if (fcx) begin
			sx <= sx & !(a[5]|b[5]);
			inex <= inex     & !(a[4]|b[4]);
			// the original listed two clears for dbzx (bit 3 and bit 0);
			// either bit clears it here
			dbzx <= dbzx     & !(a[3]|b[3]|a[0]|b[0]);
			underx <= underx & !(a[2]|b[2]);
			overx <= overx   & !(a[1]|b[1]);
			giopx <= giopx	 & !(a[0]|b[0]);
			// clear exception type when global invalid operation is cleared
			infdivx <= infdivx & !(a[0]|b[0]);
			zerozerox <= zerozerox & !(a[0]|b[0]);
			subinfx   <= subinfx   & !(a[0]|b[0]);
			infzerox  <= infzerox  & !(a[0]|b[0]);
			NaNCmpx   <= NaNCmpx   & !(a[0]|b[0]);
			// swtx feeds gx; leaving it set would keep 'exception' asserted
			// after a clear
			swtx <= 1'b0;
		end
		// enable the exceptions selected by the operand bits
		else if (fex) begin
			inexe <= inexe     | (a[4]|b[4]);
			dbzxe <= dbzxe     | (a[3]|b[3]);
			underxe <= underxe | (a[2]|b[2]);
			overxe <= overxe   | (a[1]|b[1]);
			invopxe <= invopxe | (a[0]|b[0]);
		end
		// disable the exceptions selected by the operand bits
		else if (fdx) begin
			inexe <= inexe     & !(a[4]|b[4]);
			dbzxe <= dbzxe     & !(a[3]|b[3]);
			underxe <= underxe & !(a[2]|b[2]);
			overxe <= overxe   & !(a[1]|b[1]);
			invopxe <= invopxe & !(a[0]|b[0]);
		end
		else if (frm)
			rm <= a[1:0]|b[1:0];
	end
 
// Decompose operands into sign,exponent,mantissa
wire sa, sb, sas, sbs;
wire [FMSB:0] ma, mb;
wire [22:0] mas, mbs;
 
wire aInf, bInf, aInfs, bInfs;
wire aNan, bNan, aNans, bNans;
wire az, bz, azs, bzs;
wire [1:0] rmd4;	// rounding mode after the delay4 unit, used by the round stage
wire [7:0] op1, op2;
wire [5:0] fn1,fn2;
 
wire [MSB:0] zld_o,lood_o;	// double precision zero-latency / convert results
wire [31:0] zls_o,loos_o;	// single precision zero-latency / convert results
wire [MSB:0] zl_o,loo_o;	// precision-selected results
fpZLUnit  #(64) u6 (.op(op), .fn(fn), .a(a), .b(b), .o(zld_o), .nanx(nanx) );
fpLOOUnit #(64) u7 (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a), .o(lood_o), .done(loo_done) );
fpZLUnit  #(32) u6s (.op(op), .fn(fn), .a(a[31:0]), .b(b[31:0]), .o(zls_o), .nanx(nanxs) );
fpLOOUnit #(32) u7s (.clk(clk), .ce(pipe_ce), .rm(rm), .op(op), .fn(fn), .a(a[31:0]), .o(loos_o), .done() );
assign loo_o = single ? loos_o : lood_o;
assign zl_o = single ? zls_o : zld_o;
fp_decomp #(64) u1 (.i(a), .sgn(sa), .man(ma), .vz(az), .inf(aInf), .nan(aNan) );
fp_decomp #(64) u2 (.i(b), .sgn(sb), .man(mb), .vz(bz), .inf(bInf), .nan(bNan) );
fp_decomp #(32) u1s (.i(a[31:0]), .sgn(sas), .man(mas), .vz(azs), .inf(aInfs), .nan(aNans) );
fp_decomp #(32) u2s (.i(b[31:0]), .sgn(sbs), .man(mbs), .vz(bzs), .inf(bInfs), .nan(bNans) );
 
// Delay the rounding mode and the operation codes alongside the pipeline
delay4 #(2) u3 (.clk(clk), .ce(pipe_ce), .i(rm), .o(rmd4) );
delay1 #(8) u4 (.clk(clk), .ce(pipe_ce), .i(op), .o(op1) );
delay2 #(8) u5 (.clk(clk), .ce(pipe_ce), .i(op), .o(op2) );
delay1 #(6) u5a (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn1) );
delay2 #(6) u5b (.clk(clk), .ce(pipe_ce), .i(fn), .o(fn2) );
 
// divide by zero: divisor is zero and the dividend is not a NaN
delay5 delay5_3(.clk(clk), .ce(pipe_ce), .i((bz & !aNan & fdiv)|(bzs & !aNans & fdivs)), .o(divByZero) );
 
// Compute NaN output sign
wire aob_nan = aNan|bNan;	// one of the operands is a nan
wire bothNan = aNan&bNan;	// both of the operands are nans
wire aob_nans = aNans|bNans;	// one of the operands is a nan
wire bothNans = aNans&bNans;	// both of the operands are nans
 
// When both operands are NaNs the sign of the one with the larger mantissa is
// used (the AND of both signs when the mantissas are equal).
assign ns = bothNan ?
				(ma==mb ? sa & sb : ma < mb ? sb : sa) :
		 		aNan ? sa : sb;
assign nss = bothNans ?
                                 (mas==mbs ? sas & sbs : mas < mbs ? sbs : sas) :
                                  aNans ? sas : sbs;
 
// NOTE(review): these delay lines are enabled by ce while the other delay
// lines and pipeline stages use pipe_ce -- confirm this is intended.
delay5 u8(.clk(clk), .ce(ce), .i(ns), .o(nso) );
delay5 u9(.clk(clk), .ce(ce), .i(aob_nan), .o(isNan) );
delay5 u8s(.clk(clk), .ce(ce), .i(nss), .o(nsos) );
delay5 u9s(.clk(clk), .ce(ce), .i(aob_nans), .o(isNans) );
 
wire [MSB:0] fpu_o;
wire [MSB+3:0] fpn_o;
wire [EX:0] fdiv_o;
wire [EX:0] fmul_o;
wire [EX:0] fas_o;
reg  [EX:0] fres;
wire [31:0] fpus_o;
wire [31+3:0] fpns_o;
wire [EXS:0] fdivs_o;
wire [EXS:0] fmuls_o;
wire [EXS:0] fass_o;
reg  [EXS:0] fress;
wire divUnder,divUnders;
wire mulUnder,mulUnders;
reg under,unders;
 
// These units have a two clock cycle latency
fpAddsub #(64) u10(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a), .b(b), .o(fas_o) );
fpDiv    #(64) u11(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a), .b(b), .o(fdiv_o), .sign_exe(), .underflow(divUnder), .done(divDone) );
fpMul    #(64) u12(.clk(clk), .ce(pipe_ce),          .a(a), .b(b), .o(fmul_o), .sign_exe(), .inf(), .underflow(mulUnder) );
fpAddsub #(32) u10s(.clk(clk), .ce(pipe_ce), .rm(rm), .op(op[0]), .a(a[31:0]), .b(b[31:0]), .o(fass_o) );
fpDiv    #(32) u11s(.clk(clk), .ce(pipe_ce), .ld(ld), .a(a[31:0]), .b(b[31:0]), .o(fdivs_o), .sign_exe(), .underflow(divUnders), .done() );
fpMul    #(32) u12s(.clk(clk), .ce(pipe_ce),          .a(a[31:0]), .b(b[31:0]), .o(fmuls_o), .sign_exe(), .inf(), .underflow(mulUnders) );
 
// Select the underflow indication matching the operation two stages back.
// Both outputs get a default so that no latch is inferred.
always @*
begin
	under = 1'b0;
	unders = 1'b0;
	if (op2==`FLOAT)
		case (fn2)
		`FMUL:	under = mulUnder;
		`FDIV:	under = divUnder;
		`FMULS: unders = mulUnders;
		`FDIVS: unders = divUnders;
		default: ;
		endcase
end
 
// Select the functional unit result matching the operation two stages back.
// The add/subtract result is the default for both precisions, which covers
// FADD / FSUB / FADDS / FSUBS and every non-arithmetic operation.
always @*
begin
	fres = fas_o;
	fress = fass_o;
	if (op2==`FLOAT)
		case (fn2)
		`FMUL:	fres = fmul_o;
		`FDIV:	fres = fdiv_o;
		`FMULS: fress = fmuls_o;
		`FDIVS: fress = fdivs_o;
		default: ;
		endcase
end
 
// pipeline stage
// one cycle latency
fpNormalize #(64) fpn0(.clk(clk), .ce(pipe_ce), .under(under), .i(fres), .o(fpn_o) );
fpNormalize #(32) fpns(.clk(clk), .ce(pipe_ce), .under(unders), .i(fress), .o(fpns_o) );
 
// pipeline stage
// one cycle latency
fpRoundReg #(64) fpr0(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpn_o), .o(fpu_o) );
fpRoundReg #(32) fprs(.clk(clk), .ce(pipe_ce), .rm(rmd4), .i(fpns_o), .o(fpus_o) );
 
// Result sign: the computed NaN sign when an operand was a NaN, otherwise the
// sign produced by the round stage.
wire so = single ? (isNans?nsos:fpus_o[31]): (isNan?nso:fpu_o[63]);
 
//fix: status should be registered
// Status read.  NOTE(review): the concatenation below is 31 bits wide, so it
// is zero extended on the left when driven onto o -- confirm the intended bit
// positions.
assign o = fstat ? {
	rm,
	inexe,
	dbzxe,
	underxe,
	overxe,
	invopxe,
	nsfp,
 
	fractie,
	raz,
	1'b0,
	so & !zero,
	!so & !zero,
	zero,
	inf,
 
	swtx,
	inex,
	dbzx,
	underx,
	overx,
	giopx,
	gx,
	sx,
 
	cvtx,
	sqrtx,
	NaNCmpx,
	infzerox,
	zerozerox,
	infdivx,
	subinfx,
	snanx
	} : 'bz;
 
// Data result: zero-latency unit, convert unit, or the rounded arithmetic
// result with the selected sign.  The single precision result is 32 bits
// wide, so its magnitude is always bits [30:0].
assign o = !fstat ?
    (zl_op ? zl_o :
     loo_op ? loo_o :
     single ? {so,fpus_o[30:0]} :
              {so,fpu_o[MSB-1:0]}) : 'bz;
 
assign zero = single ? fpus_o[30:0]==0 : WID==64 ? fpu_o[62:0]==0 : 0;
assign inf = single ? &fpus_o[31:23] && fpus_o[22:0]==0 : WID==64 ? &fpu_o[62:52] && fpu_o[51:0]==0 : 0;
 
// Detect the signature NaN values that identify the invalid operation
assign subinf 	= single ? fpus_o[31:0]==`QSUBINFS : WID==64 ? fpu_o[63:0]==`QSUBINF : 0;
assign infdiv 	= single ? fpus_o[31:0]==`QINFDIVS : WID==64 ? fpu_o[63:0]==`QINFDIV : 0;
assign zerozero = single ? fpus_o[31:0]==`QZEROZEROS : WID==64 ? fpu_o[63:0]==`QZEROZERO : 0;
assign infzero 	= single ? fpus_o[31:0]==`QINFZEROS : WID==64 ? fpu_o[63:0]==`QINFZERO : 0;
 
assign exception = gx;
 
endmodule
Go to most recent revision | Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpUnit.v] - Rev 25