URL https://opencores.org/ocsvn/thor/thor/trunk
Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpNormalize.v] - Rev 6

Compare with Previous | Blame | View Log
/* ===============================================================
	(C) 2006  Robert Finch
	All rights reserved.
	rob@birdcomputer.ca
 
	fpNormalize.v
		- floating point normalization unit
		- two cycle latency
		- parameterized width
		- IEEE 754 representation
 
	This source code is free for use and modification for
	non-commercial or evaluation purposes, provided this
	copyright statement and disclaimer remains present in
	the file.
 
	If you do modify the code, please state the origin and
	note that you have modified the code.
 
	NO WARRANTY.
	THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF
	ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
	the entire risk of using the Work.
 
	IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
	ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
	WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
	RELATIONSHIP WITH THE AUTHOR.
 
	IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
	TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
	WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
	TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
	OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
	AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
	FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
	USE.
 
 
	This unit takes a floating point number in an intermediate
	format and normalizes it. No normalization occurs
	for NaN's or infinities. The unit has a two cycle latency.
 
	The mantissa is assumed to start with two whole bits on
	the left. The remaining bits are fractional.
 
	The width of the incoming format is reduced via a generation
	of sticky bit in place of the low order fractional bits.
 
	On an underflowed input, the incoming exponent is assumed
	to be negative. A right shift is needed.
 
	Ref: Webpack 8.2  Spartan3-4 xc3s1000-4ft256
	302 LUTs / 166 slices / 
	550 LUTs / 291 slices / 89 MHz
	163 LUTs / 93 slices / 113.6 MHz?
=============================================================== */
 
module fpNormalize(clk, ce, under, i, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
				  WID==52 ? 10 :
				  WID==48 ? 10 :
				  WID==44 ? 10 :
				  WID==42 ? 10 :
				  WID==40 ?  9 :
				  WID==32 ?  7 :
				  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
				  WID==52 ? 39 :
				  WID==48 ? 35 :
				  WID==44 ? 31 :
				  WID==42 ? 29 :
				  WID==40 ? 28 :
				  WID==32 ? 22 :
				  WID==24 ? 15 : 9;
 
localparam FX = (FMSB+2)*2-1;	// the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk;
input ce;
input under;
input [EX:0] i;		// expanded format input
output [WID+2:0] o;		// normalized output + guard, sticky and round bits, + 1 whole digit
 
// variables
wire so;
 
wire so1 = i[EX];		// sign doesn't change
 
// Since the there are *two* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX-1:FX+1];
wire xInf = &xo1a & !under;
wire incExp1 = !xInf & i[FX];
wire [EMSB:0] xo1 = xo1a + incExp1;
wire [EMSB:0] xo2;
wire xInf1 = &xo1;
 
// If infinity is reached then set the mantissa to zero
wire gbit =  i[FMSB];
wire rbit =  i[FMSB-1];
wire sbit = |i[FMSB-2:0];
// shift mantissa left by one to reduce to a single whole digit
// if there is no exponent increment
wire [FMSB+3:0] mo;
wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 :
	incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} :		// reduce mantissa size
			 {i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0};	// reduce mantissa size
wire [FMSB+3:0] mo2;
wire [6:0] leadingZeros2;
 
 
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) );
 
// compensate for leadingZeros delay
wire xInf2;
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );
delay1 #(1)      d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) );
 
// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
wire rightOrLeft2;	// 0=left,1=right
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) );
 
// Compute how much we want to decrement by
wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;
 
// compute amount to shift right
// at infinity the exponent can't be incremented, so we can't shift right
// otherwise it was an underflow situation so the exponent was negative
// shift amount needs to be negated for shift register
wire [6:0] rshiftAmt2 = xInf2 ? 0 : -xo2 > FMSB+3 ? FMSB+4 : FMSB+4+xo2;	// xo2 is negative !
 
 
// sign
// the output sign is the same as the input sign
delay1 #(1)      d7(.clk(clk), .ce(ce), .i(so1), .o(so) );
 
// exponent
//	always @(posedge clk)
//		if (ce)
assign xo =
		xInf2 ? xo2 :		// an infinite exponent is either a NaN or infinity; no need to change
		rightOrLeft2 ? 0 :	// on a right shift, the exponent was negative, it's being made to zero
		xo2 - lshiftAmt2;	// on a left shift, the exponent can't be decremented below zero
 
// mantissa
delay1 #(FMSB+3) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );
 
wire [FMSB+3:0] mo2a;
shiftAndMask #(FMSB+4) u1 (.op({rightOrLeft2,1'b0}), .a(mo2), .b(rightOrLeft2 ? lshiftAmt2 : rshiftAmt2), .mb(6'd0), .me(FMSB+3), .o(mo2a) );
 
//	always @(posedge clk)
//		if (ce)
assign mo = mo2a;//rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2;
 
assign o = {so,xo,mo};
 
endmodule
Compare with Previous | Blame | View Log
Browse

Tools

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpNormalize.v] - Rev 6