URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpNormalize.v] - Blame information for rev 6

Details | Compare with Previous | View Log


/* ===============================================================
        (C) 2006  Robert Finch
        All rights reserved.
        rob@birdcomputer.ca
 
        fpNormalize.v
                - floating point normalization unit
                - two cycle latency
                - parameterized width
                - IEEE 754 representation
 
        This source code is free for use and modification for
        non-commercial or evaluation purposes, provided this
        copyright statement and disclaimer remains present in
        the file.
 
        If you do modify the code, please state the origin and
        note that you have modified the code.
 
        NO WARRANTY.
        THIS Work IS PROVIDED "AS IS" WITH NO WARRANTIES OF
        ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
        the entire risk of using the Work.
 
        IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
        ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
        WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
        RELATIONSHIP WITH THE AUTHOR.
 
        IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
        TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
        WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
        TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
        OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
        AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
        FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
        USE.
 
 
        This unit takes a floating point number in an intermediate
        format and normalizes it. No normalization occurs
        for NaN's or infinities. The unit has a two cycle latency.
 
        The mantissa is assumed to start with two whole bits on
        the left. The remaining bits are fractional.
 
        The width of the incoming format is reduced via a generation
        of sticky bit in place of the low order fractional bits.
 
        On an underflowed input, the incoming exponent is assumed
        to be negative. A right shift is needed.
 
        Ref: Webpack 8.2  Spartan3-4 xc3s1000-4ft256
        302 LUTs / 166 slices /
        550 LUTs / 291 slices / 89 MHz
        163 LUTs / 93 slices / 113.6 MHz?
=============================================================== */
 
// fpNormalize
//   Normalizes a floating point value supplied in an expanded intermediate
//   format: {sign, exponent[EMSB:0], fraction[FX:0]} where the fraction
//   carries two whole digits at its top.
//
//   Ports:
//     clk   - clock forwarded to the registered helper modules
//     ce    - clock enable forwarded to the registered helper modules
//     under - when set, the value is right shifted (treated as an
//             underflowed / denormal result); otherwise it is left shifted
//     i     - expanded format input, EX+1 bits
//     o     - {sign, exponent, mantissa} result, WID+3 bits
//
//   Modification note: the width of the mantissa delay (d4) and the
//   selection of the shift amount fed to shiftAndMask were corrected
//   relative to the original source.
module fpNormalize(clk, ce, under, i, o);
parameter WID = 32;
localparam MSB = WID-1;
// Index of the most significant exponent bit for each supported width.
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
// Index of the most significant fraction bit for each supported width.
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;  // the MSB of the expanded input (sign bit position)

input clk;
input ce;
input under;
input [EX:0] i;          // expanded format input
output [WID+2:0] o;      // normalized output + guard, sticky and round bits, + 1 whole digit

// ---------------------------------------------------------------
// Stage 1 (combinational on the input)
// ---------------------------------------------------------------
wire so;

wire so1 = i[EX];               // sign doesn't change

// Since there are *two* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX-1:FX+1];          // incoming exponent field
wire xInf = &xo1a & !under;                 // exponent already all ones (and not an underflow)
wire incExp1 = !xInf & i[FX];               // top whole digit set: bump the exponent
wire [EMSB:0] xo1 = xo1a + incExp1;
wire [EMSB:0] xo2;
wire xInf1 = &xo1;                          // exponent is all ones after the increment

// Guard, round and sticky bits replace the low order fraction bits.
wire gbit =  i[FMSB];
wire rbit =  i[FMSB-1];
wire sbit = |i[FMSB-2:0];

// If infinity is reached by the increment then set the mantissa to zero.
// Otherwise shift the mantissa left by one to reduce to a single whole
// digit if there is no exponent increment.
// NOTE(review): each concatenation below is FMSB+6 bits wide while mo1 is
// FMSB+4 bits wide, so the two uppermost bits are discarded by the
// assignment - confirm this is the intended bit allocation.
wire [FMSB+3:0] mo;
wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 :
        incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} :               // reduce mantissa size
                  {i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0};         // reduce mantissa size
wire [FMSB+3:0] mo2;
wire [6:0] leadingZeros2;

// Registered leading zero count of the reduced mantissa.
// NOTE(review): mo1 is only FMSB+4 bits wide; if cntlz64Reg counts over a
// wider operand the result would include the padding bits - confirm
// against the cntlz64Reg port definition.
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) );

// ---------------------------------------------------------------
// Stage 2 (signals delayed to line up with leadingZeros2)
// ---------------------------------------------------------------
wire xInf2;
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );
delay1 #(1)      d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) );

// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
wire rightOrLeft2;      // 0=left,1=right
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) );

// Left shift amount: the leading zero count, limited so the exponent
// is never decremented below zero.
wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;

// Right shift amount.
// At infinity the exponent can't be incremented, so we can't shift right;
// otherwise it was an underflow situation so the exponent was negative.
// The shift amount needs to be negated for the shift register.
// NOTE(review): xo2 is an unsigned wire, so "-xo2 > FMSB+3" appears to be
// evaluated as an unsigned comparison - confirm it behaves as intended
// for negative exponents.
wire [6:0] rshiftAmt2 = xInf2 ? 0 : -xo2 > FMSB+3 ? FMSB+4 : FMSB+4+xo2;  // xo2 is negative !

// sign
// the output sign is the same as the input sign
delay1 #(1)      d7(.clk(clk), .ce(ce), .i(so1), .o(so) );

// exponent (combinational from the stage 2 signals)
assign xo =
                xInf2 ? xo2 :           // an infinite exponent is either a NaN or infinity; no need to change
                rightOrLeft2 ? 0 :      // on a right shift, the exponent was negative, it's being made to zero
                xo2 - lshiftAmt2;       // on a left shift, the exponent can't be decremented below zero

// mantissa
// The delay must cover every bit of mo1/mo2, which are FMSB+4 bits wide.
delay1 #(FMSB+4) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );

// Shift the delayed mantissa: right by rshiftAmt2 when rightOrLeft2 is set,
// left by lshiftAmt2 otherwise (mirrors the reference expression
// rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2).
wire [FMSB+3:0] mo2a;
shiftAndMask #(FMSB+4) u1 (
        .op({rightOrLeft2,1'b0}),
        .a(mo2),
        .b(rightOrLeft2 ? rshiftAmt2 : lshiftAmt2),
        .mb(6'd0),
        .me(FMSB+3),
        .o(mo2a)
);

assign mo = mo2a;

assign o = {so,xo,mo};

endmodule
 

Line No.	Rev	Author	Line
1	6	robfinch	`/* ===============================================================`
2			`(C) 2006 Robert Finch`
3			`All rights reserved.`
4			`rob@birdcomputer.ca`
5
6			`fpNormalize.v`
7			`- floating point normalization unit`
8			`- two cycle latency`
9			`- parameterized width`
10			`- IEEE 754 representation`
11
12			`This source code is free for use and modification for`
13			`non-commercial or evaluation purposes, provided this`
14			`copyright statement and disclaimer remains present in`
15			`the file.`
16
17			`If you do modify the code, please state the origin and`
18			`note that you have modified the code.`
19
20			`NO WARRANTY.`
21			`THIS Work IS PROVIDED "AS IS" WITH NO WARRANTIES OF`
22			`ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume`
23			`the entire risk of using the Work.`
24
25			`IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR`
26			`ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES`
27			`WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR`
28			`RELATIONSHIP WITH THE AUTHOR.`
29
30			`IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU`
31			`TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE`
32			`WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED`
33			`TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS`
34			`OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,`
35			`AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS`
36			`FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED`
37			`USE.`
38
39
40			`This unit takes a floating point number in an intermediate`
41			`format and normalizes it. No normalization occurs`
42			`for NaN's or infinities. The unit has a two cycle latency.`
43
44			`The mantissa is assumed to start with two whole bits on`
45			`the left. The remaining bits are fractional.`
46
47			`The width of the incoming format is reduced via a generation`
48			`of sticky bit in place of the low order fractional bits.`
49
50			`On an underflowed input, the incoming exponent is assumed`
51			`to be negative. A right shift is needed.`
52
53			`Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256`
54			`302 LUTs / 166 slices /`
55			`550 LUTs / 291 slices / 89 MHz`
56			`163 LUTs / 93 slices / 113.6 MHz?`
57			`=============================================================== */`
58
59			`module fpNormalize(clk, ce, under, i, o);`
60			`parameter WID = 32;`
61			`localparam MSB = WID-1;`
62			`localparam EMSB = WID==80 ? 14 :`
63			`WID==64 ? 10 :`
64			`WID==52 ? 10 :`
65			`WID==48 ? 10 :`
66			`WID==44 ? 10 :`
67			`WID==42 ? 10 :`
68			`WID==40 ? 9 :`
69			`WID==32 ? 7 :`
70			`WID==24 ? 6 : 4;`
71			`localparam FMSB = WID==80 ? 63 :`
72			`WID==64 ? 51 :`
73			`WID==52 ? 39 :`
74			`WID==48 ? 35 :`
75			`WID==44 ? 31 :`
76			`WID==42 ? 29 :`
77			`WID==40 ? 28 :`
78			`WID==32 ? 22 :`
79			`WID==24 ? 15 : 9;`
80
81			`localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction`
82			`localparam EX = FX + 1 + EMSB + 1 + 1 - 1;`
83
84			`input clk;`
85			`input ce;`
86			`input under;`
87			`input [EX:0] i; // expanded format input`
88			`output [WID+2:0] o; // normalized output + guard, sticky and round bits, + 1 whole digit`
89
90			`// variables`
91			`wire so;`
92
93			`wire so1 = i[EX]; // sign doesn't change`
94
95			`// Since the there are two whole digits in the incoming format`
96			`// the number of whole digits needs to be reduced. If the MSB is`
97			`// set, then increment the exponent and no shift is needed.`
98			`wire [EMSB:0] xo;`
99			`wire [EMSB:0] xo1a = i[EX-1:FX+1];`
100			`wire xInf = &xo1a & !under;`
101			`wire incExp1 = !xInf & i[FX];`
102			`wire [EMSB:0] xo1 = xo1a + incExp1;`
103			`wire [EMSB:0] xo2;`
104			`wire xInf1 = &xo1;`
105
106			`// If infinity is reached then set the mantissa to zero`
107			`wire gbit = i[FMSB];`
108			`wire rbit = i[FMSB-1];`
109			`wire sbit = \|i[FMSB-2:0];`
110			`// shift mantissa left by one to reduce to a single whole digit`
111			`// if there is no exponent increment`
112			`wire [FMSB+3:0] mo;`
113			`wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 :`
114			`incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} : // reduce mantissa size`
115			`{i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0}; // reduce mantissa size`
116			`wire [FMSB+3:0] mo2;`
117			`wire [6:0] leadingZeros2;`
118
119
120			`cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) );`
121
122			`// compensate for leadingZeros delay`
123			`wire xInf2;`
124			`delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );`
125			`delay1 #(1) d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) );`
126
127			`// If the exponent underflowed, then the shift direction must be to the`
128			`// right regardless of mantissa bits; the number is denormalized.`
129			`// Otherwise the shift direction must be to the left.`
130			`wire rightOrLeft2; // 0=left,1=right`
131			`delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) );`
132
133			`// Compute how much we want to decrement by`
134			`wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;`
135
136			`// compute amount to shift right`
137			`// at infinity the exponent can't be incremented, so we can't shift right`
138			`// otherwise it was an underflow situation so the exponent was negative`
139			`// shift amount needs to be negated for shift register`
140			`wire [6:0] rshiftAmt2 = xInf2 ? 0 : -xo2 > FMSB+3 ? FMSB+4 : FMSB+4+xo2; // xo2 is negative !`
141
142
143			`// sign`
144			`// the output sign is the same as the input sign`
145			`delay1 #(1) d7(.clk(clk), .ce(ce), .i(so1), .o(so) );`
146
147			`// exponent`
148			`// always @(posedge clk)`
149			`// if (ce)`
150			`assign xo =`
151			`xInf2 ? xo2 : // an infinite exponent is either a NaN or infinity; no need to change`
152			`rightOrLeft2 ? 0 : // on a right shift, the exponent was negative, it's being made to zero`
153			`xo2 - lshiftAmt2; // on a left shift, the exponent can't be decremented below zero`
154
155			`// mantissa`
156			`delay1 #(FMSB+3) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );`
157
158			`wire [FMSB+3:0] mo2a;`
159			`shiftAndMask #(FMSB+4) u1 (.op({rightOrLeft2,1'b0}), .a(mo2), .b(rightOrLeft2 ? lshiftAmt2 : rshiftAmt2), .mb(6'd0), .me(FMSB+3), .o(mo2a) );`
160
161			`// always @(posedge clk)`
162			`// if (ce)`
163			`assign mo = mo2a;//rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2;`
164
165			`assign o = {so,xo,mo};`
166
167			`endmodule`
168

Browse

Tools

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpNormalize.v] - Blame information for rev 6