OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpNormalize.v] - Blame information for rev 34

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 6 robfinch
/* ===============================================================
2
        (C) 2006  Robert Finch
3
        All rights reserved.
4
        rob@birdcomputer.ca
5
 
6
        fpNormalize.v
7
                - floating point normalization unit
8
                - two cycle latency
9
                - parameterized width
10
                - IEEE 754 representation
11
 
12
        This source code is free for use and modification for
13
        non-commercial or evaluation purposes, provided this
14
        copyright statement and disclaimer remains present in
15
        the file.
16
 
17
        If you do modify the code, please state the origin and
18
        note that you have modified the code.
19
 
20
        NO WARRANTY.
21
        THIS WORK IS PROVIDED "AS IS" WITH NO WARRANTIES OF
22
        ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
23
        the entire risk of using the Work.
24
 
25
        IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
26
        ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
27
        WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
28
        RELATIONSHIP WITH THE AUTHOR.
29
 
30
        IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
31
        TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
32
        WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
33
        TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
34
        OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
35
        AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
36
        FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
37
        USE.
38
 
39
 
40
        This unit takes a floating point number in an intermediate
41
        format and normalizes it. No normalization occurs
42
        for NaN's or infinities. The unit has a two cycle latency.
43
 
44
        The mantissa is assumed to start with two whole bits on
45
        the left. The remaining bits are fractional.
46
 
47
        The width of the incoming format is reduced via a generation
48
        of sticky bit in place of the low order fractional bits.
49
 
50
        On an underflowed input, the incoming exponent is assumed
51
        to be negative. A right shift is needed.
52
 
53
        Ref: Webpack 8.2  Spartan3-4 xc3s1000-4ft256
54
        302 LUTs / 166 slices /
55
        550 LUTs / 291 slices / 89 MHz
56
        163 LUTs / 93 slices / 113.6 MHz?
57
=============================================================== */
58
 
59
// ---------------------------------------------------------------------------
// fpNormalize
//
// Normalizes a floating point value held in an expanded intermediate format.
//
// Parameters:
//   WID   - width selector for the floating point format; picks the exponent
//           (EMSB) and fraction (FMSB) MSB indexes from the tables below.
//
// Ports:
//   clk   - clock, forwarded to the delay1 / cntlz64Reg instances
//   ce    - clock enable, forwarded to the same instances
//   under - exponent underflow flag: selects a right shift of the mantissa
//           and forces the output exponent to zero
//   i     - expanded input: i[EX] = sign, i[EX-1:FX+1] = exponent,
//           i[FX:0] = mantissa with two whole bits on the left
//   o     - {sign, exponent, mantissa} as assembled at the bottom of the module
//
// Depends on cntlz64Reg, delay1 and shiftAndMask, which are defined elsewhere.
// ---------------------------------------------------------------------------
module fpNormalize(clk, ce, under, i, o);
parameter WID = 32;
localparam MSB = WID-1;
// MSB index of the exponent field for each supported width
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
// MSB index of the fraction field for each supported width
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;  // the MSB (sign bit) of the expanded input

input clk;
input ce;
input under;
input [EX:0] i;          // expanded format input
output [WID+2:0] o;      // normalized output + guard, sticky and round bits, + 1 whole digit

// variables
wire so;

wire so1 = i[EX];        // sign doesn't change

// Since there are *two* whole digits in the incoming format
// the number of whole digits needs to be reduced. If the MSB is
// set, then increment the exponent and no shift is needed.
wire [EMSB:0] xo;
wire [EMSB:0] xo1a = i[EX-1:FX+1];
wire xInf = &xo1a & !under;             // all-ones exponent that did not come from an underflow
wire incExp1 = !xInf & i[FX];
wire [EMSB:0] xo1 = xo1a + incExp1;
wire [EMSB:0] xo2;
wire xInf1 = &xo1;

// If infinity is reached then set the mantissa to zero
wire gbit =  i[FMSB];
wire rbit =  i[FMSB-1];
wire sbit = |i[FMSB-2:0];               // OR-reduction of the remaining low order bits
// shift mantissa left by one to reduce to a single whole digit
// if there is no exponent increment
// NOTE(review): both concatenations below are FMSB+6 bits wide, while mo1 is
// only FMSB+4 bits, so the two most significant bits are truncated by the
// assignment. Confirm the intended bit alignment against the producer of 'i'.
wire [FMSB+3:0] mo;
wire [FMSB+3:0] mo1 = xInf1 & incExp1 ? 0 :
        incExp1 ? {i[FX:FMSB+1],gbit,rbit,sbit} :               // reduce mantissa size
                  {i[FX-1:FMSB+1],gbit,rbit,sbit,1'b0};         // reduce mantissa size
wire [FMSB+3:0] mo2;
wire [6:0] leadingZeros2;

// NOTE(review): mo1 is FMSB+4 bits wide. If cntlz64Reg counts over a 64-bit
// input (as its name suggests) the narrower mo1 would be zero-padded on the
// left and the count inflated accordingly - confirm against cntlz64Reg.
cntlz64Reg clz0 (.clk(clk), .ce(ce), .i(mo1), .o(leadingZeros2) );

// compensate for leadingZeros delay
wire xInf2;
delay1 #(EMSB+1) d2(.clk(clk), .ce(ce), .i(xo1), .o(xo2) );
delay1 #(1)      d3(.clk(clk), .ce(ce), .i(xInf1), .o(xInf2) );

// If the exponent underflowed, then the shift direction must be to the
// right regardless of mantissa bits; the number is denormalized.
// Otherwise the shift direction must be to the left.
wire rightOrLeft2;      // 0=left,1=right
delay1 #(1) d8(.clk(clk), .ce(ce), .i(under), .o(rightOrLeft2) );

// Compute how much we want to decrement by
// (never more than the exponent itself, so the exponent cannot go below zero)
wire [6:0] lshiftAmt2 = leadingZeros2 > xo2 ? xo2 : leadingZeros2;

// compute amount to shift right
// at infinity the exponent can't be incremented, so we can't shift right
// otherwise it was an underflow situation so the exponent was negative
// shift amount needs to be negated for shift register
//
// The magnitude of the (negative) exponent is formed in a wire of exactly
// EMSB+1 bits. Writing "-xo2 > FMSB+3" directly would be evaluated at the
// 32-bit width of the integer expression: the unsigned xo2 would be
// zero-extended before negation and the comparison would be true for every
// non-zero xo2.
wire [EMSB:0] xo2Mag = -xo2;            // two's complement magnitude, xo2 is negative !
wire [6:0] rshiftAmt2 = xInf2 ? 7'd0 :
                        xo2Mag > FMSB+3 ? FMSB+4 :
                        FMSB+4-xo2Mag;  // same as FMSB+4+xo2 without relying on truncation


// sign
// the output sign is the same as the input sign
delay1 #(1)      d7(.clk(clk), .ce(ce), .i(so1), .o(so) );

// exponent
//      always @(posedge clk)
//              if (ce)
assign xo =
                xInf2 ? xo2 :           // an infinite exponent is either a NaN or infinity; no need to change
                rightOrLeft2 ? 0 :      // on a right shift, the exponent was negative, it's being made to zero
                xo2 - lshiftAmt2;       // on a left shift, the exponent can't be decremented below zero

// mantissa
// The delay width must cover all FMSB+4 bits of mo1/mo2 (the parameter is a
// bit count, as in the d2/d3 instances above).
delay1 #(FMSB+4) d4(.clk(clk), .ce(ce), .i(mo1), .o(mo2) );

wire [FMSB+3:0] mo2a;
// NOTE(review): the .b select below feeds lshiftAmt2 when rightOrLeft2 is 1
// (right shift) and rshiftAmt2 otherwise, which is the opposite of the
// commented-out reference expression further down. Left unchanged because the
// shiftAndMask operation encoding is not visible here - confirm.
shiftAndMask #(FMSB+4) u1 (.op({rightOrLeft2,1'b0}), .a(mo2), .b(rightOrLeft2 ? lshiftAmt2 : rshiftAmt2), .mb(6'd0), .me(FMSB+3), .o(mo2a) );

//      always @(posedge clk)
//              if (ce)
assign mo = mo2a;//rightOrLeft2 ? mo2 >> rshiftAmt2 : mo2 << lshiftAmt2;

assign o = {so,xo,mo};

endmodule
168
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.