OpenCores
URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpAddsub.v] - Blame information for rev 23

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 6 robfinch
/* ===============================================================
2
        (C) 2006  Robert Finch
3
        All rights reserved.
4
        rob@birdcomputer.ca
5
 
6
        fpAddsub.v
7
                - floating point adder/subtracter
8
                - two cycle latency
9
                - can issue every clock cycle
10
                - parameterized width
11
                - IEEE 754 representation
12
 
13
        This source code is free for use and modification for
14
        non-commercial or evaluation purposes, provided this
15
        copyright statement and disclaimer remains present in
16
        the file.
17
 
18
        If you do modify the code, please state the origin and
19
        note that you have modified the code.
20
 
21
        NO WARRANTY.
22
        THIS Work, IS PROVIDED "AS IS" WITH NO WARRANTIES OF
23
        ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
24
        the entire risk of using the Work.
25
 
26
        IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
27
        ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
28
        WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
29
        RELATIONSHIP WITH THE AUTHOR.
30
 
31
        IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
32
        TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
33
        WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
34
        TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
35
        OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
36
        AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
37
        FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
38
        USE.
39
 
40
        This adder/subtractor handles denormalized numbers.
41
        It has a two cycle latency.
42
        The output format is of an internal expanded representation
43
        in preparation to be fed into a normalization unit, then
44
        rounding. Basically, it's the same as the regular format
45
        except the mantissa is doubled in size, the leading two
46
        bits of which are assumed to be whole bits.
47
 
48
        Ref: Webpack 8.2  Spartan3-4 xc3s1000-4ft256
49
        580 LUTS / 315 slices / 74 MHz
50
=============================================================== */
51
 
52
// fpAddsub - floating point adder/subtracter.
//
// Two pipeline stages:
//   stage 1: decompose operands, pick the fraction to align, compute the
//            shift amount, sticky bit, output sign and output exponent.
//   stage 2: align, add/subtract the fractions and select the special-case
//            (infinity / NaN) mantissa patterns.
// The result is the expanded {sign, exponent, mantissa} word described in
// the file header; it is not normalized or rounded here.
module fpAddsub(clk, ce, rm, op, a, b, o);
parameter WID = 32;
localparam MSB = WID-1;
// Index of the exponent field's most significant bit for each supported width.
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                  WID==52 ? 10 :
                  WID==48 ? 10 :
                  WID==44 ? 10 :
                  WID==42 ? 10 :
                  WID==40 ?  9 :
                  WID==32 ?  7 :
                  WID==24 ?  6 : 4;
// Index of the fraction field's most significant bit for each supported width.
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                  WID==52 ? 39 :
                  WID==48 ? 35 :
                  WID==44 ? 31 :
                  WID==42 ? 29 :
                  WID==40 ? 28 :
                  WID==32 ? 22 :
                  WID==24 ? 15 : 9;

localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;  // MSB of the output: mantissa + exponent + sign bits, minus one

input clk;              // system clock
input ce;               // core clock enable
input [1:0] rm;         // rounding mode (only rm==3, "rounding down", is examined here)
input op;               // operation 0 = add, 1 = subtract
input [WID-1:0] a;      // operand a
input [WID-1:0] b;      // operand b
output [EX:0] o;        // output: {sign, exponent, expanded mantissa}


// variables
wire so;                // sign output (registered)
wire [EMSB:0] xo;       // de normalized exponent output (registered)
reg [EMSB:0] xo1;       // de normalized exponent output (combinational, stage 1)
wire [FX:0] mo;         // mantissa output (registered)
reg [FX:0] mo1;         // mantissa output (combinational, stage 2)

assign o = {so,xo,mo};

// operands sign,exponent,mantissa
wire sa, sb;
wire [EMSB:0] xa, xb;
wire [FMSB:0] ma, mb;
wire [FMSB+1:0] fracta, fractb;     // fractions as delivered by fpDecomp, one bit wider than ma/mb
wire [FMSB+1:0] fracta1, fractb1;   // the same, delayed one clock

// which has greater magnitude ? Used for sign calc
wire xa_gt_xb = xa > xb;
wire xa_gt_xb1;
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb);
wire a_gt_b1;
wire az, bz;            // operand a,b is zero

wire adn, bdn;          // a,b denormalized ?
wire xaInf, xbInf;
wire aInf, bInf, aInf1, bInf1;
wire aNan, bNan, aNan1, bNan1;

// Exponents compensated for denormalized numbers: the one-bit denormal flag
// is zero-extended and OR'ed into the exponent's least significant bit.
wire [EMSB:0] xad = xa|adn;
wire [EMSB:0] xbd = xb|bdn;

fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );

// Figure out which operation is really needed: an add or a subtract ?
// If the signs are the same, use the original op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, so of larger
// -a +  b = sub, so of larger
// -a + -b = add,-
//  a -  b = sub, so of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, so of larger
wire realOp = op ^ sa ^ sb;     // 1 = magnitudes are subtracted, 0 = magnitudes are added
wire realOp1;                   // realOp delayed one clock

// Find out if the result will be zero.
wire resZero = (realOp && xa==xb && ma==mb) ||  // subtract, same magnitude
               (az & bz);                       // both a,b zero

// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero. When both exponents flag infinity the exponent of a is
// passed through, taking priority over the zero-result case.
always @*
    if (xaInf & xbInf)
        xo1 = xa;
    else if (resZero)
        xo1 = 0;
    else
        xo1 = xa_gt_xb ? xa : xb;

// Compute output sign
// Blocking assignments are used because this is purely combinational logic.
// All sixteen selector values are listed, so no synthesis pragma is needed.
reg so1;
always @*
    case ({resZero,sa,op,sb})
    4'b0000: so1 = 0;           // + + + = +
    4'b0001: so1 = !a_gt_b;     // + + - = sign of larger
    4'b0010: so1 = !a_gt_b;     // + - + = sign of larger
    4'b0011: so1 = 0;           // + - - = +
    4'b0100: so1 = a_gt_b;      // - + + = sign of larger
    4'b0101: so1 = 1;           // - + - = -
    4'b0110: so1 = 1;           // - - + = -
    4'b0111: so1 = a_gt_b;      // - - - = sign of larger
    4'b1000: so1 = 0;           //  A +  B, sign = +
    4'b1001: so1 = rm==3;       //  A + -B, sign = + unless rounding down
    4'b1010: so1 = rm==3;       //  A -  B, sign = + unless rounding down
    4'b1011: so1 = 0;           // +A - -B, sign = +
    4'b1100: so1 = rm==3;       // -A +  B, sign = + unless rounding down
    4'b1101: so1 = 1;           // -A + -B, sign = -
    4'b1110: so1 = 1;           // -A - +B, sign = -
    4'b1111: so1 = rm==3;       // -A - -B, sign = + unless rounding down
    endcase

// Exponent and sign are final after stage 1; delay them to line up with
// the mantissa. NOTE(review): delay2 is presumably a two-clock delay - its
// definition is not in this file.
delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );
delay2 #(1)      d2(.clk(clk), .ce(ce), .i(so1), .o(so) );

// Compute the difference in exponents, provides shift amount.
// The amount is clamped to FMSB+3.
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;
wire [6:0] xdif1;

// determine which fraction to denormalize (the one with the smaller exponent)
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;
wire [FMSB+1:0] mfs1;

// Determine the sticky bit
// NOTE(review): redor64 is defined elsewhere; presumably it ORs together the
// bits of b that a right shift by a would discard - confirm.
wire sticky, sticky1;
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );

// register inputs to shifter and shift
delay1 #(1)      d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );
delay1 #(7)      d15(.clk(clk), .ce(ce), .i(xdif),   .o(xdif1) );
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs),    .o(mfs1) );

// Aligned fraction with two extra trailing bits; the sticky bit is OR'ed
// into the least significant bit.
wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1;

// sync control signals
delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );
delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) );
delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) );
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) );
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) );
delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );
delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );

// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
// (oaa is always the operand of larger magnitude when a_gt_b1 is set,
// and the two are swapped otherwise).
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1;
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0};
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;

// Select the mantissa.
// Priority: both operands infinite, then a is NaN, then b is NaN, then the
// ordinary sum/difference.
//
// For inf +/- inf the decision must be made on the effective operation
// (realOp1), not on the raw opcode: (+inf) + (-inf) and (+inf) - (+inf) are
// magnitude subtractions and produce a QNaN, whereas (+inf) - (-inf) and
// (+inf) + (+inf) are magnitude additions and remain infinite.
//
// NOTE(review): each concatenation below is narrower than mo1 (FX+1 bits)
// and is therefore zero-extended on the left by the assignment.
always @*
    casex({aInf1&bInf1,aNan1,bNan1})
    3'b1xx:     mo1 = {1'b0,realOp1,{FMSB-1{1'b0}},realOp1,{FMSB{1'b0}}};   // inf +/- inf - generate QNaN on effective subtract, inf on effective add
    3'bx1x:     mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};               // propagate a's fraction
    3'bxx1:     mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};               // propagate b's fraction
    default:    mo1 = {mab,{FMSB-2{1'b0}}};     // mab has an extra lead bit and two trailing bits
    endcase

delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );

endmodule
226
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.