URL https://opencores.org/ocsvn/thor/thor/trunk

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpAddsub.v] - Blame information for rev 23

Go to most recent revision | Details | Compare with Previous | View Log


/* ===============================================================
        (C) 2006  Robert Finch
        All rights reserved.
        rob@birdcomputer.ca
 
        fpAddsub.v
                - floating point adder/subtracter
                - two cycle latency
                - can issue every clock cycle
                - parameterized width
                - IEEE 754 representation
 
        This source code is free for use and modification for
        non-commercial or evaluation purposes, provided this
        copyright statement and disclaimer remains present in
        the file.
 
        If you do modify the code, please state the origin and
        note that you have modified the code.
 
        NO WARRANTY.
        THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF
        ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume
        the entire risk of using the Work.
 
        IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
        ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES
        WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR
        RELATIONSHIP WITH THE AUTHOR.
 
        IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU
        TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE
        WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED
        TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS
        OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,
        AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS
        FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED
        USE.
 
        This adder/subtractor handles denormalized numbers.
        It has a two cycle latency.
        The output format is of an internal expanded representation
        in preparation to be fed into a normalization unit, then
        rounding. Basically, it's the same as the regular format
        except the mantissa is doubled in size, the leading two
        bits of which are assumed to be whole bits.
 
        Ref: Webpack 8.2  Spartan3-4 xc3s1000-4ft256
        580 LUTS / 315 slices / 74 MHz
=============================================================== */
 
module fpAddsub(clk, ce, rm, op, a, b, o);
parameter WID = 32;
localparam MSB = WID-1;
localparam EMSB = WID==80 ? 14 :
                  WID==64 ? 10 :
                                  WID==52 ? 10 :
                                  WID==48 ? 10 :
                                  WID==44 ? 10 :
                                  WID==42 ? 10 :
                                  WID==40 ?  9 :
                                  WID==32 ?  7 :
                                  WID==24 ?  6 : 4;
localparam FMSB = WID==80 ? 63 :
                  WID==64 ? 51 :
                                  WID==52 ? 39 :
                                  WID==48 ? 35 :
                                  WID==44 ? 31 :
                                  WID==42 ? 29 :
                                  WID==40 ? 28 :
                                  WID==32 ? 22 :
                                  WID==24 ? 15 : 9;
 
localparam FX = (FMSB+2)*2-1;   // the MSB of the expanded fraction
localparam EX = FX + 1 + EMSB + 1 + 1 - 1;
 
input clk;              // system clock
input ce;               // core clock enable
input [1:0] rm;  // rounding mode
input op;               // operation 0 = add, 1 = subtract
input [WID-1:0] a;       // operand a
input [WID-1:0] b;       // operand b
output [EX:0] o; // output
 
 
// variables
wire so;                        // sign output
wire [EMSB:0] xo;        // de normalized exponent output
reg [EMSB:0] xo1;        // de normalized exponent output
wire [FX:0] mo;  // mantissa output
reg [FX:0] mo1;  // mantissa output
 
assign o = {so,xo,mo};
 
// operands sign,exponent,mantissa
wire sa, sb;
wire [EMSB:0] xa, xb;
wire [FMSB:0] ma, mb;
wire [FMSB+1:0] fracta, fractb;
wire [FMSB+1:0] fracta1, fractb1;
 
// which has greater magnitude ? Used for sign calc
wire xa_gt_xb = xa > xb;
wire xa_gt_xb1;
wire a_gt_b = xa_gt_xb || (xa==xb && ma > mb);
wire a_gt_b1;
wire az, bz;    // operand a,b is zero
 
wire adn, bdn;          // a,b denormalized ?
wire xaInf, xbInf;
wire aInf, bInf, aInf1, bInf1;
wire aNan, bNan, aNan1, bNan1;
 
wire [EMSB:0] xad = xa|adn;      // operand a exponent, compensated for denormalized numbers
wire [EMSB:0] xbd = xb|bdn; // operand b exponent, compensated for denormalized numbers
 
fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );
fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );
 
// Figure out which operation is really needed an add or
// subtract ?
// If the signs are the same, use the orignal op,
// otherwise flip the operation
//  a +  b = add,+
//  a + -b = sub, so of larger
// -a +  b = sub, so of larger
// -a + -b = add,-
//  a -  b = sub, so of larger
//  a - -b = add,+
// -a -  b = add,-
// -a - -b = sub, so of larger
wire realOp = op ^ sa ^ sb;
wire realOp1;
wire op1;
 
// Find out if the result will be zero.
wire resZero = (realOp && xa==xb && ma==mb) ||  // subtract, same magnitude
                           (az & bz);           // both a,b zero
 
// Compute output exponent
//
// The output exponent is the larger of the two exponents,
// unless a subtract operation is in progress and the two
// numbers are equal, in which case the exponent should be
// zero.
 
always @(xaInf,xbInf,resZero,xa,xb,xa_gt_xb)
        xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb;
 
// Compute output sign
reg so1;
always @*
        case ({resZero,sa,op,sb})       // synopsys full_case parallel_case
        4'b0000: so1 <= 0;                       // + + + = +
        4'b0001: so1 <= !a_gt_b;        // + + - = sign of larger
        4'b0010: so1 <= !a_gt_b;        // + - + = sign of larger
        4'b0011: so1 <= 0;                       // + - - = +
        4'b0100: so1 <= a_gt_b;         // - + + = sign of larger
        4'b0101: so1 <= 1;                      // - + - = -
        4'b0110: so1 <= 1;                      // - - + = -
        4'b0111: so1 <= a_gt_b;         // - - - = sign of larger
        4'b1000: so1 <= 0;                       //  A +  B, sign = +
        4'b1001: so1 <= rm==3;          //  A + -B, sign = + unless rounding down
        4'b1010: so1 <= rm==3;          //  A -  B, sign = + unless rounding down
        4'b1011: so1 <= 0;                       // +A - -B, sign = +
        4'b1100: so1 <= rm==3;          // -A +  B, sign = + unless rounding down
        4'b1101: so1 <= 1;                      // -A + -B, sign = -
        4'b1110: so1 <= 1;                      // -A - +B, sign = -
        4'b1111: so1 <= rm==3;          // -A - -B, sign = + unless rounding down
        endcase
 
delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );
delay2 #(1)      d2(.clk(clk), .ce(ce), .i(so1), .o(so) );
 
// Compute the difference in exponents, provides shift amount
wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;
wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;
wire [6:0] xdif1;
 
// determine which fraction to denormalize
wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;
wire [FMSB+1:0] mfs1;
 
// Determine the sticky bit
wire sticky, sticky1;
redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );
 
// register inputs to shifter and shift
delay1 #(1)      d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );
delay1 #(7)      d15(.clk(clk), .ce(ce), .i(xdif),   .o(xdif1) );
delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs),    .o(mfs1) );
 
wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)|sticky1;
 
// sync control signals
delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );
delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) );
delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) );
delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) );
delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) );
delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );
delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );
delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );
delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );
delay1 #(1) d11(.clk(clk), .ce(ce), .i(op), .o(op1) );
 
// Sort operands and perform add/subtract
// addition can generate an extra bit, subtract can't go negative
wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1;
wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0};
wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;
wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;
wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;
 
always @*
        casex({aInf1&bInf1,aNan1,bNan1})
        3'b1xx:         mo1 = {1'b0,op1,{FMSB-1{1'b0}},op1,{FMSB{1'b0}}};       // inf +/- inf - generate QNaN on subtract, inf on add
        3'bx1x:         mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};
        3'bxx1:         mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};
        default:        mo1 = {mab,{FMSB-2{1'b0}}};     // mab has an extra lead bit and two trailing bits
        endcase
 
delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );
 
endmodule
 

Browse

Tools

Subversion Repositories thor

[/] [thor/] [trunk/] [rtl/] [verilog/] [fpUnit/] [fpAddsub.v] - Blame information for rev 23

Line No.	Rev	Author	Line
1	6	robfinch	`/* ===============================================================`
2			`(C) 2006 Robert Finch`
3			`All rights reserved.`
4			`rob@birdcomputer.ca`
5
6			`fpAddsub.v`
7			`- floating point adder/subtracter`
8			`- two cycle latency`
9			`- can issue every clock cycle`
10			`- parameterized width`
11			`- IEEE 754 representation`
12
13			`This source code is free for use and modification for`
14			`non-commercial or evaluation purposes, provided this`
15			`copyright statement and disclaimer remains present in`
16			`the file.`
17
18			`If you do modify the code, please state the origin and`
19			`note that you have modified the code.`
20
21			`NO WARRANTY.`
22			`THIS Work, IS PROVIDEDED "AS IS" WITH NO WARRANTIES OF`
23			`ANY KIND, WHETHER EXPRESS OR IMPLIED. The user must assume`
24			`the entire risk of using the Work.`
25
26			`IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR`
27			`ANY INCIDENTAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES`
28			`WHATSOEVER RELATING TO THE USE OF THIS WORK, OR YOUR`
29			`RELATIONSHIP WITH THE AUTHOR.`
30
31			`IN ADDITION, IN NO EVENT DOES THE AUTHOR AUTHORIZE YOU`
32			`TO USE THE WORK IN APPLICATIONS OR SYSTEMS WHERE THE`
33			`WORK'S FAILURE TO PERFORM CAN REASONABLY BE EXPECTED`
34			`TO RESULT IN A SIGNIFICANT PHYSICAL INJURY, OR IN LOSS`
35			`OF LIFE. ANY SUCH USE BY YOU IS ENTIRELY AT YOUR OWN RISK,`
36			`AND YOU AGREE TO HOLD THE AUTHOR AND CONTRIBUTORS HARMLESS`
37			`FROM ANY CLAIMS OR LOSSES RELATING TO SUCH UNAUTHORIZED`
38			`USE.`
39
40			`This adder/subtractor handles denormalized numbers.`
41			`It has a two cycle latency.`
42			`The output format is of an internal expanded representation`
43			`in preparation to be fed into a normalization unit, then`
44			`rounding. Basically, it's the same as the regular format`
45			`except the mantissa is doubled in size, the leading two`
46			`bits of which are assumed to be whole bits.`
47
48			`Ref: Webpack 8.2 Spartan3-4 xc3s1000-4ft256`
49			`580 LUTS / 315 slices / 74 MHz`
50			`=============================================================== */`
51
52			`module fpAddsub(clk, ce, rm, op, a, b, o);`
53			`parameter WID = 32;`
54			`localparam MSB = WID-1;`
55			`localparam EMSB = WID==80 ? 14 :`
56			`WID==64 ? 10 :`
57			`WID==52 ? 10 :`
58			`WID==48 ? 10 :`
59			`WID==44 ? 10 :`
60			`WID==42 ? 10 :`
61			`WID==40 ? 9 :`
62			`WID==32 ? 7 :`
63			`WID==24 ? 6 : 4;`
64			`localparam FMSB = WID==80 ? 63 :`
65			`WID==64 ? 51 :`
66			`WID==52 ? 39 :`
67			`WID==48 ? 35 :`
68			`WID==44 ? 31 :`
69			`WID==42 ? 29 :`
70			`WID==40 ? 28 :`
71			`WID==32 ? 22 :`
72			`WID==24 ? 15 : 9;`
73
74			`localparam FX = (FMSB+2)*2-1; // the MSB of the expanded fraction`
75			`localparam EX = FX + 1 + EMSB + 1 + 1 - 1;`
76
77			`input clk; // system clock`
78			`input ce; // core clock enable`
79			`input [1:0] rm; // rounding mode`
80			`input op; // operation 0 = add, 1 = subtract`
81			`input [WID-1:0] a; // operand a`
82			`input [WID-1:0] b; // operand b`
83			`output [EX:0] o; // output`
84
85
86			`// variables`
87			`wire so; // sign output`
88			`wire [EMSB:0] xo; // de normalized exponent output`
89			`reg [EMSB:0] xo1; // de normalized exponent output`
90			`wire [FX:0] mo; // mantissa output`
91			`reg [FX:0] mo1; // mantissa output`
92
93			`assign o = {so,xo,mo};`
94
95			`// operands sign,exponent,mantissa`
96			`wire sa, sb;`
97			`wire [EMSB:0] xa, xb;`
98			`wire [FMSB:0] ma, mb;`
99			`wire [FMSB+1:0] fracta, fractb;`
100			`wire [FMSB+1:0] fracta1, fractb1;`
101
102			`// which has greater magnitude ? Used for sign calc`
103			`wire xa_gt_xb = xa > xb;`
104			`wire xa_gt_xb1;`
105			`wire a_gt_b = xa_gt_xb \|\| (xa==xb && ma > mb);`
106			`wire a_gt_b1;`
107			`wire az, bz; // operand a,b is zero`
108
109			`wire adn, bdn; // a,b denormalized ?`
110			`wire xaInf, xbInf;`
111			`wire aInf, bInf, aInf1, bInf1;`
112			`wire aNan, bNan, aNan1, bNan1;`
113
114			`wire [EMSB:0] xad = xa\|adn; // operand a exponent, compensated for denormalized numbers`
115			`wire [EMSB:0] xbd = xb\|bdn; // operand b exponent, compensated for denormalized numbers`
116
117			`fpDecomp #(WID) u1a (.i(a), .sgn(sa), .exp(xa), .man(ma), .fract(fracta), .xz(adn), .vz(az), .xinf(xaInf), .inf(aInf), .nan(aNan) );`
118			`fpDecomp #(WID) u1b (.i(b), .sgn(sb), .exp(xb), .man(mb), .fract(fractb), .xz(bdn), .vz(bz), .xinf(xbInf), .inf(bInf), .nan(bNan) );`
119
120			`// Figure out which operation is really needed an add or`
121			`// subtract ?`
122			`// If the signs are the same, use the orignal op,`
123			`// otherwise flip the operation`
124			`// a + b = add,+`
125			`// a + -b = sub, so of larger`
126			`// -a + b = sub, so of larger`
127			`// -a + -b = add,-`
128			`// a - b = sub, so of larger`
129			`// a - -b = add,+`
130			`// -a - b = add,-`
131			`// -a - -b = sub, so of larger`
132			`wire realOp = op ^ sa ^ sb;`
133			`wire realOp1;`
134			`wire op1;`
135
136			`// Find out if the result will be zero.`
137			`wire resZero = (realOp && xa==xb && ma==mb) \|\| // subtract, same magnitude`
138			`(az & bz); // both a,b zero`
139
140			`// Compute output exponent`
141			`//`
142			`// The output exponent is the larger of the two exponents,`
143			`// unless a subtract operation is in progress and the two`
144			`// numbers are equal, in which case the exponent should be`
145			`// zero.`
146
147			`always @(xaInf,xbInf,resZero,xa,xb,xa_gt_xb)`
148			`xo1 = (xaInf&xbInf) ? xa : resZero ? 0 : xa_gt_xb ? xa : xb;`
149
150			`// Compute output sign`
151			`reg so1;`
152			`always @*`
153			`case ({resZero,sa,op,sb}) // synopsys full_case parallel_case`
154			`4'b0000: so1 <= 0; // + + + = +`
155			`4'b0001: so1 <= !a_gt_b; // + + - = sign of larger`
156			`4'b0010: so1 <= !a_gt_b; // + - + = sign of larger`
157			`4'b0011: so1 <= 0; // + - - = +`
158			`4'b0100: so1 <= a_gt_b; // - + + = sign of larger`
159			`4'b0101: so1 <= 1; // - + - = -`
160			`4'b0110: so1 <= 1; // - - + = -`
161			`4'b0111: so1 <= a_gt_b; // - - - = sign of larger`
162			`4'b1000: so1 <= 0; // A + B, sign = +`
163			`4'b1001: so1 <= rm==3; // A + -B, sign = + unless rounding down`
164			`4'b1010: so1 <= rm==3; // A - B, sign = + unless rounding down`
165			`4'b1011: so1 <= 0; // +A - -B, sign = +`
166			`4'b1100: so1 <= rm==3; // -A + B, sign = + unless rounding down`
167			`4'b1101: so1 <= 1; // -A + -B, sign = -`
168			`4'b1110: so1 <= 1; // -A - +B, sign = -`
169			`4'b1111: so1 <= rm==3; // -A - -B, sign = + unless rounding down`
170			`endcase`
171
172			`delay2 #(EMSB+1) d1(.clk(clk), .ce(ce), .i(xo1), .o(xo) );`
173			`delay2 #(1) d2(.clk(clk), .ce(ce), .i(so1), .o(so) );`
174
175			`// Compute the difference in exponents, provides shift amount`
176			`wire [EMSB:0] xdiff = xa_gt_xb ? xad - xbd : xbd - xad;`
177			`wire [6:0] xdif = xdiff > FMSB+3 ? FMSB+3 : xdiff;`
178			`wire [6:0] xdif1;`
179
180			`// determine which fraction to denormalize`
181			`wire [FMSB+1:0] mfs = xa_gt_xb ? fractb : fracta;`
182			`wire [FMSB+1:0] mfs1;`
183
184			`// Determine the sticky bit`
185			`wire sticky, sticky1;`
186			`redor64 u1 (.a(xdif), .b({mfs,2'b0}), .o(sticky) );`
187
188			`// register inputs to shifter and shift`
189			`delay1 #(1) d16(.clk(clk), .ce(ce), .i(sticky), .o(sticky1) );`
190			`delay1 #(7) d15(.clk(clk), .ce(ce), .i(xdif), .o(xdif1) );`
191			`delay1 #(FMSB+2) d14(.clk(clk), .ce(ce), .i(mfs), .o(mfs1) );`
192
193			`wire [FMSB+3:0] md1 = ({mfs1,2'b0} >> xdif1)\|sticky1;`
194
195			`// sync control signals`
196			`delay1 #(1) d4 (.clk(clk), .ce(ce), .i(xa_gt_xb), .o(xa_gt_xb1) );`
197			`delay1 #(1) d17(.clk(clk), .ce(ce), .i(a_gt_b), .o(a_gt_b1) );`
198			`delay1 #(1) d5 (.clk(clk), .ce(ce), .i(realOp), .o(realOp1) );`
199			`delay1 #(FMSB+2) d5a(.clk(clk), .ce(ce), .i(fracta), .o(fracta1) );`
200			`delay1 #(FMSB+2) d6a(.clk(clk), .ce(ce), .i(fractb), .o(fractb1) );`
201			`delay1 #(1) d7 (.clk(clk), .ce(ce), .i(aInf), .o(aInf1) );`
202			`delay1 #(1) d8 (.clk(clk), .ce(ce), .i(bInf), .o(bInf1) );`
203			`delay1 #(1) d9 (.clk(clk), .ce(ce), .i(aNan), .o(aNan1) );`
204			`delay1 #(1) d10(.clk(clk), .ce(ce), .i(bNan), .o(bNan1) );`
205			`delay1 #(1) d11(.clk(clk), .ce(ce), .i(op), .o(op1) );`
206
207			`// Sort operands and perform add/subtract`
208			`// addition can generate an extra bit, subtract can't go negative`
209			`wire [FMSB+3:0] oa = xa_gt_xb1 ? {fracta1,2'b0} : md1;`
210			`wire [FMSB+3:0] ob = xa_gt_xb1 ? md1 : {fractb1,2'b0};`
211			`wire [FMSB+3:0] oaa = a_gt_b1 ? oa : ob;`
212			`wire [FMSB+3:0] obb = a_gt_b1 ? ob : oa;`
213			`wire [FMSB+4:0] mab = realOp1 ? oaa - obb : oaa + obb;`
214
215			`always @*`
216			`casex({aInf1&bInf1,aNan1,bNan1})`
217			`3'b1xx: mo1 = {1'b0,op1,{FMSB-1{1'b0}},op1,{FMSB{1'b0}}}; // inf +/- inf - generate QNaN on subtract, inf on add`
218			`3'bx1x: mo1 = {1'b0,fracta1[FMSB+1:0],{FMSB{1'b0}}};`
219			`3'bxx1: mo1 = {1'b0,fractb1[FMSB+1:0],{FMSB{1'b0}}};`
220			`default: mo1 = {mab,{FMSB-2{1'b0}}}; // mab has an extra lead bit and two trailing bits`
221			`endcase`
222
223			`delay1 #(FX+1) d3(.clk(clk), .ce(ce), .i(mo1), .o(mo) );`
224
225			`endmodule`
226