1 |
48 |
alirezamon |
//////////////////////////////////////////////////////////////////////
// //
// pfpu32_addsub //
// //
// This file is part of the mor1kx project //
// https://github.com/openrisc/mor1kx //
// //
// Description //
// addition/subtraction pipeline for single precision floating //
// point numbers //
// //
// Author(s): //
// - Original design (FPU100) - //
// Jidan Al-eryani, jidan@gmx.net //
// - Conv. to Verilog and inclusion in OR1200 - //
// Julius Baxter, julius@opencores.org //
// - Update for mor1kx, //
// bug fixing and further development - //
// Andrey Bacherov, avbacherov@opencores.org //
// //
//////////////////////////////////////////////////////////////////////
// //
// Copyright (C) 2006, 2010, 2014 //
// //
// This source file may be used and distributed without //
// restriction provided that this copyright statement is not //
// removed from the file and that any derivative work contains //
// the original copyright notice and the associated disclaimer. //
// //
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //
// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //
// POSSIBILITY OF SUCH DAMAGE. //
//////////////////////////////////////////////////////////////////////
|
44 |
|
|
|
45 |
|
|
`include "mor1kx-defines.v"
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
// pfpu32_addsub
//
// Three-stage registered pipeline that adds or subtracts two pre-decoded
// single precision operands:
//   Stage #1 : detect inf/invalid inputs, pick the not-shifted and the
//              right-shifted operand, compute the right-shift amount.
//   Stage #2 : right shift with sticky-bit collection, then add/subtract.
//   Stage #3 : count leading zeros of the sum, derive the left-shift amount
//              and the matching exponent, register the module outputs.
//
// Every stage register advances only while adv_i is high. Only the three
// "ready" flags are cleared by rst / flush_i; the data registers have no
// reset, so outputs are meaningful only when add_rdy_o is high.
//
// `OR_ASYNC_RST is not defined in this module; it is presumably provided by
// the project defines header to optionally extend the sensitivity list with
// an asynchronous reset edge -- TODO confirm against that header.
module pfpu32_addsub
(
   input             clk,
   input             rst,
   input             flush_i,  // flush pipe
   input             adv_i,    // advance pipe
   input             start_i,  // start add/sub
   input             is_sub_i, // 1: subtraction, 0: addition
   // input 'a' related values
   input             signa_i,
   input       [9:0] exp10a_i,
   input      [23:0] fract24a_i,
   input             infa_i,
   // input 'b' related values
   input             signb_i,
   input       [9:0] exp10b_i,
   input      [23:0] fract24b_i,
   input             infb_i,
   // 'a'/'b' related
   input             snan_i,
   input             qnan_i,
   input             anan_sign_i,
   input             addsub_agtb_i,
   input             addsub_aeqb_i,
   // outputs
   output reg        add_rdy_o,       // ready
   output reg        add_sign_o,      // signum
   output reg        add_sub_0_o,     // flag that actual subtraction is performed and result is zero
   output reg  [4:0] add_shl_o,       // do left shift in align stage
   output reg  [9:0] add_exp10shl_o,  // exponent for left shift align
   output reg  [9:0] add_exp10sh0_o,  // exponent for no shift in align
   output reg [27:0] add_fract28_o,   // fractional with appended {r,s} bits
   output reg        add_inv_o,       // invalid operation flag
   output reg        add_inf_o,       // infinity output reg
   output reg        add_snan_o,      // signaling NaN output reg
   output reg        add_qnan_o,      // quiet NaN output reg
   output reg        add_anan_sign_o  // signum for output nan
);

  /*
     Any stage's output is registered.
     Definitions:
       s??o_name - "S"tage number "??", "O"utput
       s??t_name - "S"tage number "??", "T"emporary (internally)
  */

  /* Stage #1: pre addition / subtraction align */

  // detection of some exceptions
  //   inf - inf -> invalid operation; snan output
  //   (both operands infinite and their effective signs differ)
  wire s1t_inv = infa_i & infb_i &
                 (signa_i ^ (is_sub_i ^ signb_i));
  //   inf input
  wire s1t_inf_i = infa_i | infb_i;

  // signums for calculation
  // (a subtraction is handled as an addition with the sign of 'b' inverted)
  wire s1t_calc_signa = signa_i;
  wire s1t_calc_signb = (signb_i ^ is_sub_i);

  // not shifted operand: 'a' when addsub_agtb_i is set, otherwise 'b'
  wire [23:0] s1t_fract24_nsh =
    addsub_agtb_i ? fract24a_i : fract24b_i;

  // operand for right shift: the other one
  wire [23:0] s1t_fract24_fsh =
    addsub_agtb_i ? fract24b_i : fract24a_i;

  // shift amount: exponent of the not-shifted operand minus the other one
  wire [9:0] s1t_exp_diff =
    addsub_agtb_i ? (exp10a_i - exp10b_i) :
                    (exp10b_i - exp10a_i);

  // limiter by 31: any set bit in [9:5] forces all five low bits to 1
  wire [4:0] s1t_shr = s1t_exp_diff[4:0] | {5{|s1t_exp_diff[9:5]}};

  // stage #1 outputs
  //   input related
  reg s1o_inv, s1o_inf_i,
      s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign;
  //   computation related
  reg        s1o_aeqb;
  reg  [4:0] s1o_shr;
  reg        s1o_sign_nsh;
  reg        s1o_op_sub;
  reg  [9:0] s1o_exp10c;
  reg [23:0] s1o_fract24_nsh;
  reg [23:0] s1o_fract24_fsh;
  //   registering
  always @(posedge clk) begin
    if(adv_i) begin
      // input related
      s1o_inv         <= s1t_inv;
      s1o_inf_i       <= s1t_inf_i;
      s1o_snan_i      <= snan_i;
      s1o_qnan_i      <= qnan_i;
      s1o_anan_i_sign <= anan_sign_i;
      // computation related
      // (shift amount and both fractionals are forced to zero when either
      //  operand is infinite)
      s1o_aeqb        <= addsub_aeqb_i;
      s1o_shr         <= s1t_shr & {5{~s1t_inf_i}};
      s1o_sign_nsh    <= addsub_agtb_i ? s1t_calc_signa : s1t_calc_signb;
      s1o_op_sub      <= s1t_calc_signa ^ s1t_calc_signb;
      s1o_exp10c      <= addsub_agtb_i ? exp10a_i : exp10b_i;
      s1o_fract24_nsh <= s1t_fract24_nsh & {24{~s1t_inf_i}};
      s1o_fract24_fsh <= s1t_fract24_fsh & {24{~s1t_inf_i}};
    end // advance
  end // posedge clock

  // ready is special case: it is the only stage #1 register that is
  // cleared by reset and by pipeline flush
  reg s1o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s1o_ready <= 1'b0;
    else if(flush_i)
      s1o_ready <= 1'b0;
    else if(adv_i)
      s1o_ready <= start_i;
  end // posedge clock


  /* Stage #2: multiplex and shift */

  // shifter: two zero bits are appended below the 24-bit fractional
  // before the right shift
  wire [25:0] s2t_fract26_fsh = {s1o_fract24_fsh,2'd0};
  wire [25:0] s2t_fract26_shr = s2t_fract26_fsh >> s1o_shr;

  // sticky: OR of the fractional bits that the right shift drops out of
  // the 26-bit shifter word
  reg s2t_sticky;
  always @* begin
    case(s1o_shr)
      5'd0, 5'd1, 5'd2 : s2t_sticky = 1'b0; // two added zero bits
      5'd3 : s2t_sticky = s1o_fract24_fsh[0];
      5'd4 : s2t_sticky = |s1o_fract24_fsh[1:0];
      5'd5 : s2t_sticky = |s1o_fract24_fsh[2:0];
      5'd6 : s2t_sticky = |s1o_fract24_fsh[3:0];
      5'd7 : s2t_sticky = |s1o_fract24_fsh[4:0];
      5'd8 : s2t_sticky = |s1o_fract24_fsh[5:0];
      5'd9 : s2t_sticky = |s1o_fract24_fsh[6:0];
      5'd10: s2t_sticky = |s1o_fract24_fsh[7:0];
      5'd11: s2t_sticky = |s1o_fract24_fsh[8:0];
      5'd12: s2t_sticky = |s1o_fract24_fsh[9:0];
      5'd13: s2t_sticky = |s1o_fract24_fsh[10:0];
      5'd14: s2t_sticky = |s1o_fract24_fsh[11:0];
      5'd15: s2t_sticky = |s1o_fract24_fsh[12:0];
      5'd16: s2t_sticky = |s1o_fract24_fsh[13:0];
      5'd17: s2t_sticky = |s1o_fract24_fsh[14:0];
      5'd18: s2t_sticky = |s1o_fract24_fsh[15:0];
      5'd19: s2t_sticky = |s1o_fract24_fsh[16:0];
      5'd20: s2t_sticky = |s1o_fract24_fsh[17:0];
      5'd21: s2t_sticky = |s1o_fract24_fsh[18:0];
      5'd22: s2t_sticky = |s1o_fract24_fsh[19:0];
      5'd23: s2t_sticky = |s1o_fract24_fsh[20:0];
      5'd24: s2t_sticky = |s1o_fract24_fsh[21:0];
      5'd25: s2t_sticky = |s1o_fract24_fsh[22:0];
      default: s2t_sticky = |s1o_fract24_fsh[23:0];
    endcase
  end

  // add/sub of non-shifted and shifted operands
  wire [27:0] s2t_fract28_shr = {1'b0,s2t_fract26_shr,s2t_sticky};

  // subtraction is done as two's complement addition:
  // invert the shifted operand and add 1 when s1o_op_sub is set
  wire [27:0] s2t_fract28_add = {1'b0,s1o_fract24_nsh,3'd0} +
                                (s2t_fract28_shr ^ {28{s1o_op_sub}}) +
                                {27'd0,s1o_op_sub};


  // stage #2 outputs
  //   input related
  reg s2o_inv, s2o_inf_i,
      s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign;
  //   computational related
  reg        s2o_signc;
  reg  [9:0] s2o_exp10c;
  reg [26:0] s2o_fract27;
  reg        s2o_sub_0;   // actual operation is subtraction and the result is zero
  reg        s2o_sticky;  // rounding support
  //   registering
  always @(posedge clk) begin
    if(adv_i) begin
      // input related
      s2o_inv         <= s1o_inv;
      s2o_inf_i       <= s1o_inf_i;
      s2o_snan_i      <= s1o_snan_i;
      s2o_qnan_i      <= s1o_qnan_i;
      s2o_anan_i_sign <= s1o_anan_i_sign;
      // computation related
      s2o_signc       <= s1o_sign_nsh;
      s2o_exp10c      <= s1o_exp10c;
      s2o_fract27     <= s2t_fract28_add[27:1]; // LSB of the sum is dropped here
      s2o_sub_0       <= s1o_aeqb & s1o_op_sub;
      s2o_sticky      <= s2t_sticky;            // sticky is carried separately
    end // advance
  end // posedge clock

  // ready is special case
  reg s2o_ready;
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      s2o_ready <= 1'b0;
    else if(flush_i)
      s2o_ready <= 1'b0;
    else if(adv_i)
      s2o_ready <= s1o_ready;
  end // posedge clock


  /* Stage #3: update exponent */

  // for possible left shift
  // [26] bit is right shift flag
  // Purely combinational priority encoder, hence blocking assignments
  // and a default item.
  reg [4:0] s3t_nlz;
  always @* begin
    casez(s2o_fract27)
      27'b1??????????????????????????: s3t_nlz = 5'd0; // [26] bit: shift right
      27'b01?????????????????????????: s3t_nlz = 5'd0; // 1 is in place
      27'b001????????????????????????: s3t_nlz = 5'd1;
      27'b0001???????????????????????: s3t_nlz = 5'd2;
      27'b00001??????????????????????: s3t_nlz = 5'd3;
      27'b000001?????????????????????: s3t_nlz = 5'd4;
      27'b0000001????????????????????: s3t_nlz = 5'd5;
      27'b00000001???????????????????: s3t_nlz = 5'd6;
      27'b000000001??????????????????: s3t_nlz = 5'd7;
      27'b0000000001?????????????????: s3t_nlz = 5'd8;
      27'b00000000001????????????????: s3t_nlz = 5'd9;
      27'b000000000001???????????????: s3t_nlz = 5'd10;
      27'b0000000000001??????????????: s3t_nlz = 5'd11;
      27'b00000000000001?????????????: s3t_nlz = 5'd12;
      27'b000000000000001????????????: s3t_nlz = 5'd13;
      27'b0000000000000001???????????: s3t_nlz = 5'd14;
      27'b00000000000000001??????????: s3t_nlz = 5'd15;
      27'b000000000000000001?????????: s3t_nlz = 5'd16;
      27'b0000000000000000001????????: s3t_nlz = 5'd17;
      27'b00000000000000000001???????: s3t_nlz = 5'd18;
      27'b000000000000000000001??????: s3t_nlz = 5'd19;
      27'b0000000000000000000001?????: s3t_nlz = 5'd20;
      27'b00000000000000000000001????: s3t_nlz = 5'd21;
      27'b000000000000000000000001???: s3t_nlz = 5'd22;
      27'b0000000000000000000000001??: s3t_nlz = 5'd23;
      27'b00000000000000000000000001?: s3t_nlz = 5'd24;
      27'b000000000000000000000000001: s3t_nlz = 5'd25;
      default:                         s3t_nlz = 5'd0; // zero result
    endcase
  end // always

  // left shift amount and corrected exponent
  wire [4:0] s3t_nlz_m1    = (s3t_nlz - 5'd1);
  wire [9:0] s3t_exp10c_m1 = s2o_exp10c - 10'd1;
  wire [9:0] s3t_exp10c_mz = s2o_exp10c - {5'd0,s3t_nlz};
  wire [4:0] s3t_shl;
  wire [9:0] s3t_exp10shl;
  assign {s3t_shl,s3t_exp10shl} =
      // shift isn't needed or impossible
    (~(|s3t_nlz) | (s2o_exp10c == 10'd1)) ?
                              {5'd0,s2o_exp10c} :
      // normalization is possible
    (s2o_exp10c >  s3t_nlz) ? {s3t_nlz,s3t_exp10c_mz} :
      // denormalized cases: the exponent is pinned to 1 and the shift is
      // limited accordingly
    (s2o_exp10c == s3t_nlz) ? {s3t_nlz_m1,10'd1} :
                              {s3t_exp10c_m1[4:0],10'd1};


  // registering output
  always @(posedge clk) begin
    if(adv_i) begin
      // input related
      add_inv_o       <= s2o_inv;
      add_inf_o       <= s2o_inf_i;
      add_snan_o      <= s2o_snan_i;
      add_qnan_o      <= s2o_qnan_i;
      add_anan_sign_o <= s2o_anan_i_sign;
      // computation related
      add_sign_o      <= s2o_signc;
      add_sub_0_o     <= s2o_sub_0;
      add_shl_o       <= s3t_shl;
      add_exp10shl_o  <= s3t_exp10shl;
      add_exp10sh0_o  <= s2o_exp10c;
      add_fract28_o   <= {s2o_fract27,s2o_sticky};
    end // advance
  end // posedge clock

  // ready is special case
  always @(posedge clk `OR_ASYNC_RST) begin
    if (rst)
      add_rdy_o <= 1'b0;
    else if(flush_i)
      add_rdy_o <= 1'b0;
    else if(adv_i)
      add_rdy_o <= s2o_ready;
  end // posedge clock

endmodule // pfpu32_addsub
|