//////////////////////////////////////////////////////////////////////
////                                                              ////
////  or1200_fpu_post_norm_div                                    ////
////                                                              ////
////  This file is part of the OpenRISC 1200 project              ////
////  http://opencores.org/project,or1k                           ////
////                                                              ////
////  Description                                                 ////
////  post-normalization entity for the division unit             ////
////                                                              ////
////  To Do:                                                      ////
////                                                              ////
////                                                              ////
////  Author(s):                                                  ////
////      - Original design (FPU100) -                            ////
////        Jidan Al-eryani, jidan@gmx.net                        ////
////      - Conv. to Verilog and inclusion in OR1200 -            ////
////        Julius Baxter, julius@opencores.org                   ////
////                                                              ////
//////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006, 2010
//
// This source file may be used and distributed without
// restriction provided that this copyright statement is not
// removed from the file and that any derivative work contains
// the original copyright notice and the associated disclaimer.
//
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//


// Post-normalization for the floating-point divider.
//
// Takes the raw quotient/remainder and a 10-bit exponent and produces a
// packed result word {sign, exponent[7:0], fraction[22:0]} plus an
// inexact flag.
//
// Ports:
//   clk_i     - clock; every register in this module updates on its rising edge
//   opa_i     - dividend operand word
//   opb_i     - divisor operand word
//   qutnt_i   - quotient: bit 26 = hidden bit, 25:3 = fraction, 2:0 = g/r/s
//   rmndr_i   - remainder; any non-zero bit contributes to sticky / inexact
//   exp_10_i  - 10-bit exponent; bit 9 set is treated as "negative"
//   sign_i    - sign bit placed in the MSB of the result
//   rmode_i   - rounding mode: 00 nearest-even, 10 up, 11 down,
//               anything else truncates
//   output_o  - registered result word
//   ine_o     - registered inexact flag
//
// NOTE(review): the later stages read s_qutnt_i, s_rmndr_i, s_rmode_i,
// s_sign_i and s_opa_i/s_opb_i directly, with no per-stage delay
// registers. Presumably the caller holds the inputs stable until the
// result is consumed - confirm against the instantiating divider wrapper.
//
// NOTE: many slices below are hard-coded for the 32-bit layout
// ([30:23], [22:0], 27-bit quotient patterns, 9/10-bit exponents), so the
// width parameters cannot simply be overridden with this body as written.
module or1200_fpu_post_norm_div
  (
   clk_i,
   opa_i,
   opb_i,
   qutnt_i,
   rmndr_i,
   exp_10_i,
   sign_i,
   rmode_i,
   output_o,
   ine_o
   );

   parameter FP_WIDTH = 32;
   parameter MUL_SERIAL = 0; // 0 for parallel multiplier, 1 for serial
   parameter MUL_COUNT = 11; //11 for parallel multiplier, 34 for serial
   parameter FRAC_WIDTH = 23;
   parameter EXP_WIDTH = 8;
   parameter ZERO_VECTOR = 31'd0;
   parameter INF  = 31'b1111111100000000000000000000000;
   parameter QNAN = 31'b1111111110000000000000000000000;
   parameter SNAN = 31'b1111111100000000000000000000001;

   input clk_i;
   input [FP_WIDTH-1:0] opa_i;
   input [FP_WIDTH-1:0] opb_i;
   input [FRAC_WIDTH+3:0] qutnt_i;
   input [FRAC_WIDTH+3:0] rmndr_i;
   input [EXP_WIDTH+1:0]  exp_10_i;
   input 		  sign_i;
   input [1:0] 		  rmode_i;
   output reg [FP_WIDTH-1:0] output_o;
   output reg 		     ine_o;

   // input&output register wires
   reg [FP_WIDTH-1:0] 	     s_opa_i;
   reg [FP_WIDTH-1:0] 	     s_opb_i;
   reg [EXP_WIDTH-1:0] 	     s_expa;
   reg [EXP_WIDTH-1:0] 	     s_expb;
   reg [FRAC_WIDTH+3:0]      s_qutnt_i;
   reg [FRAC_WIDTH+3:0]      s_rmndr_i;
   reg [5:0] 		     s_r_zeros;
   reg [EXP_WIDTH+1:0] 	     s_exp_10_i;
   reg 			     s_sign_i;
   reg [1:0] 		     s_rmode_i;
   wire [FP_WIDTH-1:0] 	     s_output_o;

   wire 		     s_ine_o, s_overflow;
   wire 		     s_opa_dn, s_opb_dn;
   wire 		     s_qutdn;
   wire [9:0] 		     s_exp_10b;
   reg [5:0] 		     s_shr1;
   reg [5:0] 		     s_shl1;
   wire 		     s_shr2;
   reg [8:0] 		     s_expo1;
   wire [8:0] 		     s_expo2;
   reg [8:0] 		     s_expo3;
   reg [26:0] 		     s_fraco1;
   wire [24:0] 		     s_frac_rnd;
   reg [24:0] 		     s_fraco2;
   wire 		     s_guard, s_round, s_sticky, s_roundup;
   wire 		     s_lost;
   wire 		     s_op_0, s_opab_0, s_opb_0;
   wire 		     s_infa, s_infb;
   wire 		     s_nan_in, s_nan_op, s_nan_a, s_nan_b;
   wire 		     s_inf_result;

   // Input registers
   always @(posedge clk_i)
     begin
	s_opa_i <= opa_i;
	s_opb_i <= opb_i;
	s_expa <= opa_i[30:23];
	s_expb <= opb_i[30:23];
	s_qutnt_i <= qutnt_i;
	s_rmndr_i <= rmndr_i;
	s_exp_10_i <= exp_10_i;
	s_sign_i <= sign_i;
	s_rmode_i <= rmode_i;
     end

   // Output Register
   always @(posedge clk_i)
     begin
	output_o <= s_output_o;
	ine_o <= s_ine_o;
     end

   // qutnt_i
   // 26 25                    3
   // |  |                     |
   // h  fffffffffffffffffffffff grs

   //*** Stage 1 ****
   // figure out the exponent and how far the fraction has to be shifted
   // right or left

   // Operand has a zero exponent field but a non-zero fraction.
   // Both terms use the registered copies so exponent and fraction belong
   // to the same sample (the fraction was previously taken from the raw
   // input port). These two flags are not consumed inside this module.
   assign s_opa_dn = !(|s_expa) & (|s_opa_i[22:0]);
   assign s_opb_dn = !(|s_expb) & (|s_opb_i[22:0]);

   // Quotient hidden bit is clear: the quotient needs a 1-bit left
   // normalisation and the exponent is decremented to compensate.
   assign s_qutdn =  !s_qutnt_i[26];

   assign s_exp_10b = s_exp_10_i - {9'd0,s_qutdn};

   wire [9:0] 		     v_shr;
   wire [9:0] 		     v_shl;

   // Exponent negative (bit 9) or zero: shift the fraction right and
   // clamp the exponent to 1 (see s_expo1 below).
   assign v_shr = (s_exp_10b[9] | !(|s_exp_10b)) ?
		   (10'd1 - s_exp_10b) - {9'd0,s_qutdn} : 10'd0;

   // Left shift by one only when the exponent is positive, bit 8 is clear
   // and the quotient hidden bit is missing; otherwise no left shift.
   assign v_shl = (s_exp_10b[9] | !(|s_exp_10b)) ?
		   10'd0 :
		   s_exp_10b[8] ?
		   10'd0 : {9'd0,s_qutdn};

   always @(posedge clk_i)
     if (s_exp_10b[9] | !(|s_exp_10b))
       s_expo1 <= 9'd1;
     else
       s_expo1 <= s_exp_10b[8:0];

   // Saturate the right-shift amount at 63 when v_shr[6] is set.
   always @(posedge clk_i)
     s_shr1 <= v_shr[6] ? 6'b111111 : v_shr[5:0];

   always @(posedge clk_i)
     s_shl1 <= v_shl[5:0];

   // *** Stage 2 ***
   // Shifting the fraction and rounding

   // shift the fraction
   always @(posedge clk_i)
     if (|s_shr1)
       s_fraco1 <= s_qutnt_i >> s_shr1;
     else
       s_fraco1 <= s_qutnt_i << s_shl1;

   // No hidden bit after shifting: exponent is one lower.
   assign s_expo2 = s_fraco1[26] ? s_expo1 : s_expo1 - 9'd1;

   // Count trailing zeros of the registered quotient (27 when it is all
   // zero). Purely combinational, hence blocking assignments. The items
   // are mutually exclusive and cover every 0/1 pattern, so the
   // full_case/parallel_case directives match the code.
   //s_r_zeros <= count_r_zeros(s_qutnt_i);
   always @*
     casex(s_qutnt_i) // synopsys full_case parallel_case
       27'b??????????????????????????1: s_r_zeros = 6'd0;
       27'b?????????????????????????10: s_r_zeros = 6'd1;
       27'b????????????????????????100: s_r_zeros = 6'd2;
       27'b???????????????????????1000: s_r_zeros = 6'd3;
       27'b??????????????????????10000: s_r_zeros = 6'd4;
       27'b?????????????????????100000: s_r_zeros = 6'd5;
       27'b????????????????????1000000: s_r_zeros = 6'd6;
       27'b???????????????????10000000: s_r_zeros = 6'd7;
       27'b??????????????????100000000: s_r_zeros = 6'd8;
       27'b?????????????????1000000000: s_r_zeros = 6'd9;
       27'b????????????????10000000000: s_r_zeros = 6'd10;
       27'b???????????????100000000000: s_r_zeros = 6'd11;
       27'b??????????????1000000000000: s_r_zeros = 6'd12;
       27'b?????????????10000000000000: s_r_zeros = 6'd13;
       27'b????????????100000000000000: s_r_zeros = 6'd14;
       27'b???????????1000000000000000: s_r_zeros = 6'd15;
       27'b??????????10000000000000000: s_r_zeros = 6'd16;
       27'b?????????100000000000000000: s_r_zeros = 6'd17;
       27'b????????1000000000000000000: s_r_zeros = 6'd18;
       27'b???????10000000000000000000: s_r_zeros = 6'd19;
       27'b??????100000000000000000000: s_r_zeros = 6'd20;
       27'b?????1000000000000000000000: s_r_zeros = 6'd21;
       27'b????10000000000000000000000: s_r_zeros = 6'd22;
       27'b???100000000000000000000000: s_r_zeros = 6'd23;
       27'b??1000000000000000000000000: s_r_zeros = 6'd24;
       27'b?10000000000000000000000000: s_r_zeros = 6'd25;
       27'b100000000000000000000000000: s_r_zeros = 6'd26;
       27'b000000000000000000000000000: s_r_zeros = 6'd27;
     endcase // casex (s_qutnt_i)

   // Non-zero bits were shifted out when the total right shift exceeds
   // the number of trailing zeros. The sum is widened by one bit so that
   // s_shr1 == 63 plus s_shr2 cannot wrap around to zero.
   assign s_lost = ({1'b0,s_shr1} + {6'd0,s_shr2}) > {1'b0,s_r_zeros};

   // ***Stage 3***
   // Rounding

   assign s_guard = s_fraco1[2];
   assign s_round = s_fraco1[1];
   assign s_sticky = s_fraco1[0] | (|s_rmndr_i);

   assign s_roundup = s_rmode_i==2'b00 ? // round to nearest even
		      s_guard & ((s_round | s_sticky) | s_fraco1[3]) :
		      s_rmode_i==2'b10 ? // round up
		      (s_guard | s_round | s_sticky) & !s_sign_i :
		      s_rmode_i==2'b11 ? // round down
		      (s_guard | s_round | s_sticky) & s_sign_i :
		      1'b0; // round to zero(truncate = no rounding)

   assign s_frac_rnd = s_roundup ? {1'b0,s_fraco1[26:3]} + 25'd1 :
		       {1'b0,s_fraco1[26:3]};

   // Rounding carried out of the 24-bit significand: shift right once
   // more and bump the exponent.
   assign s_shr2 = s_frac_rnd[24];

   always @(posedge clk_i)
     begin
	// Increment by the numeric constant 1. A string literal "1" here
	// would evaluate to its ASCII code (8'h31 = 49) and corrupt the
	// exponent.
	s_expo3 <= s_shr2 ? s_expo2 + 9'd1 : s_expo2;
	s_fraco2 <= s_shr2 ? {1'b0,s_frac_rnd[24:1]} : s_frac_rnd;
     end

   //
   // ***Stage 4****
   // Output

   // At least one operand has all of bits [30:0] clear
   assign s_op_0 = !((|s_opa_i[30:0]) & (|s_opb_i[30:0]));

   // Both operands have all of bits [30:0] clear
   assign s_opab_0 = !((|s_opa_i[30:0]) | (|s_opb_i[30:0]));

   // Divisor has all of bits [30:0] clear
   assign s_opb_0 = !(|s_opb_i[30:0]);

   // Exponent field all ones (infinity or NaN encoding)
   assign s_infa = &s_expa;

   assign s_infb = &s_expb;

   // All-ones exponent with non-zero fraction
   assign s_nan_a = s_infa & (|s_opa_i[22:0]);

   assign s_nan_b = s_infb & (|s_opb_i[22:0]);

   assign s_nan_in = s_nan_a | s_nan_b;

   assign s_nan_op = (s_infa & s_infb) | s_opab_0; // 0 / 0, inf / inf

   // Result exponent saturated/overflowed, or division by zero
   assign s_inf_result = (&s_expo3[7:0]) | s_expo3[8] | s_opb_0;

   assign s_overflow = s_inf_result & !(s_infa) & !s_opb_0;

   // Inexact is suppressed whenever either operand is zero.
   assign s_ine_o =  !s_op_0 &
		     (s_lost | (|s_fraco1[2:0]) | s_overflow | (|s_rmndr_i));

   // Result priority: NaN, then infinity, then zero, then the rounded value.
   assign s_output_o = (s_nan_in | s_nan_op) ?
		       {s_sign_i,QNAN} :
		       (s_infa | s_overflow | s_inf_result) ?
		       {s_sign_i,INF} :
		       (s_op_0 | s_infb) ?
		       {s_sign_i,ZERO_VECTOR} :
		       {s_sign_i,s_expo3[7:0],s_fraco2[22:0]};

endmodule // or1200_fpu_post_norm_div