/////////////////////////////////////////////////////////////////////
//// ////
//// FPU ////
//// Floating Point Unit (Double precision) ////
//// ////
//// Author: David Lundgren ////
//// davidklun@gmail.com ////
//// ////
/////////////////////////////////////////////////////////////////////
//// ////
//// Copyright (C) 2009 David Lundgren ////
//// davidklun@gmail.com ////
//// ////
//// This source file may be used and distributed without ////
//// restriction provided that this copyright statement is not ////
//// removed from the file and that any derivative work contains ////
//// the original copyright notice and the associated disclaimer.////
//// ////
//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY ////
//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ////
//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ////
//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR ////
//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ////
//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ////
//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ////
//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ////
//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ////
//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ////
//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ////
//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ////
//// POSSIBILITY OF SUCH DAMAGE. ////
//// ////
/////////////////////////////////////////////////////////////////////
34 |
|
|
|
35 |
|
|
`timescale 1ns / 100ps
|
36 |
|
|
|
37 |
|
|
// fpu_mul: pipelined multiplier for two 64-bit operands laid out as
// {sign[63], exponent[62:52], fraction[51:0]}.
//
// Ports
//   clk    : clock; every register below is updated on its rising edge.
//   rst    : synchronous reset; clears all pipeline registers and the
//            ready counter.
//   enable : clock enable for the whole pipeline and the ready counter.
//   opa/opb: operands.
//   ready  : goes high on the 18th enabled clock edge after reset and then
//            stays high while the counter is saturated.
//   outfp  : {sign, exponent_6[10:0], product_4[53:2]}.
//
// Behaviour visible in this module
//   * Every path from opa/opb to outfp passes through 18 registers, so a
//     result appears 18 enabled clock edges after its operands.
//   * An operand whose exponent field is 0 is treated as zero: its hidden
//     bit is cleared and the result exponent and mantissa are forced to 0.
//   * An operand whose exponent field is 2047 forces the exponent sum to
//     12'b110000000000, which drives the result to exponent 2047 with a
//     zero mantissa.  The mantissa of such an operand is not inspected here.
//   * The 106-bit mantissa product is truncated into product_4; bits [1:0]
//     of product_4 hold the next product bit and an OR of all lower bits,
//     and are not part of outfp.
module fpu_mul( clk, rst, enable, opa, opb, ready, outfp);
input		clk;
input		rst;
input		enable;
input	[63:0]	opa, opb;
output		ready;
output	[63:0]	outfp;

// ---------------------------------------------------------------------
// Sign pipeline (18 registers: sign_1 .. sign_17, sign)
// ---------------------------------------------------------------------
reg		sign, sign_1, sign_2, sign_3, sign_4, sign_5, sign_6, sign_7, sign_8;
reg		sign_9, sign_10, sign_11, sign_12, sign_13, sign_14, sign_15, sign_16, sign_17;

// ---------------------------------------------------------------------
// Operand unpacking and special-value flags
// ---------------------------------------------------------------------
reg	[51:0]	mantissa_a1, mantissa_a2;
reg	[51:0]	mantissa_b1, mantissa_b2;
reg	[10:0]	exponent_a;
reg	[10:0]	exponent_b;
reg		a_is_zero, b_is_zero, a_is_inf, b_is_inf, in_inf_1, in_inf_2;
reg		in_zero_1;

// ---------------------------------------------------------------------
// Ready counter
// ---------------------------------------------------------------------
reg		ready, count_ready, count_ready_0;
reg	[4:0]	count;

// ---------------------------------------------------------------------
// Exponent pipeline
// ---------------------------------------------------------------------
reg	[11:0]	exponent_terms_1, exponent_terms_2, exponent_terms_3, exponent_terms_4;
reg	[11:0]	exponent_terms_5, exponent_terms_6, exponent_terms_7;
reg	[11:0]	exponent_terms_8, exponent_terms_9;
reg		exponent_gt_expoffset;
reg	[11:0]	exponent_1;
wire	[11:0]	exponent = 0;	// constant zero, selected on underflow
reg	[11:0]	exponent_2, exponent_2_0, exponent_2_1;
reg		exponent_gt_prodshift, exponent_is_infinity, exponent_is_infinity_2;
reg	[11:0]	exponent_3;
reg	[11:0]	exponent_4;
reg	[11:0]	exponent_5, exponent_6;
reg		set_mantissa_zero;

// ---------------------------------------------------------------------
// Mantissa multiplier: delayed operand copies, partial products, sums
// ---------------------------------------------------------------------
reg	[52:0]	mul_a, mul_a1, mul_a2, mul_a3, mul_a4, mul_a5, mul_a6, mul_a7, mul_a8;
reg	[52:0]	mul_b, mul_b1, mul_b2, mul_b3, mul_b4, mul_b5, mul_b6, mul_b7, mul_b8;
reg	[40:0]	product_a;
reg	[16:0]	product_a_2, product_a_3, product_a_4, product_a_5, product_a_6;
reg	[16:0]	product_a_7, product_a_8, product_a_9, product_a_10;
reg	[40:0]	product_b;
reg	[40:0]	product_c;
reg	[25:0]	product_d;
reg	[33:0]	product_e;
reg	[33:0]	product_f;
reg	[35:0]	product_g;
reg	[28:0]	product_h;
reg	[28:0]	product_i;
reg	[30:0]	product_j;
reg	[41:0]	sum_0;
reg	[6:0]	sum_0_2, sum_0_3, sum_0_4, sum_0_5, sum_0_6, sum_0_7, sum_0_8, sum_0_9;
reg	[35:0]	sum_1;
reg	[9:0]	sum_1_2, sum_1_3, sum_1_4, sum_1_5, sum_1_6, sum_1_7, sum_1_8;
reg	[41:0]	sum_2;
reg	[6:0]	sum_2_2, sum_2_3, sum_2_4, sum_2_5, sum_2_6, sum_2_7;
reg	[35:0]	sum_3;
reg	[36:0]	sum_4;
reg	[9:0]	sum_4_2, sum_4_3, sum_4_4, sum_4_5;
reg	[27:0]	sum_5;
reg	[6:0]	sum_5_2, sum_5_3, sum_5_4;
reg	[29:0]	sum_6;
reg	[36:0]	sum_7;
reg	[16:0]	sum_7_2;
reg	[30:0]	sum_8;

// ---------------------------------------------------------------------
// Assembled product and normalisation
// ---------------------------------------------------------------------
reg		product_shift;	// 1 when the top product bit (sum_8[30]) is 0
reg	[105:0]	product;
reg	[105:0]	product_1;
reg	[105:0]	product_2, product_3;
reg		product_lsb;	// if there are any 1's in the remainder
				// (only ever written by reset in this module;
				//  the OR of the remainder goes to product_4[0])
reg	[55:0]	product_4;

// Result: product_4[54] (the leading bit) and product_4[1:0] are dropped.
wire	[63:0]	outfp = { sign, exponent_6[10:0], product_4[53:2]};

// =====================================================================
// Datapath pipeline
// =====================================================================
always @(posedge clk)
begin
	if (rst) begin
		// sign
		sign <= 0; sign_1 <= 0; sign_2 <= 0; sign_3 <= 0; sign_4 <= 0;
		sign_5 <= 0; sign_6 <= 0; sign_7 <= 0; sign_8 <= 0; sign_9 <= 0;
		sign_10 <= 0; sign_11 <= 0; sign_12 <= 0; sign_13 <= 0;
		sign_14 <= 0; sign_15 <= 0; sign_16 <= 0; sign_17 <= 0;
		// unpacked operands and flags
		mantissa_a1 <= 0;
		mantissa_b1 <= 0;
		mantissa_a2 <= 0;
		mantissa_b2 <= 0;
		exponent_a <= 0;
		exponent_b <= 0;
		a_is_zero <= 0; b_is_zero <= 0;
		a_is_inf <= 0; b_is_inf <= 0; in_inf_1 <= 0; in_inf_2 <= 0;
		in_zero_1 <= 0;
		// exponent
		exponent_terms_1 <= 0; exponent_terms_2 <= 0; exponent_terms_3 <= 0;
		exponent_terms_4 <= 0; exponent_terms_5 <= 0; exponent_terms_6 <= 0;
		exponent_terms_7 <= 0; exponent_terms_8 <= 0; exponent_terms_9 <= 0;
		exponent_gt_expoffset <= 0;
		exponent_1 <= 0;
		exponent_2_0 <= 0; exponent_2_1 <= 0; exponent_2 <= 0; exponent_gt_prodshift <= 0;
		exponent_is_infinity <= 0; exponent_is_infinity_2 <= 0;
		exponent_3 <= 0;
		exponent_4 <= 0;
		exponent_5 <= 0; exponent_6 <= 0;
		set_mantissa_zero <= 0;
		// multiplier operands
		mul_a <= 0; mul_b <= 0; mul_a1 <= 0; mul_b1 <= 0; mul_a2 <= 0; mul_b2 <= 0;
		mul_a3 <= 0; mul_b3 <= 0; mul_a4 <= 0; mul_b4 <= 0; mul_a5 <= 0; mul_b5 <= 0;
		mul_a6 <= 0; mul_b6 <= 0; mul_a7 <= 0; mul_b7 <= 0; mul_a8 <= 0; mul_b8 <= 0;
		// partial products
		product_a <= 0; product_a_2 <= 0; product_a_3 <= 0; product_a_4 <= 0; product_a_5 <= 0;
		product_a_6 <= 0; product_a_7 <= 0; product_a_8 <= 0; product_a_9 <= 0; product_a_10 <= 0;
		product_b <= 0; product_c <= 0; product_d <= 0; product_e <= 0; product_f <= 0;
		product_g <= 0; product_h <= 0; product_i <= 0; product_j <= 0;
		// partial sums
		sum_0 <= 0; sum_0_2 <= 0; sum_0_3 <= 0; sum_0_4 <= 0; sum_0_5 <= 0; sum_0_6 <= 0;
		sum_0_7 <= 0; sum_0_8 <= 0; sum_0_9 <= 0;
		sum_1 <= 0; sum_1_2 <= 0; sum_1_3 <= 0; sum_1_4 <= 0; sum_1_5 <= 0; sum_1_6 <= 0;
		sum_1_7 <= 0; sum_1_8 <= 0;
		sum_2 <= 0; sum_2_2 <= 0; sum_2_3 <= 0; sum_2_4 <= 0; sum_2_5 <= 0; sum_2_6 <= 0; sum_2_7 <= 0;
		sum_3 <= 0; sum_4 <= 0; sum_4_2 <= 0; sum_4_3 <= 0; sum_4_4 <= 0; sum_4_5 <= 0;
		sum_5 <= 0; sum_5_2 <= 0; sum_5_3 <= 0; sum_5_4 <= 0;
		sum_6 <= 0; sum_7 <= 0; sum_7_2 <= 0; sum_8 <= 0;
		// assembled product
		product <= 0;
		product_1 <= 0;
		product_2 <= 0; product_3 <= 0;
		// product_4 feeds outfp[51:0] directly; without this reset the
		// mantissa field of outfp is undefined until the pipeline has run.
		product_4 <= 0;
		product_lsb <= 0;
		product_shift <= 0;
	end
	else if (enable) begin
		// ---- sign: XOR of the operand signs, delayed to match the datapath
		sign_1 <= opa[63] ^ opb[63];
		sign_2 <= sign_1; sign_3 <= sign_2; sign_4 <= sign_3; sign_5 <= sign_4;
		sign_6 <= sign_5; sign_7 <= sign_6; sign_8 <= sign_7; sign_9 <= sign_8;
		sign_10 <= sign_9; sign_11 <= sign_10; sign_12 <= sign_11; sign_13 <= sign_12;
		sign_14 <= sign_13; sign_15 <= sign_14; sign_16 <= sign_15; sign_17 <= sign_16;
		sign <= sign_17;

		// ---- unpack operands
		mantissa_a1 <= opa[51:0];
		mantissa_b1 <= opb[51:0];
		mantissa_a2 <= mantissa_a1;
		mantissa_b2 <= mantissa_b1;
		exponent_a <= opa[62:52];
		exponent_b <= opb[62:52];

		// ---- special-value detection (exponent field all 0s / all 1s)
		a_is_zero <= !(|exponent_a);
		b_is_zero <= !(|exponent_b);
		a_is_inf <= exponent_a == 2047;
		b_is_inf <= exponent_b == 2047;
		in_inf_1 <= a_is_inf | b_is_inf;
		in_inf_2 <= in_inf_1;
		in_zero_1 <= a_is_zero | b_is_zero;

		// ---- exponent: sum, override for zero / all-ones inputs, then
		//      delay until the mantissa product is available
		exponent_terms_1 <= exponent_a + exponent_b;
		exponent_terms_2 <= exponent_terms_1;
		exponent_terms_3 <= in_zero_1 ? 12'b0 : exponent_terms_2;
		exponent_terms_4 <= in_inf_2 ? 12'b110000000000 : exponent_terms_3;
		exponent_terms_5 <= exponent_terms_4;
		exponent_terms_6 <= exponent_terms_5;
		exponent_terms_7 <= exponent_terms_6;
		exponent_terms_8 <= exponent_terms_7;
		exponent_terms_9 <= exponent_terms_8;

		// Subtract the offset 1022; sums that are not above it become 0.
		exponent_gt_expoffset <= exponent_terms_9 > 1022;
		exponent_1 <= exponent_terms_9 - 1022;
		exponent_2_0 <= exponent_gt_expoffset ? exponent_1 : exponent;
		exponent_2_1 <= exponent_2_0;
		exponent_2 <= exponent_2_1;

		// Overflow check, then take off the normalisation shift.
		exponent_is_infinity <= exponent_2 > 2046;
		exponent_is_infinity_2 <= exponent_is_infinity;
		exponent_3 <= exponent_2 - product_shift;
		exponent_gt_prodshift <= exponent_2 >= product_shift;
		exponent_4 <= exponent_gt_prodshift ? exponent_3 : exponent;
		exponent_5 <= exponent_is_infinity_2 ? 12'b011111111111 : exponent_4;
		exponent_6 <= exponent_5;

		// The mantissa is cleared when the exponent is 0 or overflowed.
		set_mantissa_zero <= (exponent_4 == 0) | exponent_is_infinity_2;

		// ---- multiplier operands: hidden bit (0 for a zero-exponent
		//      operand) prepended to the fraction, plus delayed copies
		mul_a <= { !a_is_zero, mantissa_a2 };
		mul_b <= { !b_is_zero, mantissa_b2 };
		mul_a1 <= mul_a; mul_b1 <= mul_b;
		mul_a2 <= mul_a1; mul_b2 <= mul_b1;
		mul_a3 <= mul_a2; mul_b3 <= mul_b2;
		mul_a4 <= mul_a3; mul_b4 <= mul_b3;
		mul_a5 <= mul_a4; mul_b5 <= mul_b4;
		mul_a6 <= mul_a5; mul_b6 <= mul_b5;
		mul_a7 <= mul_a6; mul_b7 <= mul_b6;
		mul_a8 <= mul_a7; mul_b8 <= mul_b7;

		// ---- partial products: slices of mul_a times slices of mul_b,
		//      each taken from the delayed copy that lines up with the
		//      partial sum it is added into
		product_a <= mul_a[23:0] * mul_b[16:0];
		product_b <= mul_a[23:0] * mul_b[33:17];
		product_c <= mul_a2[23:0] * mul_b2[50:34];
		product_d <= mul_a5[23:0] * mul_b5[52:51];
		product_e <= mul_a1[40:24] * mul_b1[16:0];
		product_f <= mul_a4[40:24] * mul_b4[33:17];
		product_g <= mul_a7[40:24] * mul_b7[52:34];
		product_h <= mul_a3[52:41] * mul_b3[16:0];
		product_i <= mul_a6[52:41] * mul_b6[33:17];
		product_j <= mul_a8[52:41] * mul_b8[52:34];

		// ---- accumulation chain: each stage adds the upper bits of the
		//      previous sum to the next partial product
		sum_0 <= product_a[40:17] + product_b;
		sum_1 <= sum_0[41:7] + product_e;
		sum_2 <= sum_1[35:10] + product_c;
		sum_3 <= sum_2[41:7] + product_h;
		sum_4 <= sum_3 + product_f;
		sum_5 <= sum_4[36:10] + product_d;
		sum_6 <= sum_5[27:7] + product_i;
		sum_7 <= sum_6 + product_g;
		sum_8 <= sum_7[36:17] + product_j;

		// ---- low-order bits that are final at each stage, delayed until
		//      sum_8 is ready
		product_a_2 <= product_a[16:0];
		product_a_3 <= product_a_2; product_a_4 <= product_a_3; product_a_5 <= product_a_4;
		product_a_6 <= product_a_5; product_a_7 <= product_a_6; product_a_8 <= product_a_7;
		product_a_9 <= product_a_8; product_a_10 <= product_a_9;

		sum_0_2 <= sum_0[6:0];
		sum_0_3 <= sum_0_2; sum_0_4 <= sum_0_3; sum_0_5 <= sum_0_4; sum_0_6 <= sum_0_5;
		sum_0_7 <= sum_0_6; sum_0_8 <= sum_0_7; sum_0_9 <= sum_0_8;

		sum_1_2 <= sum_1[9:0];
		sum_1_3 <= sum_1_2; sum_1_4 <= sum_1_3; sum_1_5 <= sum_1_4; sum_1_6 <= sum_1_5;
		sum_1_7 <= sum_1_6; sum_1_8 <= sum_1_7;

		sum_2_2 <= sum_2[6:0];
		sum_2_3 <= sum_2_2; sum_2_4 <= sum_2_3; sum_2_5 <= sum_2_4; sum_2_6 <= sum_2_5;
		sum_2_7 <= sum_2_6;

		sum_4_2 <= sum_4[9:0];
		sum_4_3 <= sum_4_2; sum_4_4 <= sum_4_3; sum_4_5 <= sum_4_4;

		sum_5_2 <= sum_5[6:0];
		sum_5_3 <= sum_5_2; sum_5_4 <= sum_5_3;

		sum_7_2 <= sum_7[16:0];

		// ---- assemble the 106-bit product (31+17+7+10+7+10+7+17 bits)
		product <= { sum_8, sum_7_2[16:0], sum_5_4[6:0], sum_4_5[9:0], sum_2_7[6:0],
				sum_1_8[9:0], sum_0_9[6:0], product_a_10[16:0] };

		// ---- normalise: shift left by one when the top bit is clear;
		//      the same flag is subtracted from the exponent above
		product_shift <= !sum_8[30];
		product_1 <= product << product_shift;
		product_2 <= product_1; product_3 <= product_2;

		// ---- pack: {0, top 54 product bits, OR of the remaining 52 bits}
		product_4 <= set_mantissa_zero ? 56'b0 : { 1'b0, product_3[105:52] , |product_3[51:0]};
	end
end

// =====================================================================
// Ready flag: two registered compares of the counter, so ready rises on
// the 18th enabled clock edge after reset.
// =====================================================================
always @(posedge clk)
begin
	if (rst) begin
		ready <= 0;
		count_ready_0 <= 0;
		count_ready <= 0;
	end
	else if (enable) begin
		ready <= count_ready;
		count_ready_0 <= count == 15;
		count_ready <= count == 16;
	end
end

// =====================================================================
// Cycle counter: counts enabled clocks and stops at 16 (it is held while
// either compare flag is set).
// =====================================================================
always @(posedge clk)
begin
	if (rst)
		count <= 0;
	else if (enable & !count_ready_0 & !count_ready)
		count <= count + 1;
end

endmodule
|