1 |
2 |
dmitryr |
// ========== Copyright Header Begin ==========================================
|
2 |
|
|
//
|
3 |
|
|
// OpenSPARC T1 Processor File: sparc_exu_div.v
|
4 |
|
|
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
|
5 |
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
|
6 |
|
|
//
|
7 |
|
|
// The above named program is free software; you can redistribute it and/or
|
8 |
|
|
// modify it under the terms of the GNU General Public
|
9 |
|
|
// License version 2 as published by the Free Software Foundation.
|
10 |
|
|
//
|
11 |
|
|
// The above named program is distributed in the hope that it will be
|
12 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
// General Public License for more details.
|
15 |
|
|
//
|
16 |
|
|
// You should have received a copy of the GNU General Public
|
17 |
|
|
// License along with this work; if not, write to the Free Software
|
18 |
|
|
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
19 |
|
|
//
|
20 |
|
|
// ========== Copyright Header End ============================================
|
21 |
|
|
////////////////////////////////////////////////////////////////////////
|
22 |
|
|
/*
|
23 |
|
|
// Module Name: sparc_exu_div
|
24 |
|
|
*/
|
25 |
|
|
module sparc_exu_div (/*AUTOARG*/
|
26 |
|
|
// Outputs
|
27 |
|
|
so, div_ecl_xin_msb_l, div_ecl_x_msb, div_ecl_d_msb,
|
28 |
|
|
div_ecl_cout64, div_ecl_cout32, div_ecl_gencc_in_msb_l,
|
29 |
|
|
div_ecl_gencc_in_31, div_ecl_upper32_equal, div_ecl_low32_nonzero,
|
30 |
|
|
div_ecl_dividend_msb, div_byp_muldivout_g, div_byp_yreg_e,
|
31 |
|
|
div_ecl_yreg_0_l, exu_mul_rs1_data, exu_mul_rs2_data,
|
32 |
|
|
div_ecl_adder_out_31, div_ecl_detect_zero_low,
|
33 |
|
|
div_ecl_detect_zero_high, div_ecl_d_62,
|
34 |
|
|
// Inputs
|
35 |
|
|
ecl_div_yreg_wen_w, ecl_div_yreg_wen_l, ecl_div_yreg_wen_g,
|
36 |
|
|
ecl_div_yreg_shift_g, ecl_div_yreg_data_31_g, ecl_div_thr_e,
|
37 |
|
|
byp_div_yreg_data_w, rclk, se, si, ecl_div_keep_d,
|
38 |
|
|
ecl_div_ld_inputs, ecl_div_sel_adder, ecl_div_last_cycle,
|
39 |
|
|
ecl_div_almostlast_cycle, ecl_div_div64, ecl_div_sel_u32,
|
40 |
|
|
ecl_div_sel_pos32, ecl_div_sel_neg32, ecl_div_sel_64b,
|
41 |
|
|
ecl_div_upper32_zero, ecl_div_upper33_one, ecl_div_upper33_zero,
|
42 |
|
|
mul_exu_data_g, ecl_div_sel_div, ecl_div_mul_wen,
|
43 |
|
|
ecl_div_dividend_sign, ecl_div_subtract_l, ecl_div_cin,
|
44 |
|
|
ecl_div_newq, ecl_div_xinmask, ecl_div_keepx,
|
45 |
|
|
ecl_div_mul_get_new_data, ecl_div_mul_keep_data,
|
46 |
|
|
ecl_div_mul_get_32bit_data, ecl_div_mul_sext_rs2_e,
|
47 |
|
|
ecl_div_mul_sext_rs1_e, byp_div_rs1_data_e, byp_div_rs2_data_e,
|
48 |
|
|
ecl_div_muls_rs1_31_e_l, ecl_div_muls, ecl_div_zero_rs2_e
|
49 |
|
|
) ;
|
50 |
|
|
/*AUTOINPUT*/
|
51 |
|
|
// Beginning of automatic inputs (from unused autoinst inputs)
|
52 |
|
|
input [31:0] byp_div_yreg_data_w; // To yreg of sparc_exu_div_yreg.v
|
53 |
|
|
input [3:0] ecl_div_thr_e; // To yreg of sparc_exu_div_yreg.v
|
54 |
|
|
input ecl_div_yreg_data_31_g; // To yreg of sparc_exu_div_yreg.v
|
55 |
|
|
input [3:0] ecl_div_yreg_shift_g; // To yreg of sparc_exu_div_yreg.v
|
56 |
|
|
input [3:0] ecl_div_yreg_wen_g; // To yreg of sparc_exu_div_yreg.v
|
57 |
|
|
input [3:0] ecl_div_yreg_wen_l; // To yreg of sparc_exu_div_yreg.v
|
58 |
|
|
input [3:0] ecl_div_yreg_wen_w; // To yreg of sparc_exu_div_yreg.v
|
59 |
|
|
// End of automatics
|
60 |
|
|
input rclk;
|
61 |
|
|
input se;
|
62 |
|
|
input si;
|
63 |
|
|
input ecl_div_keep_d; // d should store (w/ overflow calcs)
|
64 |
|
|
input ecl_div_ld_inputs;// load in d and x
|
65 |
|
|
input ecl_div_sel_adder;// d should use adder output
|
66 |
|
|
input ecl_div_last_cycle;// last cycle of computations
|
67 |
|
|
input ecl_div_almostlast_cycle;// 2nd to last cycle of div
|
68 |
|
|
input ecl_div_div64;
|
69 |
|
|
input ecl_div_sel_u32;
|
70 |
|
|
input ecl_div_sel_pos32;
|
71 |
|
|
input ecl_div_sel_neg32;
|
72 |
|
|
input ecl_div_sel_64b;
|
73 |
|
|
input ecl_div_upper32_zero;
|
74 |
|
|
input ecl_div_upper33_one;
|
75 |
|
|
input ecl_div_upper33_zero;
|
76 |
|
|
input [63:0] mul_exu_data_g;
|
77 |
|
|
input ecl_div_sel_div;
|
78 |
|
|
input ecl_div_mul_wen;
|
79 |
|
|
input ecl_div_dividend_sign;
|
80 |
|
|
input ecl_div_subtract_l; // add/subtract to adder
|
81 |
|
|
input ecl_div_cin;
|
82 |
|
|
input ecl_div_newq; // newest q bit
|
83 |
|
|
input ecl_div_xinmask;
|
84 |
|
|
input ecl_div_keepx;
|
85 |
|
|
input ecl_div_mul_get_new_data;
|
86 |
|
|
input ecl_div_mul_keep_data;
|
87 |
|
|
input ecl_div_mul_get_32bit_data;
|
88 |
|
|
input ecl_div_mul_sext_rs2_e;
|
89 |
|
|
input ecl_div_mul_sext_rs1_e;
|
90 |
|
|
input [63:0] byp_div_rs1_data_e;
|
91 |
|
|
input [63:0] byp_div_rs2_data_e;
|
92 |
|
|
input ecl_div_muls_rs1_31_e_l;
|
93 |
|
|
input ecl_div_muls;
|
94 |
|
|
input ecl_div_zero_rs2_e;
|
95 |
|
|
|
96 |
|
|
output so;
|
97 |
|
|
output div_ecl_xin_msb_l;
|
98 |
|
|
output div_ecl_x_msb;
|
99 |
|
|
output div_ecl_d_msb;
|
100 |
|
|
output div_ecl_cout64; // cout from adder
|
101 |
|
|
output div_ecl_cout32; // cout from adder
|
102 |
|
|
output div_ecl_gencc_in_msb_l;
|
103 |
|
|
output div_ecl_gencc_in_31;
|
104 |
|
|
output div_ecl_upper32_equal;
|
105 |
|
|
output div_ecl_low32_nonzero;
|
106 |
|
|
output div_ecl_dividend_msb;
|
107 |
|
|
output [63:0] div_byp_muldivout_g;
|
108 |
|
|
output [31:0] div_byp_yreg_e;
|
109 |
|
|
output [3:0] div_ecl_yreg_0_l;
|
110 |
|
|
output [63:0] exu_mul_rs1_data;
|
111 |
|
|
output [63:0] exu_mul_rs2_data;
|
112 |
|
|
output div_ecl_adder_out_31;
|
113 |
|
|
output div_ecl_detect_zero_low;
|
114 |
|
|
output div_ecl_detect_zero_high;
|
115 |
|
|
output div_ecl_d_62;
|
116 |
|
|
|
117 |
|
|
/*AUTOWIRE*/
|
118 |
|
|
// Beginning of automatic wires (for undeclared instantiated-module outputs)
|
119 |
|
|
wire [31:0] yreg_mdq_y_e; // From yreg of sparc_exu_div_yreg.v
|
120 |
|
|
// End of automatics
|
121 |
|
|
wire clk;
|
122 |
|
|
wire [127:0] din; // sign extended dividend
|
123 |
|
|
wire [127:0] d; // current dividend/quotient
|
124 |
|
|
wire [63:0] adder_out; // output of adder
|
125 |
|
|
wire [127:0] dnext; // input to d flop
|
126 |
|
|
wire [127:0] adder_dnext; // combination of adder out and quotient
|
127 |
|
|
wire [63:0] x; // divisor
|
128 |
|
|
wire [63:0] xin; // sign extended (for 32bit) divisor
|
129 |
|
|
wire [63:0] xnext; // input to divisor flop
|
130 |
|
|
wire [63:0] adderin1; // first input to adder
|
131 |
|
|
wire [63:0] adderin2; // 2nd input to adder
|
132 |
|
|
|
133 |
|
|
wire [63:0] curr_q; // current quotient
|
134 |
|
|
wire [63:0] out64; // 64 bit result
|
135 |
|
|
wire [63:0] pos32; // positive 32 bit result w/ ovfl
|
136 |
|
|
wire [63:0] neg32; // negative 32 bit result w/ ovfl
|
137 |
|
|
wire [63:0] u32; // unsigned 32 bit result w/ ovfl
|
138 |
|
|
wire [63:0] gencc_in;
|
139 |
|
|
wire [63:0] mul_result;
|
140 |
|
|
wire [63:0] mul_result_next;
|
141 |
|
|
wire [127:0] input_data_e;
|
142 |
|
|
wire [63:0] dividend;
|
143 |
|
|
wire [63:0] divisor;
|
144 |
|
|
wire [127:0] next_mul_data;
|
145 |
|
|
wire [127:0] mul_data_out;
|
146 |
|
|
wire [127:0] mul32_input_data_e;
|
147 |
|
|
wire subtract;
|
148 |
|
|
wire [63:0] spr_out;
|
149 |
|
|
wire [63:0] z_in;
|
150 |
|
|
|
151 |
|
|
assign clk = rclk;
|
152 |
|
|
///////////////////////////////////////
|
153 |
|
|
// Input masking for 32 bit operations
|
154 |
|
|
///////////////////////////////////////
|
155 |
|
|
dp_buffer #(128) buf_input_data(.dout(input_data_e[127:0]),
|
156 |
|
|
.in({byp_div_rs2_data_e[63:0], byp_div_rs1_data_e[63:0]}));
|
157 |
|
|
// Mux in yreg into upper 32 bits on 32 bit divides
|
158 |
|
|
dp_mux2es #(32) dividendmux(.dout(dividend[63:32]),
|
159 |
|
|
.in0(yreg_mdq_y_e[31:0]),
|
160 |
|
|
.in1(input_data_e[63:32]),
|
161 |
|
|
.sel(ecl_div_div64));
|
162 |
|
|
assign dividend[31:0] = input_data_e[31:0];
|
163 |
|
|
assign divisor[63:0] = input_data_e[127:64];
|
164 |
|
|
|
165 |
|
|
|
166 |
|
|
/////////////////////
|
167 |
|
|
// Output assignment
|
168 |
|
|
/////////////////////
|
169 |
|
|
dp_mux2es #(64) output_mux(.dout(div_byp_muldivout_g[63:0]), .in1(d[63:0]),
|
170 |
|
|
.in0(mul_result[63:0]),
|
171 |
|
|
.sel(ecl_div_sel_div));
|
172 |
|
|
///////////////////////////
|
173 |
|
|
// Generate Condition Codes and divide by zero exception and overflow
|
174 |
|
|
///////////////////////////
|
175 |
|
|
dp_mux2es #(64) gencc_mux(.dout(gencc_in[63:0]),
|
176 |
|
|
.in0(mul_result[63:0]),
|
177 |
|
|
.in1(curr_q[63:0]),
|
178 |
|
|
.sel(ecl_div_sel_div));
|
179 |
|
|
sparc_exu_div_32eql u32eql(.in(gencc_in[63:32]), .equal(div_ecl_upper32_equal));
|
180 |
|
|
sparc_exu_aluor32 low32or(// Outputs
|
181 |
|
|
.out (div_ecl_low32_nonzero),
|
182 |
|
|
// Inputs
|
183 |
|
|
.in (gencc_in[31:0]));
|
184 |
|
|
assign div_ecl_gencc_in_msb_l = ~gencc_in[63];
|
185 |
|
|
assign div_ecl_gencc_in_31 = gencc_in[31];
|
186 |
|
|
|
187 |
|
|
|
188 |
|
|
// Division overflow calculations
|
189 |
|
|
assign curr_q = d[127:64];
|
190 |
|
|
assign u32 = {32'b0, (curr_q[31:0] | {32{~ecl_div_upper32_zero}})};
|
191 |
|
|
assign pos32 = {33'b0, (curr_q[30:0] | {31{~ecl_div_upper33_zero}})};
|
192 |
|
|
assign neg32 = {{33{1'b1}}, (curr_q[30:0] & {31{ecl_div_upper33_one}})};
|
193 |
|
|
|
194 |
|
|
mux4ds #(64) result_mux(.dout(out64[63:0]), .in0(curr_q[63:0]), .in1(u32[63:0]),
|
195 |
|
|
.in2(pos32[63:0]), .in3(neg32[63:0]), .sel0(ecl_div_sel_64b),
|
196 |
|
|
.sel1(ecl_div_sel_u32), .sel2(ecl_div_sel_pos32),
|
197 |
|
|
.sel3(ecl_div_sel_neg32));
|
198 |
|
|
|
199 |
|
|
//////////////////////////
|
200 |
|
|
// Logic for D (dividend)
|
201 |
|
|
//////////////////////////
|
202 |
|
|
|
203 |
|
|
// If signed div sign extend dividend to 127 bits
|
204 |
|
|
assign div_ecl_dividend_msb = dividend[63];
|
205 |
|
|
assign din[62:0] = dividend[62:0];
|
206 |
|
|
dp_mux2es #(32) din_mux(.dout(din[94:63]),
|
207 |
|
|
.in0({{31{ecl_div_dividend_sign}}, dividend[63]}),
|
208 |
|
|
.in1({~ecl_div_muls_rs1_31_e_l, dividend[31:1]}),
|
209 |
|
|
.sel(ecl_div_muls));
|
210 |
|
|
assign din[127:95] = {33{ecl_div_dividend_sign}};
|
211 |
|
|
// assign din = {{64{ecl_div_dividend_sign}}, dividend[63:0]};
|
212 |
|
|
|
213 |
|
|
|
214 |
|
|
// Select input to FF for d
|
215 |
|
|
mux3ds #(128) d_mux(.dout(dnext[127:0]), .in0({d[127:64], out64[63:0]}),
|
216 |
|
|
.in1(adder_dnext[127:0]), .in2(din[127:0]),
|
217 |
|
|
.sel0(ecl_div_keep_d),
|
218 |
|
|
.sel1(ecl_div_sel_adder),
|
219 |
|
|
.sel2(ecl_div_ld_inputs));
|
220 |
|
|
assign div_ecl_d_62 = d[62];
|
221 |
|
|
|
222 |
|
|
// FF for d
|
223 |
|
|
dff_s #(128) d_dff(.din(dnext[127:0]), .clk(clk), .q(d[127:0]), .se(se), .si(), .so());
|
224 |
|
|
|
225 |
|
|
////////////////////////////
|
226 |
|
|
// Logic for X (divisor)
|
227 |
|
|
////////////////////////////
|
228 |
|
|
// if signed div and 32 bits sign extend to upper 32 bits
|
229 |
|
|
dp_mux2es #(32) xin_mux(.dout(xin[63:32]), .in1(divisor[63:32]),
|
230 |
|
|
.in0({32{ecl_div_xinmask}}),
|
231 |
|
|
.sel(ecl_div_div64));
|
232 |
|
|
assign xin[31:0] = divisor[31:0] & {32{~ecl_div_zero_rs2_e}};
|
233 |
|
|
//assign xin[31:0] = divisor[31:0];
|
234 |
|
|
|
235 |
|
|
// Pick between x and divisor and 1 (use divisor on first cycle, 1 last cycle)
|
236 |
|
|
mux3ds #(64) x_mux(.dout(xnext[63:0]), .in0(x[63:0]), .in1(xin[63:0]), .in2({64'b0}),
|
237 |
|
|
.sel0(ecl_div_keepx),
|
238 |
|
|
.sel1(ecl_div_ld_inputs),
|
239 |
|
|
.sel2(ecl_div_almostlast_cycle));
|
240 |
|
|
|
241 |
|
|
// FF for x
|
242 |
|
|
dff_s #(64) x_dff(.din(xnext[63:0]), .clk(clk), .q(x[63:0]), .se(se), .si(), .so());
|
243 |
|
|
|
244 |
|
|
|
245 |
|
|
///////////////////////////
|
246 |
|
|
// Logic for inputs to adder
|
247 |
|
|
//////////////////////////
|
248 |
|
|
assign div_ecl_xin_msb_l = ~xin[63];
|
249 |
|
|
assign div_ecl_x_msb = x[63];
|
250 |
|
|
assign div_ecl_d_msb = d[127];
|
251 |
|
|
dp_mux2es #(64) in1_mux(.dout(adderin1[63:0]), .in0(d[126:63]),
|
252 |
|
|
.in1({d[62:0], ecl_div_newq}), .sel(ecl_div_last_cycle));
|
253 |
|
|
|
254 |
|
|
assign subtract = ~ecl_div_subtract_l;
|
255 |
|
|
assign adderin2[63:0] = x[63:0] ^ {64{subtract}};
|
256 |
|
|
|
257 |
|
|
//////////////////////////
|
258 |
|
|
// Adder
|
259 |
|
|
/////////////////////////
|
260 |
|
|
sparc_exu_aluadder64 add64(// Outputs
|
261 |
|
|
.adder_out(adder_out[63:0]),
|
262 |
|
|
.cout32 (div_ecl_cout32),
|
263 |
|
|
.cout64 (div_ecl_cout64),
|
264 |
|
|
// Inputs
|
265 |
|
|
.rs1_data (adderin1[63:0]),
|
266 |
|
|
.rs2_data (adderin2[63:0]),
|
267 |
|
|
.cin (ecl_div_cin));
|
268 |
|
|
|
269 |
|
|
assign adder_dnext = {adder_out[63:0], d[62:0], ecl_div_newq};
|
270 |
|
|
assign div_ecl_adder_out_31 = adder_out[31];
|
271 |
|
|
|
272 |
|
|
// sum predict and zero detection
|
273 |
|
|
sparc_exu_aluspr spr(.rs1_data(adderin1[63:0]), .rs2_data(adderin2[63:0]), .cin(ecl_div_cin),
|
274 |
|
|
.spr_out(spr_out[63:0]));
|
275 |
|
|
dp_mux2es #(64) zero_detect_mux(.dout(z_in[63:0]),
|
276 |
|
|
.in0(spr_out[63:0]),
|
277 |
|
|
.in1(xin[63:0]),
|
278 |
|
|
.sel(ecl_div_ld_inputs));
|
279 |
|
|
//sparc_exu_aluzcmp64 regzcmp(.in(z_in[63:0]), .zero64(div_ecl_detect_zero));
|
280 |
|
|
assign div_ecl_detect_zero_low = ~(|z_in[31:0]);
|
281 |
|
|
assign div_ecl_detect_zero_high = ~(|z_in[63:32]);
|
282 |
|
|
|
283 |
|
|
|
284 |
|
|
// y register
|
285 |
|
|
assign div_byp_yreg_e = yreg_mdq_y_e;
|
286 |
|
|
sparc_exu_div_yreg yreg(.mul_div_yreg_data_g(mul_exu_data_g[63:32]),
|
287 |
|
|
/*AUTOINST*/
|
288 |
|
|
// Outputs
|
289 |
|
|
.yreg_mdq_y_e(yreg_mdq_y_e[31:0]),
|
290 |
|
|
.div_ecl_yreg_0_l(div_ecl_yreg_0_l[3:0]),
|
291 |
|
|
// Inputs
|
292 |
|
|
.clk (clk),
|
293 |
|
|
.se (se),
|
294 |
|
|
.byp_div_yreg_data_w(byp_div_yreg_data_w[31:0]),
|
295 |
|
|
.ecl_div_thr_e(ecl_div_thr_e[3:0]),
|
296 |
|
|
.ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
|
297 |
|
|
.ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
|
298 |
|
|
.ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
|
299 |
|
|
.ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g),
|
300 |
|
|
.ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]));
|
301 |
|
|
|
302 |
|
|
|
303 |
|
|
//////////////////////////////////
|
304 |
|
|
// MULTIPLIER inputs
|
305 |
|
|
//////////////////////////////////
|
306 |
|
|
assign mul32_input_data_e[127:64] = {{32{ecl_div_mul_sext_rs2_e}}, input_data_e[95:64]};
|
307 |
|
|
assign mul32_input_data_e[63:0] = {{32{ecl_div_mul_sext_rs1_e}}, input_data_e[31:0]};
|
308 |
|
|
mux3ds #(128) mul_data_mux(.dout(next_mul_data[127:0]),
|
309 |
|
|
.in0(input_data_e[127:0]),
|
310 |
|
|
.in1(mul32_input_data_e[127:0]),
|
311 |
|
|
.in2(mul_data_out[127:0]),
|
312 |
|
|
.sel0(ecl_div_mul_get_new_data),
|
313 |
|
|
.sel1(ecl_div_mul_get_32bit_data),
|
314 |
|
|
.sel2(ecl_div_mul_keep_data));
|
315 |
|
|
dff_s #(128) mul_data_dff(.din(next_mul_data[127:0]), .clk(clk), .q(mul_data_out[127:0]),
|
316 |
|
|
.se(se), .si(), .so());
|
317 |
|
|
assign exu_mul_rs1_data = mul_data_out[63:0];
|
318 |
|
|
assign exu_mul_rs2_data = mul_data_out[127:64];
|
319 |
|
|
|
320 |
|
|
///////////////////////////////////
|
321 |
|
|
// Store output from mul
|
322 |
|
|
//////////////////////////////////
|
323 |
|
|
dp_mux2es #(64) mul_result_mux(.dout(mul_result_next[63:0]), .in0(mul_result[63:0]),
|
324 |
|
|
.in1(mul_exu_data_g[63:0]),
|
325 |
|
|
.sel(ecl_div_mul_wen));
|
326 |
|
|
dff_s #(64) mul_result_dff(.din(mul_result_next[63:0]), .clk(clk), .q(mul_result[63:0]),
|
327 |
|
|
.se(se), .si(), .so());
|
328 |
|
|
|
329 |
|
|
|
330 |
|
|
endmodule // sparc_exu_div
|