// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T1 Processor File: sparc_ifu_dcl.v
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
////////////////////////////////////////////////////////////////////////
/*
//  Module Name: sparc_ifu_dcl
//  Description:
//   The decode control logic block does branch condition evaluation,
//   delay slot management, and appropriate condition code
//   selection.  It also executes the tcc instruction and kills the E
//   stage instruction if a move did not succeed.  The DCL block is
//   also responsible for generating the correct select signals to
//   choose the branch offset and immediate operand.
//
*/
////////////////////////////////////////////////////////////////////////

// Bit positions of the integer condition-code flags within a 4-bit CC
// group; used below to index cc_e[3:0].
`define CC_N 3
`define CC_Z 2
`define CC_V 1
`define CC_C 0

// FP condition indices.  Not referenced by the module in this file;
// kept because other files in the compilation may rely on them.
`define FP_U 3
`define FP_G 2
`define FP_L 1
`define FP_E 0

// High/low bit positions for the four fcc fields (values match the
// SPARC V9 FSR layout -- TODO confirm intended use).  Not referenced by
// the module in this file.
`define FSR_FCC0_HI 11
`define FSR_FCC0_LO 10
`define FSR_FCC1_HI 33
`define FSR_FCC1_LO 32
`define FSR_FCC2_HI 35
`define FSR_FCC2_LO 34
`define FSR_FCC3_HI 37
`define FSR_FCC3_LO 36


module sparc_ifu_dcl(/*AUTOARG*/
   // Outputs
   ifu_exu_kill_e, ifu_exu_dontmv_regz0_e, ifu_exu_dontmv_regz1_e,
   ifu_exu_tcc_e, ifu_exu_dbrinst_d, ifu_ffu_mvcnd_m,
   dcl_fcl_bcregz0_e, dcl_fcl_bcregz1_e, dtu_inst_anull_e,
   dcl_swl_tcc_done_m, dcl_imd_immdata_sel_simm13_d_l,
   dcl_imd_immdata_sel_movcc_d_l, dcl_imd_immdata_sel_sethi_d_l,
   dcl_imd_immdata_sel_movr_d_l, dcl_imd_broff_sel_call_d_l,
   dcl_imd_broff_sel_br_d_l, dcl_imd_broff_sel_bcc_d_l,
   dcl_imd_broff_sel_bpcc_d_l, dcl_imd_immbr_sel_br_d, so,
   // Inputs
   rclk, se, si, dtu_reset, exu_ifu_cc_d, fcl_dcl_regz_e,
   exu_ifu_regn_e, ffu_ifu_cc_w2, ffu_ifu_cc_vld_w2,
   tlu_ifu_flush_pipe_w, swl_dcl_thr_d, swl_dcl_thr_w2,
   imd_dcl_brcond_d, imd_dcl_mvcond_d, fdp_dcl_op_s, fdp_dcl_op3_s,
   imd_dcl_abit_d, dec_dcl_cctype_d, dtu_dcl_opf2_d,
   fcl_dtu_inst_vld_e, fcl_dtu_intr_vld_e, ifu_tlu_flush_w
   );

   // Signal-name suffixes (_s, _d, _e, _m, _w, _w2) follow the file's
   // pipeline-stage naming; a trailing _l marks an active-low signal.
   // NOTE(review): si is never read and so is never driven inside this
   // module (every flop has .si() and .so() left open) -- presumably the
   // scan chain is stitched by a later tool step; confirm.

   input        rclk,
                se,
                si,
                dtu_reset;

   input [7:0]  exu_ifu_cc_d;          // latest CCs from EXU
                                       // ([7:4] used as xcc, [3:0] as icc)

   input        fcl_dcl_regz_e,        // rs1=0
                exu_ifu_regn_e;        // rs1<0

   input [7:0]  ffu_ifu_cc_w2;         // new fcc value for the local copy
   input [3:0]  ffu_ifu_cc_vld_w2;     // per-thread load enable for the copy

   input        tlu_ifu_flush_pipe_w;

   input [3:0]  swl_dcl_thr_d,         // one bit per thread
                swl_dcl_thr_w2;

   input [3:0]  imd_dcl_brcond_d;      // branch condition type
   input [7:0]  imd_dcl_mvcond_d;      // mov condition type
                                       // ([7:4] movcc, [3:0] movr)

   input [1:0]  fdp_dcl_op_s;
   input [5:0]  fdp_dcl_op3_s;
   input        imd_dcl_abit_d;        // anull bit for cond branch
   input [2:0]  dec_dcl_cctype_d;      // which cond codes to use
   input        dtu_dcl_opf2_d;

   input        fcl_dtu_inst_vld_e;
   input        fcl_dtu_intr_vld_e;
   input        ifu_tlu_flush_w;

   output       ifu_exu_kill_e,
                ifu_exu_dontmv_regz0_e,
                ifu_exu_dontmv_regz1_e,
                ifu_exu_tcc_e;
   output       ifu_exu_dbrinst_d;

   output       ifu_ffu_mvcnd_m;

   output       dcl_fcl_bcregz0_e,
                dcl_fcl_bcregz1_e;

   output       dtu_inst_anull_e;
   output       dcl_swl_tcc_done_m;

   output       dcl_imd_immdata_sel_simm13_d_l,  // imm data select
                dcl_imd_immdata_sel_movcc_d_l,
                dcl_imd_immdata_sel_sethi_d_l,
                dcl_imd_immdata_sel_movr_d_l;

   output       dcl_imd_broff_sel_call_d_l,      // dir branch offset select
                dcl_imd_broff_sel_br_d_l,
                dcl_imd_broff_sel_bcc_d_l,
                dcl_imd_broff_sel_bpcc_d_l;

   output       dcl_imd_immbr_sel_br_d;

   output       so;

   //----------------------------------------------------------------------
   // Declarations
   //----------------------------------------------------------------------

   wire [7:0]   cc_breval_e,
                fp_breval_d;

   wire         abit_e;

   wire         cond_brtaken_e,
                anull_all,
                anull_ubr,
                anull_cbr;

   wire [3:0]   anull_next_e,
                anull_e,
                thr_anull_d;

   wire         inst_anull_d,
                inst_anull_e;

   wire [3:0]   flush_abit;
   wire         all_flush_w,
                all_flush_w2;

   wire         br_always_e;

   wire         sel_movcc,
                sel_movr;

   wire [3:0]   br_cond_e,
                br_cond_d;
   wire [3:0]   thr_vld_e;

   wire [3:0]   ls_brcond_d,
                ls_brcond_e;
   wire [1:0]   ccfp_sel;

   wire [3:0]   cc_e;

   wire [1:0]   curr_fcc_d;

   wire [7:0]   fcc_d;

   wire [7:0]   t0_fcc_d,
                t1_fcc_d,
                t2_fcc_d,
                t3_fcc_d,
                t0_fcc_nxt,
                t1_fcc_nxt,
                t2_fcc_nxt,
                t3_fcc_nxt;

   wire         use_fcc0_d,
                use_fcc1_d,
                use_fcc2_d,
                use_fcc3_d;

   wire [3:0]   thr_e,
                thr_dec_d;
//              fcc_dec_d,
//              fcc_dec_e;

   wire [1:0]   op_d;
   wire [5:0]   op3_d;

   wire         use_xcc_d,
                ltz_e,
                cc_eval0,
                cc_eval1,
                fp_eval0_d,
                fp_eval1_d,
                fp_eval_d,
                fp_eval_e,
                r_eval1,
                r_eval0,
                ccfp_eval,
                ccbr_taken_e,
                mvbr_sel_br_d,
                cc_mvbr_d,
                cc_mvbr_e,
                fpcond_mvbr_d,
                fpcond_mvbr_e;

   wire         call_inst_e,
                call_inst_d,
                dbr_inst_d,
                dbr_inst_e,
                ibr_inst_d,
                ibr_inst_e,
                mov_inst_d,
                mov_inst_e,
                tcc_done_e,
                tcc_inst_d,
                tcc_inst_e;

   wire         clk;



   //----------------------------------------------------------------------
   // Code start here
   //----------------------------------------------------------------------
   assign       clk = rclk;


   // S Stage Operands
   // op / op3 arrive in S and are staged through dff_s to give the
   // D-stage copies used by all of the decode below.
   dff_s #(2) opreg(.din (fdp_dcl_op_s),
                    .clk (clk),
                    .q   (op_d),
                    .se  (se), .si(), .so());

   dff_s #(6) op3_reg(.din (fdp_dcl_op3_s),
                      .clk (clk),
                      .q   (op3_d),
                      .se  (se), .si(), .so());

   dff_s abite_reg(.din (imd_dcl_abit_d),
                   .clk (clk),
                   .q   (abit_e),
                   .se  (se), .si(), .so());

   // need to protect from scan contention
   dff_s #(4) thre_reg(.din (swl_dcl_thr_d),
                       .q   (thr_e),
                       .clk (clk), .se(se), .si(), .so());

   //------------------------------
   // Choose correct immediate data
   //------------------------------
   // All four selects are active low.
   // movcc if op3 = 101100
   // (op3_d[2:1] and op_d[0] are not checked by this reduced decode)
   assign dcl_imd_immdata_sel_movcc_d_l = ~(op_d[1] &
                                            op3_d[5] & ~op3_d[4] &
                                            op3_d[3] & ~op3_d[0]);

   // movr if op3 = 101111
   //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   // Reduced the number of terms in the eqn to help with timing
   // path, the result of which is that the immediate data sent to the
   // exu for a FLUSH instruction is INCORRECT!  (It is decoded as a
   // MOVR).  However, since our architecture completely ignores the
   // address of the flush, this should be ok.  Confirmed with Sanjay
   // 03/31/03. (v1.29 -> 1.30)
   // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   assign dcl_imd_immdata_sel_movr_d_l = ~(op_d[1] &
                                           op3_d[5] & op3_d[3] &
                                           op3_d[1] & op3_d[0]);

   // sethi if op3 = 100xx
   // (only op_d[1] == 0 is actually decoded here)
   assign dcl_imd_immdata_sel_sethi_d_l = ~(~op_d[1]);

   // everything else
   // asserted (low) only when none of the three selects above is asserted
   assign dcl_imd_immdata_sel_simm13_d_l =
            ~(dcl_imd_immdata_sel_movcc_d_l &
              dcl_imd_immdata_sel_movr_d_l &
              dcl_imd_immdata_sel_sethi_d_l);

   //------------------------------
   // Choose correct branch offset
   //------------------------------
   // All four selects are active low; with op_d[0] == 0 exactly one of
   // br / bcc / bpcc is asserted, otherwise only call is asserted.
   // call or ld/store
   assign dcl_imd_broff_sel_call_d_l = ~(op_d[0]);

   // branch on register
   assign dcl_imd_broff_sel_br_d_l = ~(~op_d[0] &
                                       op3_d[4] & op3_d[3]);
   // branch w/o prediction
   assign dcl_imd_broff_sel_bcc_d_l = ~(~op_d[0] &
                                        op3_d[4] & ~op3_d[3]);
   // everything else
   assign dcl_imd_broff_sel_bpcc_d_l = ~(~op_d[0] &
                                         ~op3_d[4]);

   //------------------------------------
   // mark branch/conditional instructions
   //------------------------------------
   // call
   assign call_inst_d = ~op_d[1] & op_d[0];
   dff_s #(1) call_inste_reg(.din (call_inst_d),
                             .clk (clk),
                             .q   (call_inst_e),
                             .se  (se), .si(), .so());

   // call or branch but not nop/sethi
   assign dbr_inst_d = ~op_d[1] & (op_d[0] | op3_d[4] | op3_d[3]);

   // Choose between branch offset and immediate operand
   assign dcl_imd_immbr_sel_br_d = dbr_inst_d;

   // tell exu to use pc instead of rs1
   assign ifu_exu_dbrinst_d = ~op_d[1];

   dff_s #(1) dbr_inste_reg(.din (dbr_inst_d),
                            .clk (clk),
                            .q   (dbr_inst_e),
                            .se  (se), .si(), .so());

   // jmpl + return
   // (op3_d[0] is not checked, so op3 = 11100x both match)
   assign ibr_inst_d = op_d[1] & ~op_d[0] &
                       op3_d[5] & op3_d[4] & op3_d[3] &
                       ~op3_d[2] & ~op3_d[1];
   dff_s #(1) ibr_inste_reg(.din (ibr_inst_d),
                            .clk (clk),
                            .q   (ibr_inst_e),
                            .se  (se), .si(), .so());
   // mov
   // matches op3 = 101100 or 101111
   assign mov_inst_d = (op_d[1] & ~op_d[0] &
                        op3_d[5] & ~op3_d[4] & op3_d[3] & op3_d[2] &
                        (~op3_d[1] & ~op3_d[0] | op3_d[1] & op3_d[0]));

   dff_s #(1) mov_inste_reg(.din (mov_inst_d),
                            .clk (clk),
                            .q   (mov_inst_e),
                            .se  (se), .si(), .so());
   // tcc
   // matches op3 = 111010
   assign tcc_inst_d = op_d[1] & ~op_d[0] &
                       op3_d[5] & op3_d[4] & op3_d[3] &
                       ~op3_d[2] & op3_d[1] & ~op3_d[0];
   dff_s #(1) tcc_inste_reg(.din (tcc_inst_d),
                            .clk (clk),
                            .q   (tcc_inst_e),
                            .se  (se), .si(), .so());

   // condition-field source: 1 = use imd_dcl_brcond_d (branch or tcc),
   // 0 = use one of the mov condition fields.  The tcc term is a
   // reduced decode (only op3_d[3:0] are checked).
   assign mvbr_sel_br_d = ~op_d[1] & ~op_d[0] |                     // br
                          op3_d[3] & ~op3_d[2] & op3_d[1] & ~op3_d[0];  // tcc

   // cc_mvbr_d = 0 for the register-condition instructions listed
   // below, 1 for everything else (condition-code based).
   assign cc_mvbr_d = ~(~op_d[1] & ~op_d[0] & op3_d[4] & op3_d[3] |  // bpr
                        op_d[1] & ~op_d[0] & op3_d[5] & ~op3_d[4] &
                        op3_d[3] & op3_d[2] & op3_d[1] & op3_d[0] |  // movr
                        op_d[1] & ~op_d[0] & op3_d[5] & op3_d[4] &
                        ~op3_d[3] & op3_d[2] & ~op3_d[1] & op3_d[0] &
                        dtu_dcl_opf2_d);                             // fmovr


   //---------------------------
   // FCC Logic
   //--------------------------
   // choose current fcc
   // one-hot decode of dec_dcl_cctype_d[1:0]
   assign use_fcc0_d = ~dec_dcl_cctype_d[1] & ~dec_dcl_cctype_d[0];
   assign use_fcc1_d = ~dec_dcl_cctype_d[1] & dec_dcl_cctype_d[0];
   assign use_fcc2_d = dec_dcl_cctype_d[1] & ~dec_dcl_cctype_d[0];
   assign use_fcc3_d = dec_dcl_cctype_d[1] & dec_dcl_cctype_d[0];

   mux4ds #(2) fcc_mux(.dout (curr_fcc_d[1:0]),
                       .in0  (fcc_d[1:0]),
                       .in1  (fcc_d[3:2]),
                       .in2  (fcc_d[5:4]),
                       .in3  (fcc_d[7:6]),
                       .sel0 (use_fcc0_d),
                       .sel1 (use_fcc1_d),
                       .sel2 (use_fcc2_d),
                       .sel3 (use_fcc3_d));

   // decode to make next step easier
//   assign fcc_dec_d[0] = ~curr_fcc_d[1] & ~curr_fcc_d[0];
//   assign fcc_dec_d[1] = ~curr_fcc_d[1] & curr_fcc_d[0];
//   assign fcc_dec_d[2] = curr_fcc_d[1] & ~curr_fcc_d[0];
//   assign fcc_dec_d[3] = curr_fcc_d[1] & curr_fcc_d[0];

//   dff #(4) fcce_reg(.din (fcc_dec_d),
//                     .q   (fcc_dec_e),
//                     .clk (clk),
//                     .se  (se), .si(), .so());


   //------------------
   // CC Logic for BCC
   //------------------
   // Choose appropriate CCs
   //
   // dec_cctype is 3 bits
   // 10X icc
   // 11X xcc
   // 000 fcc0
   // 001 fcc1
   // 010 fcc2
   // 011 fcc3
//   assign use_xcc_d = (dec_dcl_cctype_d[2] | op3_d[3]) & dec_dcl_cctype_d[1];
   assign use_xcc_d = dec_dcl_cctype_d[1];
   // FP condition codes apply when cctype[2] is clear, except for tcc
   assign fpcond_mvbr_d = ~dec_dcl_cctype_d[2] & ~tcc_inst_d;

   dff_s fpbr_reg(.din (fpcond_mvbr_d),
                  .clk (clk),
                  .q   (fpcond_mvbr_e),
                  .se  (se), .si(), .so());

   // mux between xcc and icc
//   assign cc_d = use_xcc_d ? exu_ifu_cc_d[7:4] :  // xcc
//                             exu_ifu_cc_d[3:0];   // icc
//   dff #(4) ccreg_e(.din (cc_d),
//                    .clk (clk),
//                    .q   (cc_e),
//                    .se  (se), .si(), .so());

   // One library cell per CC bit stands in for the commented-out
   // mux + flop above.  NOTE(review): the cell definition is not visible
   // here; presumably d1 (xcc bit) is captured when s is high and
   // d0 (icc bit) otherwise -- confirm against the cell library.
   bw_u1_soffm2_4x UZsize_ccreg0_e(.d0 (exu_ifu_cc_d[0]),
                                   .d1 (exu_ifu_cc_d[4]),
                                   .s  (use_xcc_d),
                                   .q  (cc_e[0]),
                                   .ck (clk), .se(se), .sd(), .so());
   bw_u1_soffm2_4x UZsize_ccreg1_e(.d0 (exu_ifu_cc_d[1]),
                                   .d1 (exu_ifu_cc_d[5]),
                                   .s  (use_xcc_d),
                                   .q  (cc_e[1]),
                                   .ck (clk), .se(se), .sd(), .so());
   bw_u1_soffm2_4x UZsize_ccreg2_e(.d0 (exu_ifu_cc_d[2]),
                                   .d1 (exu_ifu_cc_d[6]),
                                   .s  (use_xcc_d),
                                   .q  (cc_e[2]),
                                   .ck (clk), .se(se), .sd(), .so());
   bw_u1_soffm2_4x UZsize_ccreg3_e(.d0 (exu_ifu_cc_d[3]),
                                   .d1 (exu_ifu_cc_d[7]),
                                   .s  (use_xcc_d),
                                   .q  (cc_e[3]),
                                   .ck (clk), .se(se), .sd(), .so());


   //------------------------------
   // Evaluate Branch
   //------------------------------
   // Select correct branch condition
   // mvbr_sel_br_d, sel_movcc and sel_movr are mutually exclusive and
   // exactly one of them is always set.
   assign sel_movcc = ~mvbr_sel_br_d & cc_mvbr_d;
   assign sel_movr = ~mvbr_sel_br_d & ~cc_mvbr_d;

   // br_cond is the same as the "cond" field = inst[28:25] for bcc
   mux3ds #(4) brcond_mux(.dout (br_cond_d),
                          .in0  (imd_dcl_brcond_d),      // br or tcc
                          .in1  (imd_dcl_mvcond_d[7:4]), // movcc
                          .in2  (imd_dcl_mvcond_d[3:0]), // movr
                          .sel0 (mvbr_sel_br_d),
                          .sel1 (sel_movcc),
                          .sel2 (sel_movr));

   dff_s #(4) brcond_e_reg(.din (br_cond_d),
                           .clk (clk),
                           .q   (br_cond_e),
                           .se  (se), .si(), .so());

   // Branch Type Decode
   // one-hot decode of the two low condition bits
   assign ls_brcond_d[0] = ~br_cond_d[1] & ~br_cond_d[0];
   assign ls_brcond_d[1] = ~br_cond_d[1] & br_cond_d[0];
   assign ls_brcond_d[2] = br_cond_d[1] & ~br_cond_d[0];
   assign ls_brcond_d[3] = br_cond_d[1] & br_cond_d[0];

   dff_s #(4) lsbrc_e_reg(.din (ls_brcond_d),
                          .clk (clk),
                          .q   (ls_brcond_e),
                          .se  (se), .si(), .so());

   // Evaluate potential integer CC branches
   assign ltz_e = (cc_e[`CC_N] ^ cc_e[`CC_V]);

   assign cc_breval_e[0] = 1'b0;                         // BPN
   assign cc_breval_e[1] = cc_e[`CC_Z];                  // BPE
   assign cc_breval_e[2] = cc_e[`CC_Z] | ltz_e;          // BPLE
   assign cc_breval_e[3] = ltz_e;                        // BPL
   assign cc_breval_e[4] = cc_e[`CC_Z] | cc_e[`CC_C];    // BPLEU
   assign cc_breval_e[5] = cc_e[`CC_C];                  // BPCS
   assign cc_breval_e[6] = cc_e[`CC_N];                  // BPNEG
   assign cc_breval_e[7] = cc_e[`CC_V];                  // BPVS

   // mux to choose right condition
   // cc_eval0 covers br_cond_e[2] == 0, cc_eval1 covers br_cond_e[2] == 1
   assign cc_eval0 = cc_breval_e[0] & ls_brcond_e[0] |
                     cc_breval_e[1] & ls_brcond_e[1] |
                     cc_breval_e[2] & ls_brcond_e[2] |
                     cc_breval_e[3] & ls_brcond_e[3];

   assign cc_eval1 = cc_breval_e[4] & ls_brcond_e[0] |
                     cc_breval_e[5] & ls_brcond_e[1] |
                     cc_breval_e[6] & ls_brcond_e[2] |
                     cc_breval_e[7] & ls_brcond_e[3];

   // Evaluate FP CC branches in D stage
   // (second name in each comment is the inverted condition, chosen
   //  later through br_cond_e[3])
   assign fp_breval_d[0] = 1'b0;                              // FBN / A
   assign fp_breval_d[1] = (curr_fcc_d[1] | curr_fcc_d[0]);   // FBNE / E
   assign fp_breval_d[2] = curr_fcc_d[1] ^ curr_fcc_d[0];     // FBLG / UE
   assign fp_breval_d[3] = curr_fcc_d[0];                     // FBUL / GE
   assign fp_breval_d[4] = ~curr_fcc_d[1] & curr_fcc_d[0];    // FBL / UGE
   assign fp_breval_d[5] = curr_fcc_d[1];                     // FBUG / LE
   assign fp_breval_d[6] = curr_fcc_d[1] & ~curr_fcc_d[0];    // FBG / ULE
   assign fp_breval_d[7] = curr_fcc_d[1] & curr_fcc_d[0];     // FBU / O

   assign fp_eval0_d = fp_breval_d[0] & ls_brcond_d[0] |
                       fp_breval_d[1] & ls_brcond_d[1] |
                       fp_breval_d[2] & ls_brcond_d[2] |
                       fp_breval_d[3] & ls_brcond_d[3];

   assign fp_eval1_d = fp_breval_d[4] & ls_brcond_d[0] |
                       fp_breval_d[5] & ls_brcond_d[1] |
                       fp_breval_d[6] & ls_brcond_d[2] |
                       fp_breval_d[7] & ls_brcond_d[3];

   assign fp_eval_d = br_cond_d[2] ? fp_eval1_d :
                                     fp_eval0_d;

   dff_s #(1) fpev_ff(.din (fp_eval_d),
                      .q   (fp_eval_e),
                      .clk (clk),
                      .se  (se), .si(), .so());

   // merge eval0, eval1 and fp condition codes
   assign ccfp_sel[0] = ~fpcond_mvbr_e & ~br_cond_e[2];
   assign ccfp_sel[1] = ~fpcond_mvbr_e & br_cond_e[2];
//   assign ccfp_sel[2] = fpcond_mvbr_e & ~br_cond_e[2];
//   assign ccfp_sel[3] = fpcond_mvbr_e & br_cond_e[2];

   assign ccfp_eval = ccfp_sel[0] & cc_eval0 |
                      ccfp_sel[1] & cc_eval1 |
                      fpcond_mvbr_e & fp_eval_e;

   // invert branch condition if this is an inverted br type
//   assign ccbr_taken_e = (ccfp_eval ^ br_cond_e[3]) & cc_mvbr_e;
   assign ccbr_taken_e = ccfp_eval ? (cc_mvbr_e & ~br_cond_e[3]) :
                                     (cc_mvbr_e & br_cond_e[3]);

   // cond == 1000 on a condition-code instruction
   assign br_always_e = (~br_cond_e[0] & ~br_cond_e[1] & ~br_cond_e[2] &
                         br_cond_e[3] & cc_mvbr_e);

   //--------------
   // For BRZ
   // -------------
   // Calculate Cond Assuming Z=1 And Z=0.  Then Mux
//   assign r_eval1 = ((exu_ifu_regn_e | ~br_cond_e[1] | ~br_cond_e[0]) ^
//                     br_cond_e[2]) & ~cc_mvbr_e;
   assign r_eval1 = exu_ifu_regn_e ? (~br_cond_e[2] & ~cc_mvbr_e) :
                                     (((br_cond_e[1] & br_cond_e[0]) ^
                                       ~br_cond_e[2]) & ~cc_mvbr_e);

//   assign r_eval0 = ((exu_ifu_regn_e & br_cond_e[1]) ^
//                     br_cond_e[2]) & ~cc_mvbr_e;
   assign r_eval0 = exu_ifu_regn_e ? ((br_cond_e[1] ^ br_cond_e[2]) &
                                      ~cc_mvbr_e) :
                                     (br_cond_e[2] & ~cc_mvbr_e);

   dff_s #(1) regcc_ff(.din (cc_mvbr_d),
                       .clk (clk),
                       .q   (cc_mvbr_e),
                       .se  (se), .si(), .so());

   // Evaluate Final Branch condition
   // 3:1 mux
//   assign cond_brtaken_e = cc_mvbr_e ? ccbr_taken_e :
//                           exu_ifu_regz_e ? r_eval1 :
//                                            r_eval0;
   // 2:1 mux
//   assign cond_brtaken_e = exu_ifu_regz_e ? (r_eval1 | ccbr_taken_e) :
//                                            (r_eval0 | ccbr_taken_e);

   //////// Chandra ////////

   wire temp0, temp1, cond_brtaken_e_l;

   // limit loading on this signal
//   wire regz_buf_e;
//   bw_u1_buf_5x UZfix_regz_bf(.a (exu_ifu_regz_e),
//                              .z (regz_buf_e));

   // temp0 / temp1: branch outcome assuming rs1 != 0 / rs1 == 0
   assign temp0 = (r_eval0 | ccbr_taken_e);
   assign temp1 = (r_eval1 | ccbr_taken_e);

   // Library-cell form of the 2:1 mux above.  NOTE(review): cell
   // definitions are not visible here; presumably muxi21 is an inverting
   // mux, so the inverter that follows restores polarity -- confirm.
   bw_u1_muxi21_6x UZsize_cbtmux(.z(cond_brtaken_e_l),
                                 .d0(temp0),
                                 .d1(temp1),
                                 .s(fcl_dcl_regz_e));

   bw_u1_inv_20x UZsize_cbtinv(.z(cond_brtaken_e),
                               .a(cond_brtaken_e_l));

   ////////////////////////

   // Both regz outcomes are exported unmuxed (no fcl_dcl_regz_e term).
   assign dcl_fcl_bcregz0_e = (temp0 & dbr_inst_e | ibr_inst_e |
                               call_inst_e) & ~dtu_inst_anull_e;
   assign dcl_fcl_bcregz1_e = (temp1 & dbr_inst_e | ibr_inst_e |
                               call_inst_e) & ~dtu_inst_anull_e;

//   assign ifu_exu_dontmove_e = mov_inst_e & ~cond_brtaken_e;
   assign ifu_exu_dontmv_regz0_e = ~temp0 & mov_inst_e;
   assign ifu_exu_dontmv_regz1_e = ~temp1 & mov_inst_e;

   // branch condition to FPU
   dff_s #(1) fpcond_ff(.din (cond_brtaken_e),
                        .q   (ifu_ffu_mvcnd_m),
                        .clk (clk),
                        .se  (se), .si(), .so());

   // branch / move completion and anull signals
//   assign dtu_fcl_brtaken_e = ~dtu_inst_anull_e &
//                              (ibr_inst_e | call_inst_e |
//                               dbr_inst_e & cond_brtaken_e);

   // if mov didn't succeed kill write back and bypass
   // need to check thread as well
//   assign ifu_exu_kill_e = dtu_inst_anull_e |
//                           ~fcl_dtu_inst_vld_e; // don't need this anymore
   assign ifu_exu_kill_e = dtu_inst_anull_e;


   // signal trap if tcc succeeds
   assign ifu_exu_tcc_e = ~dtu_inst_anull_e & tcc_inst_e & ccbr_taken_e &
                          fcl_dtu_inst_vld_e;

   // valid, non-annulled tcc whose condition failed
   assign tcc_done_e = ~dtu_inst_anull_e & tcc_inst_e & ~ccbr_taken_e &
                       fcl_dtu_inst_vld_e;

   dff_s #(1) tccm_ff(.din (tcc_done_e),
                      .q   (dcl_swl_tcc_done_m),
                      .clk (clk),
                      .se  (se), .si(), .so());

   // logic to annul delay slot, if this branch itself is not annulled
   assign anull_cbr = abit_e & dbr_inst_e & ~br_always_e & ~call_inst_e;
   assign anull_ubr = abit_e & dbr_inst_e & br_always_e & ~call_inst_e;

   // with the a-bit set: an "always" branch annuls unconditionally,
   // any other direct branch annuls only when it is not taken
   assign anull_all = anull_ubr | anull_cbr & ~cond_brtaken_e;

   // check which thread to annul
   assign thr_vld_e = thr_e & {4{fcl_dtu_inst_vld_e}};

   assign all_flush_w = tlu_ifu_flush_pipe_w | ifu_tlu_flush_w;
   dff_s #(1) flshw2_ff(.din (all_flush_w),
                        .q   (all_flush_w2),
                        .clk (clk), .se(se), .si(), .so());

   assign flush_abit = swl_dcl_thr_w2 & {4{all_flush_w2}};

   // Per-thread annul bit:
   //   set   - bit currently clear and a valid E-stage instruction of
   //           that thread asserts anull_all
   //   hold  - bit currently set and no valid instruction/interrupt of
   //           that thread is in E
   //   clear - otherwise, and always for the thread flagged by flush_abit
   assign anull_next_e = ((~anull_e & {4{anull_all}} & thr_vld_e) |
                          (anull_e & ~(thr_e & {4{fcl_dtu_inst_vld_e |
                                                  fcl_dtu_intr_vld_e}}))) &
                         ~flush_abit;

   // anull_e needs to be per thread
   dffr_s #(4) anull_ff(.din (anull_next_e),
                        .clk (clk),
                        .rst (dtu_reset),
                        .q   (anull_e),
                        .se  (se), .si(), .so());

   //
//   assign thr_dec_e[0] = swl_dcl_thr_e[0] | rst_tri_enable;
//   assign thr_dec_e[3:1] = swl_dcl_thr_e[3:1] & {3{~rst_tri_enable}};

   // Annul status for the instruction now in D, taken from the
   // next-state value and staged to E.
   assign thr_anull_d = swl_dcl_thr_d & anull_next_e;
   assign inst_anull_d = (|thr_anull_d[3:0]);
   dff_s #(1) ina_ff(.din (inst_anull_d),
                     .q   (inst_anull_e),
                     .clk (clk), .se (se), .si(), .so());

   assign dtu_inst_anull_e = inst_anull_e;

//   mux4ds dcla_mux(.dout (this_inst_anull_e),
//                   .in0  (anull_e[0]),
//                   .in1  (anull_e[1]),
//                   .in2  (anull_e[2]),
//                   .in3  (anull_e[3]),
//                   .sel0 (thr_dec_e[0]),
//                   .sel1 (thr_dec_e[1]),
//                   .sel2 (thr_dec_e[2]),
//                   .sel3 (thr_dec_e[3]));
//   assign dtu_inst_anull_e = this_inst_anull_e & fcl_dtu_inst_vld_e;


   //--------------------
   // Copy of FCC
   //--------------------
   // FCC's are maintained in the ffu.  A copy is kept here to run the
   // FP branch instructions.

   // load FCC from FFU
   // (each thread's copy holds its value unless its vld bit is set)
   mux2ds #(8) t0_fcc_mux(.dout (t0_fcc_nxt[7:0]),
                          .in0  (t0_fcc_d[7:0]),
                          .in1  (ffu_ifu_cc_w2[7:0]),
                          .sel0 (~ffu_ifu_cc_vld_w2[0]),
                          .sel1 (ffu_ifu_cc_vld_w2[0]));

   // thread 0 fcc register
   dffr_s #(8) t0_fcc_reg(.din (t0_fcc_nxt[7:0]),
                          .q   (t0_fcc_d[7:0]),
                          .rst (dtu_reset),
                          .clk (clk), .se (se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
   // single-thread build: thread 0's copy is used directly
   assign fcc_d[7:0] = t0_fcc_d[7:0];
`else

   mux2ds #(8) t1_fcc_mux(.dout (t1_fcc_nxt[7:0]),
                          .in0  (t1_fcc_d[7:0]),
                          .in1  (ffu_ifu_cc_w2[7:0]),
                          .sel0 (~ffu_ifu_cc_vld_w2[1]),
                          .sel1 (ffu_ifu_cc_vld_w2[1]));

   mux2ds #(8) t2_fcc_mux(.dout (t2_fcc_nxt[7:0]),
                          .in0  (t2_fcc_d[7:0]),
                          .in1  (ffu_ifu_cc_w2[7:0]),
                          .sel0 (~ffu_ifu_cc_vld_w2[2]),
                          .sel1 (ffu_ifu_cc_vld_w2[2]));

   mux2ds #(8) t3_fcc_mux(.dout (t3_fcc_nxt[7:0]),
                          .in0  (t3_fcc_d[7:0]),
                          .in1  (ffu_ifu_cc_w2[7:0]),
                          .sel0 (~ffu_ifu_cc_vld_w2[3]),
                          .sel1 (ffu_ifu_cc_vld_w2[3]));

   // thread 1-3 fcc registers

   dffr_s #(8) t1_fcc_reg(.din (t1_fcc_nxt[7:0]),
                          .q   (t1_fcc_d[7:0]),
                          .rst (dtu_reset),
                          .clk (clk), .se (se), .si(), .so());
   dffr_s #(8) t2_fcc_reg(.din (t2_fcc_nxt[7:0]),
                          .q   (t2_fcc_d[7:0]),
                          .rst (dtu_reset),
                          .clk (clk), .se (se), .si(), .so());
   dffr_s #(8) t3_fcc_reg(.din (t3_fcc_nxt[7:0]),
                          .q   (t3_fcc_d[7:0]),
                          .rst (dtu_reset),
                          .clk (clk), .se (se), .si(), .so());

   // choose thread
   assign thr_dec_d[0] = swl_dcl_thr_d[0];
   assign thr_dec_d[3:1] = swl_dcl_thr_d[3:1];

   mux4ds #(8) fcc0d_mx(.dout (fcc_d[7:0]),
                        .in0  (t0_fcc_d[7:0]),
                        .in1  (t1_fcc_d[7:0]),
                        .in2  (t2_fcc_d[7:0]),
                        .in3  (t3_fcc_d[7:0]),
                        .sel0 (thr_dec_d[0]),
                        .sel1 (thr_dec_d[1]),
                        .sel2 (thr_dec_d[2]),
                        .sel3 (thr_dec_d[3]));

`endif // !`ifdef FPGA_SYN_1THREAD

endmodule // sparc_ifu_dcl