1 |
2 |
dmitryr |
// ========== Copyright Header Begin ==========================================
|
2 |
|
|
//
|
3 |
|
|
// OpenSPARC T1 Processor File: sparc_ifu_fdp.v
|
4 |
|
|
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
|
5 |
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
|
6 |
|
|
//
|
7 |
|
|
// The above named program is free software; you can redistribute it and/or
|
8 |
|
|
// modify it under the terms of the GNU General Public
|
9 |
|
|
// License version 2 as published by the Free Software Foundation.
|
10 |
|
|
//
|
11 |
|
|
// The above named program is distributed in the hope that it will be
|
12 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
// General Public License for more details.
|
15 |
|
|
//
|
16 |
|
|
// You should have received a copy of the GNU General Public
|
17 |
|
|
// License along with this work; if not, write to the Free Software
|
18 |
|
|
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
19 |
|
|
//
|
20 |
|
|
// ========== Copyright Header End ============================================
|
21 |
|
|
////////////////////////////////////////////////////////////////////////
|
22 |
|
|
/*
|
23 |
|
|
// Module Name: sparc_ifu_fdp
|
24 |
|
|
// Description:
|
25 |
|
|
// The fdp contains the PCs for all four threads and the PC and
|
26 |
|
|
// nPC registers for all pipe stages. The fetcher also contains two
|
27 |
|
|
// adders for doing PC + br_offset and PC + 4.
|
28 |
|
|
// The fdp also holds the last fetched icache data for each thread
|
29 |
|
|
// and the next instruction register, which has the top half of the
|
30 |
|
|
// double instruction bundle which is fetched from the icache.
|
31 |
|
|
*/
|
32 |
|
|
////////////////////////////////////////////////////////////////////////
|
33 |
|
|
// Local header file includes / local defines
|
34 |
|
|
////////////////////////////////////////////////////////////////////////
|
35 |
|
|
`include "ifu.h"
|
36 |
|
|
|
37 |
|
|
// SPARC V9 nop encoding (sethi 0, %g0). Not referenced in the visible part
// of this file; presumably used by the instruction muxes further down.
`define NOP 32'h01000000
|
38 |
|
|
// 48-bit power-on reset PC. Not referenced by any logic in the visible part
// of this file; a later comment says the reset PC is eventually to arrive
// from the trap logic unit on the trap_pc bus.
`define PO_RESET_PC 48'hfffff0000020
|
39 |
|
|
// The VER_* macros are the constant fields of the 64-bit version value
// driven on ver_mux input in1, concatenated in the order
// {VER_MANUF, VER_IMPL, const_maskid, VER_MAXGL, VER_MAXTL, VER_MAXWIN}.
`define VER_MANUF 16'h003e
|
40 |
|
|
`define VER_IMPL 16'h0023
|
41 |
|
|
`define VER_MAXGL 8'h03
|
42 |
|
|
`define VER_MAXWIN 8'h07
|
43 |
|
|
`define VER_MAXTL 8'h06
|
44 |
|
|
|
45 |
|
|
//`define VER_MAXTL {5'b0, fcl_fdp_hprivmode_e, 2'b10}
|
46 |
|
|
//`define VER_IMPL_MASK 24'h002301
|
47 |
|
|
|
48 |
|
|
//`define VERSION_REG_HPV {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 5'b0, fcl_fdp_hprivmode_e, 2'b10, `VER_MAXWIN}
|
49 |
|
|
|
50 |
|
|
//`define VERSION_REG {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 8'h06, `VER_MAXWIN}
|
51 |
|
|
|
52 |
|
|
// FPGA_SYN enables all FPGA-related modifications
|
53 |
|
|
// Defining FPGA_SYN turns on both derived switches below. Neither
// FPGA_SYN_CLK_EN nor FPGA_SYN_CLK_DFF is tested in the visible part of
// this file (presumably consumed by included or library code -- TODO confirm).
`ifdef FPGA_SYN
|
54 |
|
|
`define FPGA_SYN_CLK_EN
|
55 |
|
|
`define FPGA_SYN_CLK_DFF
|
56 |
|
|
`endif
|
57 |
|
|
|
58 |
|
|
module sparc_ifu_fdp(/*AUTOARG*/
|
59 |
|
|
// Outputs
|
60 |
|
|
so, fdp_itlb_ctxt_bf, fdp_icd_vaddr_bf, fdp_icv_index_bf,
|
61 |
|
|
fdp_erb_pc_f, fdp_dtu_inst_s, ifu_exu_pc_d, ifu_exu_rs1_s,
|
62 |
|
|
ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_tlu_pc_m, ifu_tlu_npc_m,
|
63 |
|
|
ifu_tlu_pc_oor_e, ifu_exu_pcver_e, fdp_fcl_swc_s2,
|
64 |
|
|
fdp_fcl_pc_oor_vec_f, fdp_fcl_pc_oor_e, fdp_fcl_op_s,
|
65 |
|
|
fdp_fcl_op3_s, fdp_fcl_ibit_s,
|
66 |
|
|
// Inputs
|
67 |
|
|
rclk, se, si, const_maskid, lsu_t0_pctxt_state,
|
68 |
|
|
lsu_t1_pctxt_state, lsu_t2_pctxt_state, lsu_t3_pctxt_state,
|
69 |
|
|
exu_ifu_brpc_e, tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2,
|
70 |
|
|
tlu_itlb_dmp_nctxt_g, tlu_itlb_dmp_actxt_g, tlu_itlb_tte_tag_w2,
|
71 |
|
|
dtu_fdp_thrconf_e, icd_fdp_fetdata_s1, icd_fdp_topdata_s1,
|
72 |
|
|
ifq_fdp_fill_inst, fcl_fdp_oddwin_s, fcl_fdp_pcoor_vec_f,
|
73 |
|
|
fcl_fdp_pcoor_f, fcl_fdp_mask32b_f, fcl_fdp_addr_mask_d,
|
74 |
|
|
fcl_fdp_tctxt_sel_prim, fcl_fdp_usenir_sel_nir_s1,
|
75 |
|
|
fcl_fdp_rbinst_sel_inste_s, fcl_fdp_thrtnpc_sel_tnpc_l,
|
76 |
|
|
fcl_fdp_thrtnpc_sel_npcw_l, fcl_fdp_thrtnpc_sel_pcf_l,
|
77 |
|
|
fcl_fdp_thrtnpc_sel_old_l, fcl_fdp_thr_s1_l,
|
78 |
|
|
fcl_fdp_next_thr_bf_l, fcl_fdp_next_ctxt_bf_l, fcl_fdp_thr_s2_l,
|
79 |
|
|
fcl_fdp_nirthr_s1_l, fcl_fdp_tpcbf_sel_pcp4_bf_l,
|
80 |
|
|
fcl_fdp_tpcbf_sel_brpc_bf_l, fcl_fdp_tpcbf_sel_trap_bf_l,
|
81 |
|
|
fcl_fdp_tpcbf_sel_old_bf_l, fcl_fdp_pcbf_sel_swpc_bf_l,
|
82 |
|
|
fcl_fdp_pcbf_sel_nosw_bf_l, fcl_fdp_pcbf_sel_br_bf_l,
|
83 |
|
|
fcl_fdp_trrbpc_sel_trap_bf_l, fcl_fdp_trrbpc_sel_rb_bf_l,
|
84 |
|
|
fcl_fdp_trrbpc_sel_err_bf_l, fcl_fdp_trrbpc_sel_pcs_bf_l,
|
85 |
|
|
fcl_fdp_noswpc_sel_tnpc_l_bf, fcl_fdp_noswpc_sel_old_l_bf,
|
86 |
|
|
fcl_fdp_noswpc_sel_inc_l_bf, fcl_fdp_nextpcs_sel_pce_f_l,
|
87 |
|
|
fcl_fdp_nextpcs_sel_pcd_f_l, fcl_fdp_nextpcs_sel_pcs_f_l,
|
88 |
|
|
fcl_fdp_nextpcs_sel_pcf_f_l, fcl_fdp_rdsr_sel_pc_e_l,
|
89 |
|
|
fcl_fdp_rdsr_sel_ver_e_l, fcl_fdp_rdsr_sel_thr_e_l,
|
90 |
|
|
fcl_fdp_inst_sel_curr_s_l, fcl_fdp_inst_sel_switch_s_l,
|
91 |
|
|
fcl_fdp_inst_sel_nir_s_l, fcl_fdp_inst_sel_nop_s_l,
|
92 |
|
|
fcl_fdp_tinst_sel_curr_s_l, fcl_fdp_tinst_sel_rb_s_l,
|
93 |
|
|
fcl_fdp_tinst_sel_old_s_l, fcl_fdp_tinst_sel_ifq_s_l,
|
94 |
|
|
fcl_fdp_dmpthr_l, fcl_fdp_ctxt_sel_dmp_bf_l,
|
95 |
|
|
fcl_fdp_ctxt_sel_sw_bf_l, fcl_fdp_ctxt_sel_curr_bf_l
|
96 |
|
|
);
|
97 |
|
|
|
98 |
|
|
input rclk,
|
99 |
|
|
se,
|
100 |
|
|
si;
|
101 |
|
|
|
102 |
|
|
input [7:0] const_maskid;
|
103 |
|
|
|
104 |
|
|
input [12:0] lsu_t0_pctxt_state, // primary context
|
105 |
|
|
lsu_t1_pctxt_state,
|
106 |
|
|
lsu_t2_pctxt_state,
|
107 |
|
|
lsu_t3_pctxt_state;
|
108 |
|
|
|
109 |
|
|
// input exu_ifu_va_oor_e;
|
110 |
|
|
input [47:0] exu_ifu_brpc_e; // br address for dir branch
|
111 |
|
|
|
112 |
|
|
input [48:0] tlu_ifu_trappc_w2, // trap/exception PC
|
113 |
|
|
tlu_ifu_trapnpc_w2; // next trap PC
|
114 |
|
|
|
115 |
|
|
input tlu_itlb_dmp_nctxt_g,
|
116 |
|
|
tlu_itlb_dmp_actxt_g;
|
117 |
|
|
|
118 |
|
|
input [12:0] tlu_itlb_tte_tag_w2;
|
119 |
|
|
|
120 |
|
|
// input [`IC_IDX_HI:4] ifq_fdp_icindex_bf; // index + 1 bit for 16B write
|
121 |
|
|
|
122 |
|
|
input [40:0] dtu_fdp_thrconf_e;
|
123 |
|
|
|
124 |
|
|
input [32:0] icd_fdp_fetdata_s1, // 33b each; presumably 32b inst + 1 sw bit (note formerly read "4 inst + 4 sw bits") -- TODO confirm
|
125 |
|
|
icd_fdp_topdata_s1; // next instruction
|
126 |
|
|
|
127 |
|
|
input [32:0] ifq_fdp_fill_inst; // icache miss return
|
128 |
|
|
|
129 |
|
|
input fcl_fdp_oddwin_s;
|
130 |
|
|
input [3:0] fcl_fdp_pcoor_vec_f;
|
131 |
|
|
input fcl_fdp_pcoor_f;
|
132 |
|
|
input fcl_fdp_mask32b_f;
|
133 |
|
|
input fcl_fdp_addr_mask_d;
|
134 |
|
|
input [3:0] fcl_fdp_tctxt_sel_prim;
|
135 |
|
|
|
136 |
|
|
// 2:1 mux selects
|
137 |
|
|
input fcl_fdp_usenir_sel_nir_s1; // same as usenir_d2
|
138 |
|
|
input [3:0] fcl_fdp_rbinst_sel_inste_s; // rollback 1 or 2
|
139 |
|
|
|
140 |
|
|
input [3:0] fcl_fdp_thrtnpc_sel_tnpc_l, // load npc
|
141 |
|
|
fcl_fdp_thrtnpc_sel_npcw_l,
|
142 |
|
|
fcl_fdp_thrtnpc_sel_pcf_l,
|
143 |
|
|
fcl_fdp_thrtnpc_sel_old_l;
|
144 |
|
|
|
145 |
|
|
input [3:0] fcl_fdp_thr_s1_l; // s1 thr per the signal name (note said "s2 thr", likely copied from fcl_fdp_thr_s2_l -- TODO confirm) (64*5 muxes)
|
146 |
|
|
|
147 |
|
|
// other mux selects
|
148 |
|
|
input [3:0] fcl_fdp_next_thr_bf_l; // for thrpc output mux
|
149 |
|
|
input [3:0] fcl_fdp_next_ctxt_bf_l; // for ctxt output mux
|
150 |
|
|
|
151 |
|
|
input [3:0] fcl_fdp_thr_s2_l; // s2 thr (64*5 muxes)
|
152 |
|
|
input [3:0] fcl_fdp_nirthr_s1_l; // same as thr_s1, but protected
|
153 |
|
|
|
154 |
|
|
input [3:0] fcl_fdp_tpcbf_sel_pcp4_bf_l, // selects for thread PC muxes
|
155 |
|
|
fcl_fdp_tpcbf_sel_brpc_bf_l,
|
156 |
|
|
fcl_fdp_tpcbf_sel_trap_bf_l,
|
157 |
|
|
fcl_fdp_tpcbf_sel_old_bf_l;
|
158 |
|
|
|
159 |
|
|
input fcl_fdp_pcbf_sel_swpc_bf_l,
|
160 |
|
|
fcl_fdp_pcbf_sel_nosw_bf_l,
|
161 |
|
|
fcl_fdp_pcbf_sel_br_bf_l;
|
162 |
|
|
|
163 |
|
|
input [3:0] fcl_fdp_trrbpc_sel_trap_bf_l,
|
164 |
|
|
fcl_fdp_trrbpc_sel_rb_bf_l,
|
165 |
|
|
fcl_fdp_trrbpc_sel_err_bf_l,
|
166 |
|
|
fcl_fdp_trrbpc_sel_pcs_bf_l;
|
167 |
|
|
|
168 |
|
|
input fcl_fdp_noswpc_sel_tnpc_l_bf, // pcp4_mux selects (names end in _l): next pc from trap npc (thr_trappc_bf),
|
169 |
|
|
fcl_fdp_noswpc_sel_old_l_bf, // from the held pc ({fcl_fdp_pcoor_f, pc_f}),
|
170 |
|
|
fcl_fdp_noswpc_sel_inc_l_bf; // or from the incremented pc (pcinc_f)
|
171 |
|
|
|
172 |
|
|
input [3:0] fcl_fdp_nextpcs_sel_pce_f_l,
|
173 |
|
|
fcl_fdp_nextpcs_sel_pcd_f_l,
|
174 |
|
|
fcl_fdp_nextpcs_sel_pcs_f_l,
|
175 |
|
|
fcl_fdp_nextpcs_sel_pcf_f_l;
|
176 |
|
|
|
177 |
|
|
input fcl_fdp_rdsr_sel_pc_e_l,
|
178 |
|
|
fcl_fdp_rdsr_sel_ver_e_l,
|
179 |
|
|
fcl_fdp_rdsr_sel_thr_e_l;
|
180 |
|
|
|
181 |
|
|
input fcl_fdp_inst_sel_curr_s_l, // selects for inst_s2
|
182 |
|
|
fcl_fdp_inst_sel_switch_s_l,
|
183 |
|
|
fcl_fdp_inst_sel_nir_s_l,
|
184 |
|
|
fcl_fdp_inst_sel_nop_s_l;
|
185 |
|
|
|
186 |
|
|
input [3:0] fcl_fdp_tinst_sel_curr_s_l, // selects for tinst regs
|
187 |
|
|
fcl_fdp_tinst_sel_rb_s_l,
|
188 |
|
|
fcl_fdp_tinst_sel_old_s_l,
|
189 |
|
|
fcl_fdp_tinst_sel_ifq_s_l;
|
190 |
|
|
|
191 |
|
|
input [3:0] fcl_fdp_dmpthr_l;
|
192 |
|
|
|
193 |
|
|
input fcl_fdp_ctxt_sel_dmp_bf_l,
|
194 |
|
|
fcl_fdp_ctxt_sel_sw_bf_l,
|
195 |
|
|
fcl_fdp_ctxt_sel_curr_bf_l;
|
196 |
|
|
|
197 |
|
|
|
198 |
|
|
output so;
|
199 |
|
|
output [12:0] fdp_itlb_ctxt_bf;
|
200 |
|
|
output [47:2] fdp_icd_vaddr_bf; // 11:2 is index to ic
|
201 |
|
|
output [11:5] fdp_icv_index_bf;
|
202 |
|
|
output [47:0] fdp_erb_pc_f;
|
203 |
|
|
output [31:0] fdp_dtu_inst_s; // 32b inst only: the port is 32 bits wide, so no switch bit is carried here
|
204 |
|
|
|
205 |
|
|
output [47:0] ifu_exu_pc_d; // PC for rel branch
|
206 |
|
|
output [4:0] ifu_exu_rs1_s, // reg file read address
|
207 |
|
|
ifu_exu_rs2_s,
|
208 |
|
|
ifu_exu_rs3_s;
|
209 |
|
|
|
210 |
|
|
output [48:0] ifu_tlu_pc_m,
|
211 |
|
|
ifu_tlu_npc_m;
|
212 |
|
|
|
213 |
|
|
output ifu_tlu_pc_oor_e;
|
214 |
|
|
|
215 |
|
|
output [63:0] ifu_exu_pcver_e; // PCs to different dests.
|
216 |
|
|
|
217 |
|
|
output fdp_fcl_swc_s2; // tells whether to switch or not
|
218 |
|
|
output [3:0] fdp_fcl_pc_oor_vec_f; // PC va hole check
|
219 |
|
|
output fdp_fcl_pc_oor_e;
|
220 |
|
|
|
221 |
|
|
output [1:0] fdp_fcl_op_s;
|
222 |
|
|
output [5:2] fdp_fcl_op3_s;
|
223 |
|
|
output fdp_fcl_ibit_s;
|
224 |
|
|
|
225 |
|
|
|
226 |
|
|
|
227 |
|
|
|
228 |
|
|
//----------------------------------------------------------------------
|
229 |
|
|
// Declarations
|
230 |
|
|
//----------------------------------------------------------------------
|
231 |
|
|
|
232 |
|
|
// local signals
|
233 |
|
|
|
234 |
|
|
// Contexts
|
235 |
|
|
wire [12:0] curr_ctxt,
|
236 |
|
|
sw_ctxt,
|
237 |
|
|
dmp_ctxt,
|
238 |
|
|
dmp_ctxt_unq,
|
239 |
|
|
dmp_ctxt1,
|
240 |
|
|
dmp_ctxt2,
|
241 |
|
|
t0_ctxt_bf,
|
242 |
|
|
t1_ctxt_bf,
|
243 |
|
|
t2_ctxt_bf,
|
244 |
|
|
t3_ctxt_bf;
|
245 |
|
|
|
246 |
|
|
// PCs
|
247 |
|
|
wire [48:0] t0pc_f, t1pc_f, t2pc_f, t3pc_f, // F stage thread PC
|
248 |
|
|
t0pc_s, t1pc_s, t2pc_s, t3pc_s, // S stage thr pc
|
249 |
|
|
t0_next_pcs_f, t1_next_pcs_f, t2_next_pcs_f, t3_next_pcs_f,
|
250 |
|
|
t0npc_bf, t1npc_bf, t2npc_bf, t3npc_bf, // Next PC in
|
251 |
|
|
// BF stage
|
252 |
|
|
pc_s, pc_d, pc_e, pc_m, pc_w,
|
253 |
|
|
npc_s, npc_d, npc_e, npc_m, npc_w,
|
254 |
|
|
pc_d_adj, npc_d_adj;
|
255 |
|
|
|
256 |
|
|
wire [47:0] pc_bf,
|
257 |
|
|
swpc_bf, // PC of next thread if not branch
|
258 |
|
|
pc_f;
|
259 |
|
|
|
260 |
|
|
wire [48:0] nextpc_nosw_bf, // next pc if no switch
|
261 |
|
|
am_mask;
|
262 |
|
|
|
263 |
|
|
// trap PCs and rollback PCs
|
264 |
|
|
wire [48:0] t0_trap_rb_pc_bf,
|
265 |
|
|
t1_trap_rb_pc_bf,
|
266 |
|
|
t2_trap_rb_pc_bf,
|
267 |
|
|
t3_trap_rb_pc_bf;
|
268 |
|
|
|
269 |
|
|
wire [48:0] thr_trappc_bf,
|
270 |
|
|
t0_trapnpc_f,
|
271 |
|
|
t1_trapnpc_f,
|
272 |
|
|
t2_trapnpc_f,
|
273 |
|
|
t3_trapnpc_f,
|
274 |
|
|
trapnpc0_bf,
|
275 |
|
|
trapnpc1_bf,
|
276 |
|
|
trapnpc2_bf,
|
277 |
|
|
trapnpc3_bf;
|
278 |
|
|
|
279 |
|
|
// Branch PCs
|
280 |
|
|
wire [48:0] pcinc_f; // incr output
|
281 |
|
|
|
282 |
|
|
// Instruction Words
|
283 |
|
|
wire [32:0] inst_s2, // instruction to switch to in S
|
284 |
|
|
fdp_inst_s, // instruction to be sent to D
|
285 |
|
|
t0inst_s1, // input to thr inst reg in S
|
286 |
|
|
t1inst_s1,
|
287 |
|
|
t2inst_s1,
|
288 |
|
|
t3inst_s1,
|
289 |
|
|
t0inst_s2, // thr inst reg output
|
290 |
|
|
t1inst_s2,
|
291 |
|
|
t2inst_s2,
|
292 |
|
|
t3inst_s2;
|
293 |
|
|
|
294 |
|
|
wire [32:0] inst_s1; // fetched instruction in S
|
295 |
|
|
wire [32:0] inst_s1_bf1; // buf version of inst_s1
|
296 |
|
|
|
297 |
|
|
wire [32:0] rb_inst0_s, // instruction to rollback to
|
298 |
|
|
rb_inst1_s, // instruction to rollback to
|
299 |
|
|
rb_inst2_s, // instruction to rollback to
|
300 |
|
|
rb_inst3_s, // instruction to rollback to
|
301 |
|
|
inst_d, // rollback 1
|
302 |
|
|
inst_e; // rollback 2
|
303 |
|
|
|
304 |
|
|
// Next instruction word
|
305 |
|
|
wire [32:0] nirdata_s1, // next inst reg contents
|
306 |
|
|
t0nir, // thread NIR reg output
|
307 |
|
|
t1nir,
|
308 |
|
|
t2nir,
|
309 |
|
|
t3nir;
|
310 |
|
|
|
311 |
|
|
wire clk;
|
312 |
|
|
|
313 |
|
|
|
314 |
|
|
//
|
315 |
|
|
// Code start here
|
316 |
|
|
//
|
317 |
|
|
assign clk = rclk;
|
318 |
|
|
|
319 |
|
|
//----------------------------------------------------------------------
|
320 |
|
|
// Context Reg
|
321 |
|
|
//----------------------------------------------------------------------
|
322 |
|
|
assign t0_ctxt_bf = lsu_t0_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[0]}};
|
323 |
|
|
|
324 |
|
|
`ifdef FPGA_SYN_1THREAD
|
325 |
|
|
|
326 |
|
|
assign sw_ctxt = t0_ctxt_bf;
|
327 |
|
|
assign curr_ctxt = t0_ctxt_bf;
|
328 |
|
|
assign dmp_ctxt_unq = lsu_t0_pctxt_state;
|
329 |
|
|
|
330 |
|
|
`else
|
331 |
|
|
|
332 |
|
|
assign t1_ctxt_bf = lsu_t1_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[1]}};
|
333 |
|
|
assign t2_ctxt_bf = lsu_t2_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[2]}};
|
334 |
|
|
assign t3_ctxt_bf = lsu_t3_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[3]}};
|
335 |
|
|
|
336 |
|
|
dp_mux4ds #(13) sw_ctxt_mux(.dout (sw_ctxt),
|
337 |
|
|
.in0 (t0_ctxt_bf),
|
338 |
|
|
.in1 (t1_ctxt_bf),
|
339 |
|
|
.in2 (t2_ctxt_bf),
|
340 |
|
|
.in3 (t3_ctxt_bf),
|
341 |
|
|
.sel0_l (fcl_fdp_next_ctxt_bf_l[0]),
|
342 |
|
|
.sel1_l (fcl_fdp_next_ctxt_bf_l[1]),
|
343 |
|
|
.sel2_l (fcl_fdp_next_ctxt_bf_l[2]),
|
344 |
|
|
.sel3_l (fcl_fdp_next_ctxt_bf_l[3]));
|
345 |
|
|
|
346 |
|
|
dp_mux4ds #(13) curr_ctxt_mux(.dout (curr_ctxt),
|
347 |
|
|
.in0 (t0_ctxt_bf),
|
348 |
|
|
.in1 (t1_ctxt_bf),
|
349 |
|
|
.in2 (t2_ctxt_bf),
|
350 |
|
|
.in3 (t3_ctxt_bf),
|
351 |
|
|
.sel0_l (fcl_fdp_thr_s2_l[0]),
|
352 |
|
|
.sel1_l (fcl_fdp_thr_s2_l[1]),
|
353 |
|
|
.sel2_l (fcl_fdp_thr_s2_l[2]),
|
354 |
|
|
.sel3_l (fcl_fdp_thr_s2_l[3]));
|
355 |
|
|
|
356 |
|
|
dp_mux4ds #(13) dmp_ctxt_mux(.dout (dmp_ctxt_unq),
|
357 |
|
|
.in0 (lsu_t0_pctxt_state),
|
358 |
|
|
.in1 (lsu_t1_pctxt_state),
|
359 |
|
|
.in2 (lsu_t2_pctxt_state),
|
360 |
|
|
.in3 (lsu_t3_pctxt_state),
|
361 |
|
|
.sel0_l (fcl_fdp_dmpthr_l[0]),
|
362 |
|
|
.sel1_l (fcl_fdp_dmpthr_l[1]),
|
363 |
|
|
.sel2_l (fcl_fdp_dmpthr_l[2]),
|
364 |
|
|
.sel3_l (fcl_fdp_dmpthr_l[3]));
|
365 |
|
|
`endif // !`ifdef FPGA_SYN_1THREAD
|
366 |
|
|
|
367 |
|
|
// Demap context, term 1: the per-thread primary context (dmp_ctxt_unq),
// forced to all zeros when either tlu_itlb_dmp_nctxt_g or
// tlu_itlb_dmp_actxt_g is set.
assign dmp_ctxt1 = dmp_ctxt_unq & {13{~(tlu_itlb_dmp_nctxt_g |
|
368 |
|
|
tlu_itlb_dmp_actxt_g)}};
|
369 |
|
|
//`ifdef SPARC_HPV_EN
|
370 |
|
|
// Demap context, term 2: tlu_itlb_tte_tag_w2[12:0] when
// tlu_itlb_dmp_actxt_g is set, otherwise all zeros. The commented-out
// alternative below selected tag bits [13:8] and [6:0] instead.
assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[12:7],tlu_itlb_tte_tag_w2[6:0]} &
|
371 |
|
|
{13{tlu_itlb_dmp_actxt_g}};
|
372 |
|
|
//`else
|
373 |
|
|
// assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[13:8],tlu_itlb_tte_tag_w2[6:0]} &
|
374 |
|
|
// {13{tlu_itlb_dmp_actxt_g}};
|
375 |
|
|
//`endif
|
376 |
|
|
|
377 |
|
|
// The two terms are never non-zero together (term 1 requires actxt = 0,
// term 2 requires actxt = 1), so the OR acts as a select: the tte tag when
// actxt is set, zero when only nctxt is set, dmp_ctxt_unq otherwise.
assign dmp_ctxt = dmp_ctxt1 | dmp_ctxt2;
|
378 |
|
|
|
379 |
|
|
dp_mux3ds #(13) ctxt_mux (.dout (fdp_itlb_ctxt_bf),
|
380 |
|
|
.in0 (curr_ctxt),
|
381 |
|
|
.in1 (sw_ctxt),
|
382 |
|
|
.in2 (dmp_ctxt),
|
383 |
|
|
.sel0_l (fcl_fdp_ctxt_sel_curr_bf_l),
|
384 |
|
|
.sel1_l (fcl_fdp_ctxt_sel_sw_bf_l),
|
385 |
|
|
.sel2_l (fcl_fdp_ctxt_sel_dmp_bf_l));
|
386 |
|
|
|
387 |
|
|
|
388 |
|
|
// ----------------------------------------------------------------------
|
389 |
|
|
// PC datapath
|
390 |
|
|
// ----------------------------------------------------------------------
|
391 |
|
|
|
392 |
|
|
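// pc/ver/thr to exu for rdsr instruction: in0 = pc_e sign-extended from bit 47, in1 = version fields + const_maskid, in2 = zero-padded dtu_fdp_thrconf_e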
|
393 |
|
|
// this is the only 64 bit cell in the IFU
|
394 |
|
|
dp_mux3ds #(64) ver_mux(.dout (ifu_exu_pcver_e[63:0]),
|
395 |
|
|
.in0 ({{16{pc_e[47]}}, pc_e[47:0]}),
|
396 |
|
|
.in1 ({`VER_MANUF,
|
397 |
|
|
`VER_IMPL,
|
398 |
|
|
const_maskid[7:0],
|
399 |
|
|
`VER_MAXGL,
|
400 |
|
|
`VER_MAXTL,
|
401 |
|
|
`VER_MAXWIN}),
|
402 |
|
|
.in2 ({12'b0,
|
403 |
|
|
dtu_fdp_thrconf_e[40:29],
|
404 |
|
|
4'b0,
|
405 |
|
|
dtu_fdp_thrconf_e[28:9],
|
406 |
|
|
2'b0,
|
407 |
|
|
dtu_fdp_thrconf_e[8:3],
|
408 |
|
|
5'b0,
|
409 |
|
|
dtu_fdp_thrconf_e[2:0]}),
|
410 |
|
|
.sel0_l (fcl_fdp_rdsr_sel_pc_e_l),
|
411 |
|
|
.sel1_l (fcl_fdp_rdsr_sel_ver_e_l),
|
412 |
|
|
.sel2_l (fcl_fdp_rdsr_sel_thr_e_l));
|
413 |
|
|
|
414 |
|
|
// Select the next thread pc (for F stage)
|
415 |
|
|
dp_mux4ds #(49) t0_pcbf_mux(.dout (t0npc_bf),
|
416 |
|
|
.in0 ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
|
417 |
|
|
.in1 (nextpc_nosw_bf),
|
418 |
|
|
.in2 (t0_trap_rb_pc_bf),
|
419 |
|
|
.in3 ({1'b0, exu_ifu_brpc_e}),
|
420 |
|
|
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[0]),
|
421 |
|
|
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[0]),
|
422 |
|
|
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[0]),
|
423 |
|
|
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[0]));
|
424 |
|
|
|
425 |
|
|
`ifdef FPGA_SYN_1THREAD
|
426 |
|
|
`else
|
427 |
|
|
dp_mux4ds #(49) t1_pcbf_mux(.dout (t1npc_bf),
|
428 |
|
|
.in0 ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
|
429 |
|
|
.in1 (nextpc_nosw_bf),
|
430 |
|
|
.in2 (t1_trap_rb_pc_bf),
|
431 |
|
|
.in3 ({1'b0, exu_ifu_brpc_e}),
|
432 |
|
|
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[1]),
|
433 |
|
|
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[1]),
|
434 |
|
|
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[1]),
|
435 |
|
|
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[1]));
|
436 |
|
|
|
437 |
|
|
dp_mux4ds #(49) t2_pcbf_mux(.dout (t2npc_bf),
|
438 |
|
|
.in0 ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
|
439 |
|
|
.in1 (nextpc_nosw_bf),
|
440 |
|
|
.in2 (t2_trap_rb_pc_bf),
|
441 |
|
|
.in3 ({1'b0, exu_ifu_brpc_e}),
|
442 |
|
|
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[2]),
|
443 |
|
|
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[2]),
|
444 |
|
|
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[2]),
|
445 |
|
|
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[2]));
|
446 |
|
|
|
447 |
|
|
dp_mux4ds #(49) t3_pcbf_mux(.dout (t3npc_bf),
|
448 |
|
|
.in0 ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
|
449 |
|
|
.in1 (nextpc_nosw_bf),
|
450 |
|
|
.in2 (t3_trap_rb_pc_bf),
|
451 |
|
|
.in3 ({1'b0, exu_ifu_brpc_e}),
|
452 |
|
|
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[3]),
|
453 |
|
|
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[3]),
|
454 |
|
|
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[3]),
|
455 |
|
|
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[3]));
|
456 |
|
|
`endif
|
457 |
|
|
|
458 |
|
|
// F stage thread PC regs; use low power thr flop
|
459 |
|
|
dff_s #(49) t0_pcf_reg(.din (t0npc_bf),
|
460 |
|
|
.clk (clk),
|
461 |
|
|
.q (t0pc_f),
|
462 |
|
|
.se (se), .si(), .so());
|
463 |
|
|
`ifdef FPGA_SYN_1THREAD
|
464 |
|
|
assign fdp_fcl_pc_oor_vec_f = {3'b0, t0pc_f[48]};
|
465 |
|
|
assign swpc_bf = t0pc_f[47:0];
|
466 |
|
|
`else
|
467 |
|
|
dff_s #(49) t1_pcf_reg(.din (t1npc_bf),
|
468 |
|
|
.clk (clk),
|
469 |
|
|
.q (t1pc_f),
|
470 |
|
|
.se (se), .si(), .so());
|
471 |
|
|
dff_s #(49) t2_pcf_reg(.din (t2npc_bf),
|
472 |
|
|
.clk (clk),
|
473 |
|
|
.q (t2pc_f),
|
474 |
|
|
.se (se), .si(), .so());
|
475 |
|
|
dff_s #(49) t3_pcf_reg(.din (t3npc_bf),
|
476 |
|
|
.clk (clk),
|
477 |
|
|
.q (t3pc_f),
|
478 |
|
|
.se (se), .si(), .so());
|
479 |
|
|
|
480 |
|
|
assign fdp_fcl_pc_oor_vec_f = {t3pc_f[48], t2pc_f[48],
|
481 |
|
|
t1pc_f[48], t0pc_f[48]};
|
482 |
|
|
|
483 |
|
|
|
484 |
|
|
// select the pc to be used on a switch -- need to protect
|
485 |
|
|
dp_mux4ds #(48) swpc_mux(.dout (swpc_bf),
|
486 |
|
|
.in0 (t0pc_f[47:0]),
|
487 |
|
|
.in1 (t1pc_f[47:0]),
|
488 |
|
|
.in2 (t2pc_f[47:0]),
|
489 |
|
|
.in3 (t3pc_f[47:0]),
|
490 |
|
|
.sel0_l (fcl_fdp_next_thr_bf_l[0]),
|
491 |
|
|
.sel1_l (fcl_fdp_next_thr_bf_l[1]),
|
492 |
|
|
.sel2_l (fcl_fdp_next_thr_bf_l[2]),
|
493 |
|
|
.sel3_l (fcl_fdp_next_thr_bf_l[3]));
|
494 |
|
|
`endif
|
495 |
|
|
|
496 |
|
|
// choose between I$ write address and read address
|
497 |
|
|
// need mux only for lower 11 bits (2+3 + ICINDEX_SIZE)
|
498 |
|
|
// dp_mux2es #(48) ifqfdp_mux(.dout (icaddr_nosw_bf[47:0]),
|
499 |
|
|
// .in0 (nextpc_nosw_bf[47:0]),
|
500 |
|
|
// .in1 ({{37{1'b0}}, ifq_fdp_icindex_bf, 4'b0}),
|
501 |
|
|
// .sel (fcl_fdp_ifqfdp_sel_ifq_bf)); // 1=ifq
|
502 |
|
|
|
503 |
|
|
// implements switch and branch
|
504 |
|
|
// can we cut this down to 11 bits? No! tlb needs all 48
|
505 |
|
|
|
506 |
|
|
// dp_mux4ds #(48) nxt_icaddr_mux(.dout (icaddr_bf),
|
507 |
|
|
// .in0 (swpc_bf[47:0]),
|
508 |
|
|
// .in1 (nextpc_nosw_bf[47:0]),
|
509 |
|
|
// .in2 ({8'b0, {`IC_TAG_SZ{1'b0}},
|
510 |
|
|
// ifq_fdp_icindex_bf, 4'b0}),
|
511 |
|
|
// .in3 (exu_ifu_brpc_e[47:0]),
|
512 |
|
|
// .sel0_l (fcl_fdp_icaddr_sel_swpc_bf_l),
|
513 |
|
|
// .sel1_l (fcl_fdp_icaddr_sel_curr_bf_l),
|
514 |
|
|
// .sel2_l (fcl_fdp_icaddr_sel_ifq_bf_l),
|
515 |
|
|
// .sel3_l (fcl_fdp_icaddr_sel_br_bf_l));
|
516 |
|
|
|
517 |
|
|
// assign fdp_icd_vaddr_bf = icaddr_bf[47:0];
|
518 |
|
|
// this goes to the itlb, icd and ict on top of fdp
|
519 |
|
|
// this is !!very critical!!
|
520 |
|
|
assign fdp_icd_vaddr_bf = pc_bf[47:2];
|
521 |
|
|
|
522 |
|
|
// create separate output for the icv to the left
|
523 |
|
|
assign fdp_icv_index_bf = pc_bf[11:5];
|
524 |
|
|
|
525 |
|
|
// Place this mux as close to the top (itlb) as possible
|
526 |
|
|
dp_mux3ds #(48) pcbf_mux(.dout (pc_bf[47:0]),
|
527 |
|
|
.in0 (swpc_bf[47:0]),
|
528 |
|
|
.in1 (nextpc_nosw_bf[47:0]),
|
529 |
|
|
.in2 (exu_ifu_brpc_e[47:0]),
|
530 |
|
|
.sel0_l (fcl_fdp_pcbf_sel_swpc_bf_l),
|
531 |
|
|
.sel1_l (fcl_fdp_pcbf_sel_nosw_bf_l),
|
532 |
|
|
.sel2_l (fcl_fdp_pcbf_sel_br_bf_l));
|
533 |
|
|
|
534 |
|
|
dff_s #(48) pcf_reg(.din (pc_bf),
|
535 |
|
|
.clk (clk),
|
536 |
|
|
.q (pc_f),
|
537 |
|
|
.se (se), .si(), .so());
|
538 |
|
|
|
539 |
|
|
assign fdp_erb_pc_f = pc_f[47:0];
|
540 |
|
|
|
541 |
|
|
// trappc mux (choose trap pc vs rollback/uTrap pc)
|
542 |
|
|
dp_mux4ds #(49) trap_pc0_mux(.dout (t0_trap_rb_pc_bf),
|
543 |
|
|
.in0 (tlu_ifu_trappc_w2),
|
544 |
|
|
.in1 (pc_d_adj),
|
545 |
|
|
.in2 (t0pc_s),
|
546 |
|
|
.in3 (pc_w),
|
547 |
|
|
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[0]),
|
548 |
|
|
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[0]),
|
549 |
|
|
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[0]),
|
550 |
|
|
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[0]));
|
551 |
|
|
|
552 |
|
|
`ifdef FPGA_SYN_1THREAD
|
553 |
|
|
`else
|
554 |
|
|
dp_mux4ds #(49) trap_pc1_mux(.dout (t1_trap_rb_pc_bf),
|
555 |
|
|
.in0 (tlu_ifu_trappc_w2),
|
556 |
|
|
.in1 (pc_d_adj),
|
557 |
|
|
.in2 (t1pc_s),
|
558 |
|
|
.in3 (pc_w),
|
559 |
|
|
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[1]),
|
560 |
|
|
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[1]),
|
561 |
|
|
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[1]),
|
562 |
|
|
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[1]));
|
563 |
|
|
|
564 |
|
|
dp_mux4ds #(49) trap_pc2_mux(.dout (t2_trap_rb_pc_bf),
|
565 |
|
|
.in0 (tlu_ifu_trappc_w2),
|
566 |
|
|
.in1 (pc_d_adj),
|
567 |
|
|
.in2 (t2pc_s),
|
568 |
|
|
.in3 (pc_w),
|
569 |
|
|
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[2]),
|
570 |
|
|
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[2]),
|
571 |
|
|
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[2]),
|
572 |
|
|
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[2]));
|
573 |
|
|
|
574 |
|
|
dp_mux4ds #(49) trap_pc3_mux(.dout (t3_trap_rb_pc_bf),
|
575 |
|
|
.in0 (tlu_ifu_trappc_w2),
|
576 |
|
|
.in1 (pc_d_adj),
|
577 |
|
|
.in2 (t3pc_s),
|
578 |
|
|
.in3 (pc_w),
|
579 |
|
|
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[3]),
|
580 |
|
|
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[3]),
|
581 |
|
|
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[3]),
|
582 |
|
|
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[3]));
|
583 |
|
|
`endif
|
584 |
|
|
|
585 |
|
|
|
586 |
|
|
// can reduce this to a 2:1 mux since reset pc is not used any more and
|
587 |
|
|
// pc_f is not needed.
|
588 |
|
|
dp_mux3ds #(49) pcp4_mux(.dout (nextpc_nosw_bf),
|
589 |
|
|
.in0 (pcinc_f),
|
590 |
|
|
.in1 (thr_trappc_bf),
|
591 |
|
|
.in2 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
|
592 |
|
|
.sel0_l (fcl_fdp_noswpc_sel_inc_l_bf),
|
593 |
|
|
.sel1_l (fcl_fdp_noswpc_sel_tnpc_l_bf),
|
594 |
|
|
.sel2_l (fcl_fdp_noswpc_sel_old_l_bf));
|
595 |
|
|
|
596 |
|
|
|
597 |
|
|
// next S stage thread pc mux per thread
|
598 |
|
|
// Use advtpcs signal which works for stall (Aug '01)
|
599 |
|
|
// Merged pc_e/pc_d into the eqn to allow for rollback
|
600 |
|
|
dp_mux4ds #(49) t0pcf_mux(.dout (t0_next_pcs_f),
|
601 |
|
|
.in0 (t0pc_s),
|
602 |
|
|
.in1 ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
|
603 |
|
|
.in2 (pc_d_adj),
|
604 |
|
|
.in3 (pc_e),
|
605 |
|
|
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[0]),
|
606 |
|
|
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[0]),
|
607 |
|
|
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[0]),
|
608 |
|
|
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[0]));
|
609 |
|
|
|
610 |
|
|
`ifdef FPGA_SYN_1THREAD
|
611 |
|
|
`else
|
612 |
|
|
dp_mux4ds #(49) t1pcf_mux(.dout (t1_next_pcs_f),
|
613 |
|
|
.in0 (t1pc_s),
|
614 |
|
|
.in1 ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
|
615 |
|
|
.in2 (pc_d_adj),
|
616 |
|
|
.in3 (pc_e),
|
617 |
|
|
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[1]),
|
618 |
|
|
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[1]),
|
619 |
|
|
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[1]),
|
620 |
|
|
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[1]));
|
621 |
|
|
|
622 |
|
|
dp_mux4ds #(49) t2pcf_mux(.dout (t2_next_pcs_f),
|
623 |
|
|
.in0 (t2pc_s),
|
624 |
|
|
.in1 ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
|
625 |
|
|
// .in1 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
|
626 |
|
|
.in2 (pc_d_adj),
|
627 |
|
|
.in3 (pc_e),
|
628 |
|
|
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[2]),
|
629 |
|
|
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[2]),
|
630 |
|
|
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[2]),
|
631 |
|
|
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[2]));
|
632 |
|
|
|
633 |
|
|
dp_mux4ds #(49) t3pcf_mux(.dout (t3_next_pcs_f),
|
634 |
|
|
.in0 (t3pc_s),
|
635 |
|
|
.in1 ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
|
636 |
|
|
// .in1 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
|
637 |
|
|
.in2 (pc_d_adj),
|
638 |
|
|
.in3 (pc_e),
|
639 |
|
|
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[3]),
|
640 |
|
|
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[3]),
|
641 |
|
|
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[3]),
|
642 |
|
|
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[3]));
|
643 |
|
|
`endif
|
644 |
|
|
|
645 |
|
|
|
646 |
|
|
// S stage thread PC regs; use low power thr flop
|
647 |
|
|
dff_s #(49) t0pcs_reg(.din (t0_next_pcs_f),
|
648 |
|
|
.q (t0pc_s),
|
649 |
|
|
.clk (clk), .se(se), .si(), .so());
|
650 |
|
|
`ifdef FPGA_SYN_1THREAD
|
651 |
|
|
assign pc_s = t0pc_s;
|
652 |
|
|
assign npc_s = t0_next_pcs_f;
|
653 |
|
|
`else
|
654 |
|
|
dff_s #(49) t1pcs_reg(.din (t1_next_pcs_f),
|
655 |
|
|
.q (t1pc_s),
|
656 |
|
|
.clk (clk), .se(se), .si(), .so());
|
657 |
|
|
dff_s #(49) t2pcs_reg(.din (t2_next_pcs_f),
|
658 |
|
|
.q (t2pc_s),
|
659 |
|
|
.clk (clk), .se(se), .si(), .so());
|
660 |
|
|
dff_s #(49) t3pcs_reg(.din (t3_next_pcs_f),
|
661 |
|
|
.q (t3pc_s),
|
662 |
|
|
.clk (clk), .se(se), .si(), .so());
|
663 |
|
|
|
664 |
|
|
// S stage PC mux -- need to protect
|
665 |
|
|
dp_mux4ds #(49) pcs_mux(.dout (pc_s),
|
666 |
|
|
.in0 (t0pc_s),
|
667 |
|
|
.in1 (t1pc_s),
|
668 |
|
|
.in2 (t2pc_s),
|
669 |
|
|
.in3 (t3pc_s),
|
670 |
|
|
.sel0_l (fcl_fdp_thr_s2_l[0]),
|
671 |
|
|
.sel1_l (fcl_fdp_thr_s2_l[1]),
|
672 |
|
|
.sel2_l (fcl_fdp_thr_s2_l[2]),
|
673 |
|
|
.sel3_l (fcl_fdp_thr_s2_l[3]));
|
674 |
|
|
|
675 |
|
|
// S stage next PC mux -- need to protect
|
676 |
|
|
dp_mux4ds #(49) npcs_mux(.dout (npc_s),
|
677 |
|
|
.in0 (t0_next_pcs_f),
|
678 |
|
|
.in1 (t1_next_pcs_f),
|
679 |
|
|
.in2 (t2_next_pcs_f),
|
680 |
|
|
.in3 (t3_next_pcs_f),
|
681 |
|
|
.sel0_l (fcl_fdp_thr_s2_l[0]),
|
682 |
|
|
.sel1_l (fcl_fdp_thr_s2_l[1]),
|
683 |
|
|
.sel2_l (fcl_fdp_thr_s2_l[2]),
|
684 |
|
|
.sel3_l (fcl_fdp_thr_s2_l[3]));
|
685 |
|
|
`endif
|
686 |
|
|
|
687 |
|
|
// D stage PC and nPC
|
688 |
|
|
dff_s #(49) pcd_reg(.din (pc_s),
|
689 |
|
|
.q (pc_d),
|
690 |
|
|
.clk (clk), .se(se), .si(), .so());
|
691 |
|
|
dff_s #(49) npcd_reg(.din (npc_s),
|
692 |
|
|
.q (npc_d),
|
693 |
|
|
.clk (clk), .se(se), .si(), .so());
|
694 |
|
|
|
695 |
|
|
// Address-mask vector for the D-stage PC/nPC: bits 31:0 always pass;
// bits 48:32 are forced to 0 when fcl_fdp_addr_mask_d is asserted.
assign am_mask = {{17{~fcl_fdp_addr_mask_d}}, 32'hffffffff};
|
696 |
|
|
|
697 |
|
|
// nand2
// Apply the mask to both D-stage values. pc_d_adj feeds ifu_exu_pc_d, the
// E-stage PC register and the per-thread rollback / next-S-stage PC muxes;
// npc_d_adj feeds the E-stage nPC register.
|
698 |
|
|
assign pc_d_adj = pc_d & am_mask;
|
699 |
|
|
assign npc_d_adj = npc_d & am_mask;
|
700 |
|
|
|
701 |
|
|
assign ifu_exu_pc_d = pc_d_adj[47:0];
|
702 |
|
|
|
703 |
|
|
// E stage PC and nPC
|
704 |
|
|
dff_s #(49) pce_reg(.din (pc_d_adj),
|
705 |
|
|
.q (pc_e),
|
706 |
|
|
.clk (clk), .se(se), .si(), .so());
|
707 |
|
|
dff_s #(49) npce_reg(.din (npc_d_adj),
|
708 |
|
|
.q (npc_e),
|
709 |
|
|
.clk (clk), .se(se), .si(), .so());
|
710 |
|
|
|
711 |
|
|
assign fdp_fcl_pc_oor_e = pc_e[48];
|
712 |
|
|
assign ifu_tlu_pc_oor_e = pc_e[48];
|
713 |
|
|
|
714 |
|
|
// M stage PC and nPC
|
715 |
|
|
dff_s #(49) pcm_reg(.din (pc_e),
|
716 |
|
|
.q (pc_m),
|
717 |
|
|
.clk (clk), .se(se), .si(), .so());
|
718 |
|
|
dff_s #(49) npcm_reg(.din (npc_e),
|
719 |
|
|
.q (npc_m),
|
720 |
|
|
.clk (clk), .se(se), .si(), .so());
|
721 |
|
|
assign ifu_tlu_pc_m = pc_m[48:0];
|
722 |
|
|
assign ifu_tlu_npc_m = npc_m[48:0];
|
723 |
|
|
|
724 |
|
|
// W stage PC and nPC
|
725 |
|
|
dff_s #(49) pcw_reg(.din (pc_m),
|
726 |
|
|
.q (pc_w),
|
727 |
|
|
.clk (clk), .se(se), .si(), .so());
|
728 |
|
|
dff_s #(49) npcw_reg(.din (npc_m),
|
729 |
|
|
.q (npc_w),
|
730 |
|
|
.clk (clk), .se(se), .si(), .so());
|
731 |
|
|
|
732 |
|
|
// assign ifu_tlu_pc_w = pc_w;
|
733 |
|
|
// assign ifu_tlu_npc_w = npc_w;
|
734 |
|
|
|
735 |
|
|
// PC incrementer
|
736 |
|
|
// can we fit the ofl logic on the side of the incrementer?
|
737 |
|
|
// The low two PC bits are copied through unchanged; only pc_f[47:2] goes
// through the incrementer. sparc_ifu_incr46 is defined outside this file;
// presumably it adds 1 to the word address (the PC + 4 adder mentioned in
// the module description) -- TODO confirm.
assign pcinc_f[1:0] = pc_f[1:0];
|
738 |
|
|
sparc_ifu_incr46 pc_inc(.a (pc_f[47:2]),
|
739 |
|
|
.a_inc (pcinc_f[47:2]),
|
740 |
|
|
.ofl ()); // ofl output not needed
|
741 |
|
|
|
742 |
|
|
// assign pcinc_f[48] = inc_ofl & ~fcl_fdp_mask32b_f | fcl_fdp_pcoor_f;
|
743 |
|
|
// Bit 48 of the incremented PC is set when bit 47 goes from 0 to 1 across
// the increment (suppressed while fcl_fdp_mask32b_f is set), or when
// fcl_fdp_pcoor_f is already set. Bit 48 of the thread PCs is what this
// module exports as fdp_fcl_pc_oor_vec_f.
assign pcinc_f[48] = ~pc_f[47] & pcinc_f[47] & ~fcl_fdp_mask32b_f |
|
744 |
|
|
fcl_fdp_pcoor_f;
|
745 |
|
|
|
746 |
|
|
// Input mux for each thr trapnpc reg; the sel_old input recirculates the register's current value, so it acts as the enable
|
747 |
|
|
dp_mux4ds #(49) t0tnpc_mux(.dout (trapnpc0_bf),
|
748 |
|
|
.in0 (tlu_ifu_trapnpc_w2),
|
749 |
|
|
.in1 (npc_w),
|
750 |
|
|
.in2 (t0pc_f),
|
751 |
|
|
.in3 (t0_trapnpc_f),
|
752 |
|
|
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[0]),
|
753 |
|
|
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[0]),
|
754 |
|
|
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[0]),
|
755 |
|
|
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[0]));
|
756 |
|
|
|
757 |
|
|
`ifdef FPGA_SYN_1THREAD
|
758 |
|
|
`else
|
759 |
|
|
dp_mux4ds #(49) t1tnpc_mux(.dout (trapnpc1_bf),
|
760 |
|
|
.in0 (tlu_ifu_trapnpc_w2),
|
761 |
|
|
.in1 (npc_w),
|
762 |
|
|
.in2 (t1pc_f),
|
763 |
|
|
.in3 (t1_trapnpc_f),
|
764 |
|
|
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[1]),
|
765 |
|
|
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[1]),
|
766 |
|
|
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[1]),
|
767 |
|
|
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[1]));
|
768 |
|
|
|
769 |
|
|
dp_mux4ds #(49) t2tnpc_mux(.dout (trapnpc2_bf),
|
770 |
|
|
.in0 (tlu_ifu_trapnpc_w2),
|
771 |
|
|
.in1 (npc_w),
|
772 |
|
|
.in2 (t2pc_f),
|
773 |
|
|
.in3 (t2_trapnpc_f),
|
774 |
|
|
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[2]),
|
775 |
|
|
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[2]),
|
776 |
|
|
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[2]),
|
777 |
|
|
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[2]));
|
778 |
|
|
|
779 |
|
|
dp_mux4ds #(49) t3tnpc_mux(.dout (trapnpc3_bf),
|
780 |
|
|
.in0 (tlu_ifu_trapnpc_w2),
|
781 |
|
|
.in1 (npc_w),
|
782 |
|
|
.in2 (t3pc_f),
|
783 |
|
|
.in3 (t3_trapnpc_f),
|
784 |
|
|
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[3]),
|
785 |
|
|
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[3]),
|
786 |
|
|
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[3]),
|
787 |
|
|
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[3]));
|
788 |
|
|
`endif
|
789 |
|
|
|
790 |
|
|
// Thread 0 trap-NPC register: trapnpc0_bf -> t0_trapnpc_f on clk.
// Scan-in/scan-out are left unconnected here.
dff_s #(49) t0tnpcf_reg(
    .clk (clk),
    .din (trapnpc0_bf),
    .q   (t0_trapnpc_f),
    .se  (se), .si (), .so ());
|
794 |
|
|
`ifdef FPGA_SYN_1THREAD
// Single-thread build: the trap PC is thread 0's stored value.
assign thr_trappc_bf = t0_trapnpc_f;
`else
// Trap-NPC registers for threads 1-3 (trapnpcN_bf -> tN_trapnpc_f).
dff_s #(49) t1tnpcf_reg(
    .clk (clk),
    .din (trapnpc1_bf),
    .q   (t1_trapnpc_f),
    .se  (se), .si (), .so ());
dff_s #(49) t2tnpcf_reg(
    .clk (clk),
    .din (trapnpc2_bf),
    .q   (t2_trapnpc_f),
    .se  (se), .si (), .so ());
dff_s #(49) t3tnpcf_reg(
    .clk (clk),
    .din (trapnpc3_bf),
    .q   (t3_trapnpc_f),
    .se  (se), .si (), .so ());

// Pick one thread's stored trap NPC as thr_trappc_bf.
// Selected by fcl_fdp_thr_s2_l (thr_s2 = thr_f).
dp_mux4ds #(49) nxttpc_mux(
    .dout   (thr_trappc_bf),
    .in0    (t0_trapnpc_f), .sel0_l (fcl_fdp_thr_s2_l[0]),
    .in1    (t1_trapnpc_f), .sel1_l (fcl_fdp_thr_s2_l[1]),
    .in2    (t2_trapnpc_f), .sel2_l (fcl_fdp_thr_s2_l[2]),
    .in3    (t3_trapnpc_f), .sel3_l (fcl_fdp_thr_s2_l[3]));
`endif
|
817 |
|
|
|
818 |
|
|
// During rst nextpc_nosw_bf = PO_RESET_PC. All thread PC_f registers,
|
819 |
|
|
// the icaddr_f register and the nextpc register should be loaded
|
820 |
|
|
// with nextpc_nosw_bf during reset.
|
821 |
|
|
// Eventually, we will load the reset_pc from the trap logic unit,
|
822 |
|
|
// which will arrive on the trap_pc bus.
|
823 |
|
|
|
824 |
|
|
|
825 |
|
|
// TBD in PC datapath:
|
826 |
|
|
// 1. Add useNIR bit to PCs -- DONE
|
827 |
|
|
// 2. Add support for ifq request grant -- DONE
|
828 |
|
|
// 3. Generate icache read signal (from fcl?) -- DONE
|
829 |
|
|
// 4. Rollback functionality -- DONE
|
830 |
|
|
// 5. PC range checks -- DONE
|
831 |
|
|
// 6. Change PC to 48 bit value -- DONE
|
832 |
|
|
|
833 |
|
|
|
834 |
|
|
//----------------------------------------------------------------------
|
835 |
|
|
// Fetched Instruction Datapath
|
836 |
|
|
//----------------------------------------------------------------------
|
837 |
|
|
|
838 |
|
|
// This is logically 33 bits wide. The NIR and IR datapaths are laid
|
839 |
|
|
// side by side, making this a 66bit datapath. The NIR path is
|
840 |
|
|
// potentially a little longer.
|
841 |
|
|
|
842 |
|
|
// Stage-1 instruction source: fetched icache data when the select is 0,
// next-instruction-register (NIR) data when it is 1.
dp_mux2es #(33) usenir_mux(
    .sel  (fcl_fdp_usenir_sel_nir_s1),   // 1=nir
    .in0  (icd_fdp_fetdata_s1[32:0]),
    .in1  (nirdata_s1),
    .dout (inst_s1));
|
847 |
|
|
|
848 |
|
|
// Instruction output mux (CHANGE: now 4:1).  Produces fdp_inst_s from:
//   in0 (curr)   - fetched icache data
//   in1 (switch) - inst_s2, the saved instruction of the thread switched to
//   in2 (nop)    - `NOP with a 0 appended as bit 0
//   in3 (nir)    - next-instruction-register data
dp_mux4ds #(33) instout_mux(
    .dout   (fdp_inst_s),
    .in0    (icd_fdp_fetdata_s1[32:0]), .sel0_l (fcl_fdp_inst_sel_curr_s_l),
    .in1    (inst_s2),                  .sel1_l (fcl_fdp_inst_sel_switch_s_l),
    .in2    ({`NOP, 1'b0}),             .sel2_l (fcl_fdp_inst_sel_nop_s_l),
    .in3    (nirdata_s1[32:0]),         .sel3_l (fcl_fdp_inst_sel_nir_s_l));
|
859 |
|
|
|
860 |
|
|
// Fields of the selected S-stage instruction.  fdp_inst_s is 33 bits wide:
// bits [32:1] go to the dtu as the instruction word and bit 0 is sent to
// fcl separately as fdp_fcl_swc_s2.
assign fdp_dtu_inst_s = fdp_inst_s[32:1];
assign fdp_fcl_swc_s2 = fdp_inst_s[0];

// Decode fields for fcl.  Note only four bits are tapped for op3.
assign fdp_fcl_op_s   = fdp_inst_s[32:31];
assign fdp_fcl_op3_s  = fdp_inst_s[25:22];
assign fdp_fcl_ibit_s = fdp_inst_s[14];
|
867 |
|
|
|
868 |
|
|
// CHANGE: random logic added to fix timing paths.
//   - output pin on RHS, as close to IRF as possible
//   - 16x drivers
//   - nand2-xor-invert
// For each 5-bit register specifier the low four bits pass straight
// through; the top bit is XORed with (bit 3 AND fcl_fdp_oddwin_s).

// rs1 <- fdp_inst_s[19:15]
assign ifu_exu_rs1_s[3:0] = fdp_inst_s[18:15];
assign ifu_exu_rs1_s[4]   = (fdp_inst_s[18] & fcl_fdp_oddwin_s) ^
                            fdp_inst_s[19];

// rs2 <- fdp_inst_s[5:1]
assign ifu_exu_rs2_s[3:0] = fdp_inst_s[4:1];
assign ifu_exu_rs2_s[4]   = (fdp_inst_s[4] & fcl_fdp_oddwin_s) ^
                            fdp_inst_s[5];

// rs3 <- fdp_inst_s[30:26]
assign ifu_exu_rs3_s[3:0] = fdp_inst_s[29:26];
assign ifu_exu_rs3_s[4]   = (fdp_inst_s[29] & fcl_fdp_oddwin_s) ^
                            fdp_inst_s[30];
|
883 |
|
|
|
884 |
|
|
|
885 |
|
|
// 33-bit buffer between inst_s1 and inst_s1_bf1; inst_s1_bf1 is what the
// per-thread instruction muxes take on in1.  Ports are connected by
// position (presumably output first, then input -- confirm against the
// dp_buffer definition).
dp_buffer #(33) insts1_buf(inst_s1_bf1, inst_s1[32:0]);
|
886 |
|
|
|
887 |
|
|
// Thread 0 instruction mux: next value of the thread 0 instruction
// register (t0inst_s1).
//   in0 (ifq)  - ifq_fdp_fill_inst
//   in1 (curr) - inst_s1_bf1, the buffered stage-1 instruction
//   in2 (old)  - t0inst_s2, the value already held for this thread
//   in3 (rb)   - rb_inst0_s, the rollback instruction
dp_mux4ds #(33) t0inst_mux(
    .dout   (t0inst_s1),
    .in0    (ifq_fdp_fill_inst), .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[0]),
    .in1    (inst_s1_bf1),       .sel1_l (fcl_fdp_tinst_sel_curr_s_l[0]),
    .in2    (t0inst_s2),         .sel2_l (fcl_fdp_tinst_sel_old_s_l[0]),
    .in3    (rb_inst0_s),        .sel3_l (fcl_fdp_tinst_sel_rb_s_l[0]));
|
897 |
|
|
|
898 |
|
|
`ifdef FPGA_SYN_1THREAD
// Single-thread build: no instruction muxes for threads 1-3.
`else
// Instruction muxes for threads 1-3.  Inputs are the ifq fill instruction,
// the buffered stage-1 instruction, the thread's held instruction
// (tNinst_s2) and the thread's rollback instruction (rb_instN_s); bit N of
// each fcl_fdp_tinst_sel_* vector does the selection.
dp_mux4ds #(33) t1inst_mux(
    .dout   (t1inst_s1),
    .in0    (ifq_fdp_fill_inst), .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[1]),
    .in1    (inst_s1_bf1),       .sel1_l (fcl_fdp_tinst_sel_curr_s_l[1]),
    .in2    (t1inst_s2),         .sel2_l (fcl_fdp_tinst_sel_old_s_l[1]),
    .in3    (rb_inst1_s),        .sel3_l (fcl_fdp_tinst_sel_rb_s_l[1]));

dp_mux4ds #(33) t2inst_mux(
    .dout   (t2inst_s1),
    .in0    (ifq_fdp_fill_inst), .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[2]),
    .in1    (inst_s1_bf1),       .sel1_l (fcl_fdp_tinst_sel_curr_s_l[2]),
    .in2    (t2inst_s2),         .sel2_l (fcl_fdp_tinst_sel_old_s_l[2]),
    .in3    (rb_inst2_s),        .sel3_l (fcl_fdp_tinst_sel_rb_s_l[2]));

dp_mux4ds #(33) t3inst_mux(
    .dout   (t3inst_s1),
    .in0    (ifq_fdp_fill_inst), .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[3]),
    .in1    (inst_s1_bf1),       .sel1_l (fcl_fdp_tinst_sel_curr_s_l[3]),
    .in2    (t3inst_s2),         .sel2_l (fcl_fdp_tinst_sel_old_s_l[3]),
    .in3    (rb_inst3_s),        .sel3_l (fcl_fdp_tinst_sel_rb_s_l[3]));
`endif
|
930 |
|
|
|
931 |
|
|
// Thread 0 instruction register: t0inst_s1 -> t0inst_s2 on clk.
dff_s #(33) t0_inst_reg(
    .clk (clk),
    .din (t0inst_s1),
    .q   (t0inst_s2),
    .se  (se), .si (), .so ());
|
935 |
|
|
`ifdef FPGA_SYN_1THREAD
// Single-thread build: the switch-in instruction is always thread 0's.
assign inst_s2 = t0inst_s2;
`else
// Instruction registers for threads 1-3 (tNinst_s1 -> tNinst_s2).
dff_s #(33) t1_inst_reg(
    .clk (clk),
    .din (t1inst_s1),
    .q   (t1inst_s2),
    .se  (se), .si (), .so ());
dff_s #(33) t2_inst_reg(
    .clk (clk),
    .din (t2inst_s1),
    .q   (t2inst_s2),
    .se  (se), .si (), .so ());
dff_s #(33) t3_inst_reg(
    .clk (clk),
    .din (t3inst_s1),
    .q   (t3inst_s2),
    .se  (se), .si (), .so ());

// Switch instruction mux -- chooses the instruction to switch to.
// fcl keeps track of which t*inst_s2 is valid.
dp_mux4ds #(33) swinst_mux(
    .dout   (inst_s2),
    .in0    (t0inst_s2), .sel0_l (fcl_fdp_thr_s2_l[0]),
    .in1    (t1inst_s2), .sel1_l (fcl_fdp_thr_s2_l[1]),
    .in2    (t2inst_s2), .sel2_l (fcl_fdp_thr_s2_l[2]),
    .in3    (t3inst_s2), .sel3_l (fcl_fdp_thr_s2_l[3]));
`endif
|
960 |
|
|
|
961 |
|
|
// Rollback instruction pipeline: two back-to-back 33-bit registers.
// fdp_inst_s is captured into inst_d, and inst_d into inst_e, so both
// copies are available to the per-thread rollback muxes.
dff_s #(33) rbinst_d_reg(
    .clk (clk),
    .din (fdp_inst_s[32:0]),
    .q   (inst_d),
    .se  (se), .si (), .so ());

dff_s #(33) rbinst_e_reg(
    .clk (clk),
    .din (inst_d),
    .q   (inst_e),
    .se  (se), .si (), .so ());
|
971 |
|
|
|
972 |
|
|
// Thread 0 rollback instruction: inst_d on in0, inst_e on in1, chosen by
// fcl_fdp_rbinst_sel_inste_s[0] (by its name, 1 selects inst_e --
// confirm against dp_mux2es).
dp_mux2es #(33) rbinst0_mux(
    .sel  (fcl_fdp_rbinst_sel_inste_s[0]),
    .in0  (inst_d),
    .in1  (inst_e),
    .dout (rb_inst0_s));
|
976 |
|
|
|
977 |
|
|
`ifdef FPGA_SYN_1THREAD
// Single-thread build: no rollback muxes for threads 1-3.
`else
// Rollback instruction muxes for threads 1-3: inst_d on in0, inst_e on
// in1, selected by bit N of fcl_fdp_rbinst_sel_inste_s.
dp_mux2es #(33) rbinst1_mux(
    .sel  (fcl_fdp_rbinst_sel_inste_s[1]),
    .in0  (inst_d),
    .in1  (inst_e),
    .dout (rb_inst1_s));

dp_mux2es #(33) rbinst2_mux(
    .sel  (fcl_fdp_rbinst_sel_inste_s[2]),
    .in0  (inst_d),
    .in1  (inst_e),
    .dout (rb_inst2_s));

dp_mux2es #(33) rbinst3_mux(
    .sel  (fcl_fdp_rbinst_sel_inste_s[3]),
    .in0  (inst_d),
    .in1  (inst_e),
    .dout (rb_inst3_s));
`endif
|
994 |
|
|
|
995 |
|
|
//----------------------------------------------------------------------
|
996 |
|
|
// Next Instruction Datapath
|
997 |
|
|
//----------------------------------------------------------------------
|
998 |
|
|
|
999 |
|
|
// Thread next instruction muxes
|
1000 |
|
|
// dp_mux2es #(33) t0nir_mux(.dout (t0nir_in),
|
1001 |
|
|
// .in0 (icd_fdp_topdata_s1[32:0]),
|
1002 |
|
|
// .in1 (t0nir),
|
1003 |
|
|
// .sel (fcl_fdp_thr_s1_l[0])); // 0=new
|
1004 |
|
|
// dp_mux2es #(33) t1nir_mux(.dout (t1nir_in),
|
1005 |
|
|
// .in0 (icd_fdp_topdata_s1[32:0]),
|
1006 |
|
|
// .in1 (t1nir),
|
1007 |
|
|
// .sel (fcl_fdp_thr_s1_l[1]));
|
1008 |
|
|
// dp_mux2es #(33) t2nir_mux(.dout (t2nir_in),
|
1009 |
|
|
// .in0 (icd_fdp_topdata_s1[32:0]),
|
1010 |
|
|
// .in1 (t2nir),
|
1011 |
|
|
// .sel (fcl_fdp_thr_s1_l[2]));
|
1012 |
|
|
// dp_mux2es #(33) t3nir_mux(.dout (t3nir_in),
|
1013 |
|
|
// .in0 (icd_fdp_topdata_s1[32:0]),
|
1014 |
|
|
// .in1 (t3nir),
|
1015 |
|
|
// .sel (fcl_fdp_thr_s1_l[3]));
|
1016 |
|
|
|
1017 |
|
|
// Thread 0 next-instruction register (NIR): captures
// icd_fdp_topdata_s1[32:0] into t0nir.
//   - Default build: a dff_s clocked by clk_nir0, which ckennir0 derives
//     from rclk with fcl_fdp_thr_s1_l[0] on its en_l pin.
//   - FPGA_SYN_CLK_DFF: a dffe_s clocked by rclk directly, enabled by
//     ~fcl_fdp_thr_s1_l[0].
// NOTE(review): FPGA_SYN_CLK_EN removes ckennir0.  If it is defined
// without FPGA_SYN_CLK_DFF, clk_nir0 has no driver in this section --
// presumably the two macros are always defined together; confirm.
wire clk_nir0;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir0(
    .rclk (rclk),
    .en_l (fcl_fdp_thr_s1_l[0]),
    .tm_l (~se),
    .clk  (clk_nir0));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe_s #(33) t0nir_reg(
    .clk (rclk),
    .en  (~(fcl_fdp_thr_s1_l[0])),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t0nir),
    .se  (se), .si (), .so ());
`else
dff_s #(33) t0nir_reg(
    .clk (clk_nir0),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t0nir),
    .se  (se), .si (), .so ());
`endif
|
1037 |
|
|
|
1038 |
|
|
`ifdef FPGA_SYN_1THREAD
// Single-thread build: NIR data is always thread 0's register.
assign nirdata_s1 = t0nir;
`else
// NIR registers for threads 1-3.  Each captures icd_fdp_topdata_s1[32:0].
//   - Default build: dff_s on a per-thread clock clk_nirN produced by a
//     bw_u1_ckenbuf_6x from rclk, with fcl_fdp_thr_s1_l[N] on en_l.
//   - FPGA_SYN_CLK_DFF: dffe_s on rclk with enable ~fcl_fdp_thr_s1_l[N].
// NOTE(review): FPGA_SYN_CLK_EN removes the clock buffers; the macros are
// presumably always defined together, otherwise clk_nirN is undriven.

// ---- thread 1 ----
wire clk_nir1;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir1(
    .rclk (rclk),
    .en_l (fcl_fdp_thr_s1_l[1]),
    .tm_l (~se),
    .clk  (clk_nir1));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe_s #(33) t1nir_reg(
    .clk (rclk),
    .en  (~(fcl_fdp_thr_s1_l[1])),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t1nir),
    .se  (se), .si (), .so ());
`else
dff_s #(33) t1nir_reg(
    .clk (clk_nir1),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t1nir),
    .se  (se), .si (), .so ());
`endif

// ---- thread 2 ----
wire clk_nir2;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir2(
    .rclk (rclk),
    .en_l (fcl_fdp_thr_s1_l[2]),
    .tm_l (~se),
    .clk  (clk_nir2));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe_s #(33) t2nir_reg(
    .clk (rclk),
    .en  (~(fcl_fdp_thr_s1_l[2])),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t2nir),
    .se  (se), .si (), .so ());
`else
dff_s #(33) t2nir_reg(
    .clk (clk_nir2),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t2nir),
    .se  (se), .si (), .so ());
`endif

// ---- thread 3 ----
wire clk_nir3;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir3(
    .rclk (rclk),
    .en_l (fcl_fdp_thr_s1_l[3]),
    .tm_l (~se),
    .clk  (clk_nir3));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe_s #(33) t3nir_reg(
    .clk (rclk),
    .en  (~(fcl_fdp_thr_s1_l[3])),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t3nir),
    .se  (se), .si (), .so ());
`else
dff_s #(33) t3nir_reg(
    .clk (clk_nir3),
    .din (icd_fdp_topdata_s1[32:0]),
    .q   (t3nir),
    .se  (se), .si (), .so ());
`endif

// Next thread NIR mux (nir output mux): picks one thread's NIR as
// nirdata_s1, selected by fcl_fdp_nirthr_s1_l.
dp_mux4ds #(33) nextnir_mux(
    .dout   (nirdata_s1),
    .in0    (t0nir), .sel0_l (fcl_fdp_nirthr_s1_l[0]),
    .in1    (t1nir), .sel1_l (fcl_fdp_nirthr_s1_l[1]),
    .in2    (t2nir), .sel2_l (fcl_fdp_nirthr_s1_l[2]),
    .in3    (t3nir), .sel3_l (fcl_fdp_nirthr_s1_l[3]));
`endif
|
1109 |
|
|
|
1110 |
|
|
// TBD in fetched instruction DP:
|
1111 |
|
|
// 1. Rollback -- DONE
|
1112 |
|
|
// 2. Icache parity check (increase fet data and top data to 34 bits)
|
1113 |
|
|
|
1114 |
|
|
endmodule // sparc_ifu_fdp
|
1115 |
|
|
|