1 |
2 |
dmitryr |
// ========== Copyright Header Begin ==========================================
|
2 |
|
|
//
|
3 |
|
|
// OpenSPARC T1 Processor File: sparc_ffu_ctl_visctl.v
|
4 |
|
|
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
|
5 |
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
|
6 |
|
|
//
|
7 |
|
|
// The above named program is free software; you can redistribute it and/or
|
8 |
|
|
// modify it under the terms of the GNU General Public
|
9 |
|
|
// License version 2 as published by the Free Software Foundation.
|
10 |
|
|
//
|
11 |
|
|
// The above named program is distributed in the hope that it will be
|
12 |
|
|
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
// General Public License for more details.
|
15 |
|
|
//
|
16 |
|
|
// You should have received a copy of the GNU General Public
|
17 |
|
|
// License along with this work; if not, write to the Free Software
|
18 |
|
|
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
19 |
|
|
//
|
20 |
|
|
// ========== Copyright Header End ============================================
|
21 |
|
|
////////////////////////////////////////////////////////////////////////
|
22 |
|
|
/*
|
23 |
|
|
// Module Name: sparc_ffu_ctl_visctl
|
24 |
|
|
// Description: This is the ffu vis control block.
|
25 |
|
|
*/
|
26 |
|
|
// FFU VIS control: decodes the VIS opf field, tracks the VIS op valid bit
// through the E/M/W/W2/W3 stages, and holds the per-thread GSR round-mode
// and align-offset fields.
module sparc_ffu_ctl_visctl (/*AUTOARG*/
   // Outputs
   ctl_vis_sel_add, ctl_vis_sel_log, ctl_vis_sel_align,
   ctl_vis_add32, ctl_vis_subtract, ctl_vis_cin, ctl_vis_align0,
   ctl_vis_align2, ctl_vis_align4, ctl_vis_align6, ctl_vis_align_odd,
   ctl_vis_log_sel_pass, ctl_vis_log_sel_nand, ctl_vis_log_sel_nor,
   ctl_vis_log_sel_xor, ctl_vis_log_invert_rs1,
   ctl_vis_log_invert_rs2, ctl_vis_log_constant,
   ctl_vis_log_pass_const, ctl_vis_log_pass_rs1,
   ctl_vis_log_pass_rs2, vis_result, illegal_vis_e, vis_nofrf_e,
   visop_m, visop_w_vld, vis_wen_next, fpu_rnd,
   ffu_exu_rsr_data_hi_m, ffu_exu_rsr_data_mid_m,
   ffu_exu_rsr_data_lo_m, ctl_dp_wsr_data_w2, ctl_dp_gsr_wsr_w2,
   ctl_dp_thr_e,
   // Inputs
   clk, se, reset, opf, tid_w2, tid_e, tid, visop_e, kill_w,
   ifu_tlu_sraddr_d, exu_ffu_wsr_inst_e, exu_ffu_gsr_align_m,
   exu_ffu_gsr_rnd_m, exu_ffu_gsr_mask_m, exu_ffu_gsr_scale_m,
   ifu_ffu_rnd_e, dp_ctl_fsr_rnd, flush_w2, thr_match_mw2,
   thr_match_ww2, ifu_tlu_inst_vld_w, ue_trap_w3, frs1_e, frs2_e,
   frd_e, rollback_c3, rollback_rs2_w2, visop, rollback_rs1_w3,
   dp_ctl_gsr_mask_e, dp_ctl_gsr_scale_e
   ) ;

// ---- Clocking / scan / reset ----
input clk;                       // clock for every flop in this block
input se;                        // scan enable, wired to .se of every flop
input reset;                     // .rst of the per-thread GSR flops and siam_rnd flop

// ---- Instruction information ----
input [8:0] opf;                 // opcode field decoded into add/align/logic/siam/alignaddr
input [1:0] tid_w2;              // thread id, decoded to thr_w2 (qualifies WSR GSR writes)
input [1:0] tid_e;               // thread id, decoded to ctl_dp_thr_e
input [1:0] tid;                 // thread id, decoded to thr_fp (qualifies siam/alignaddr writes)
input visop_e;                   // VIS op present in E
input kill_w;                    // kills the VIS op in W
input [6:0] ifu_tlu_sraddr_d;    // state-register address; compared with 7'b0010011 for GSR
input exu_ffu_wsr_inst_e;        // WSR instruction in E (combined with GSR address match)

// ---- WSR write data for GSR (M stage, staged to W2 here) ----
input [2:0] exu_ffu_gsr_align_m;
input [2:0] exu_ffu_gsr_rnd_m;
input [31:0] exu_ffu_gsr_mask_m;
input [4:0] exu_ffu_gsr_scale_m;

// ---- Rounding sources ----
input [2:0] ifu_ffu_rnd_e;       // captured into siam_rnd while visop_e is high
input [1:0] dp_ctl_fsr_rnd;      // used for fpu_rnd when gsr_rnd[2] is 0

// ---- Kill / flush / rollback qualifiers ----
input flush_w2;
input thr_match_mw2;
input thr_match_ww2;
input ifu_tlu_inst_vld_w;
input ue_trap_w3;
input [4:0] frs1_e;              // register specifiers checked by the illegal_* terms
input [4:0] frs2_e;
input [4:0] frd_e;
input rollback_c3;
input rollback_rs2_w2;
input visop;                     // re-issued into the pipe when rollback_c3 is high
input rollback_rs1_w3;

// ---- GSR mask/scale read data (flopped E->M onto the rsr outputs) ----
input [31:0] dp_ctl_gsr_mask_e;
input [4:0] dp_ctl_gsr_scale_e;

// ---- Result mux selects ----
output ctl_vis_sel_add;
output ctl_vis_sel_log;
output ctl_vis_sel_align;

// ---- Adder controls ----
output ctl_vis_add32;
output ctl_vis_subtract;
output ctl_vis_cin;

// ---- Align controls (decoded from the flopped GSR align field) ----
output ctl_vis_align0;
output ctl_vis_align2;
output ctl_vis_align4;
output ctl_vis_align6;
output ctl_vis_align_odd;

// ---- Logic-unit controls ----
output ctl_vis_log_sel_pass;
output ctl_vis_log_sel_nand;
output ctl_vis_log_sel_nor;
output ctl_vis_log_sel_xor;
output ctl_vis_log_invert_rs1;
output ctl_vis_log_invert_rs2;
output ctl_vis_log_constant;
output ctl_vis_log_pass_const;
output ctl_vis_log_pass_rs1;
output ctl_vis_log_pass_rs2;

// ---- Pipeline status ----
output vis_result;               // VIS op still valid in W3
output illegal_vis_e;
output vis_nofrf_e;
output visop_m;
output visop_w_vld;
output vis_wen_next;             // vis_result for ops other than siam/alignaddr
output [1:0] fpu_rnd;

// ---- GSR read data, M stage ----
output [31:0] ffu_exu_rsr_data_hi_m;
output [2:0] ffu_exu_rsr_data_mid_m;
output [7:0] ffu_exu_rsr_data_lo_m;

// ---- GSR write data / per-thread write strobes / E-stage thread one-hot ----
output [36:0] ctl_dp_wsr_data_w2;
output [3:0] ctl_dp_gsr_wsr_w2;
output [3:0] ctl_dp_thr_e;
|
116 |
|
|
|
117 |
|
|
// ---------------------------------------------------------------------
// Internal nets. Grouped by function; widths match their drivers below.
// ---------------------------------------------------------------------

// Illegal-instruction detection terms
wire illegal_rs1_e, illegal_rs2_e, illegal_siam_e;
wire rs2_check_nonzero_e, rs1_check_nonzero_e;

// VIS valid bit per pipeline stage ("_vld" = after that stage's kill terms)
wire visop_e;
wire issue_visop_e;
wire visop_m;
wire visop_w, visop_w_vld;
wire visop_w2_vld, visop_w2;
wire visop_w3, visop_w3_vld;

// Top-level opf class decodes.
// NOTE(review): "logic" is a reserved word in SystemVerilog; this file only
// parses as plain Verilog. Renaming would touch every use of the net.
wire add;
wire align;
wire logic;
wire siam;
wire alignaddr;

// Logical-op sub-decodes of opf[4:1]
wire opf_log_zero, opf_log_one;
wire opf_log_src1, opf_log_src2;
wire opf_log_not1, opf_log_not2;
wire opf_log_or, opf_log_nor;
wire opf_log_and, opf_log_nand;
wire opf_log_xor, opf_log_xnor;
wire opf_log_ornot1, opf_log_ornot2;
wire opf_log_andnot1, opf_log_andnot2;
wire invert_rs1_next, invert_rs2_next;
wire log_pass_rs1_next, log_pass_rs2_next;
wire log_pass_rs1, log_pass_rs2;

// Per-thread GSR round / align storage (current and next value)
wire [2:0] t0_gsr_rnd, t1_gsr_rnd, t2_gsr_rnd, t3_gsr_rnd;
wire [2:0] t0_gsr_align, t1_gsr_align, t2_gsr_align, t3_gsr_align;
wire [2:0] t0_gsr_rnd_next, t1_gsr_rnd_next, t2_gsr_rnd_next, t3_gsr_rnd_next;
wire [2:0] t0_gsr_align_next, t1_gsr_align_next, t2_gsr_align_next, t3_gsr_align_next;
wire [2:0] gsr_rnd_e;
wire [2:0] gsr_align_e;

// Per-thread write controls ("_l" nets are the inverted "no write" selects)
wire t0_rnd_wen_l, t0_gsr_wsr_w2, t0_siam_w2, t0_align_wen_l, t0_alignaddr_w2;
wire t1_rnd_wen_l, t1_gsr_wsr_w2, t1_siam_w2, t1_align_wen_l, t1_alignaddr_w2;
wire t2_rnd_wen_l, t2_gsr_wsr_w2, t2_siam_w2, t2_align_wen_l, t2_alignaddr_w2;
wire t3_rnd_wen_l, t3_gsr_wsr_w2, t3_siam_w2, t3_align_wen_l, t3_alignaddr_w2;

// Thread one-hots, GSR write (WSR) tracking, selected GSR fields
wire [2:0] siam_rnd;
wire [3:0] thr_w2;
wire [3:0] ctl_dp_thr_e;
wire [3:0] thr_fp;
wire gsr_addr_d, gsr_addr_e;
wire wgsr_e, wgsr_m, wgsr_w;
wire wgsr_vld_m, wgsr_vld_w, wgsr_vld_w2;
wire wgsr_w2;
wire [2:0] gsr_rnd;
wire [1:0] fpu_rnd_next;
wire [2:0] gsr_align;
wire [2:0] gsr_align_d1;

// Staged WSR / alignaddr write data
wire [2:0] align_addr_data_w2;
wire [2:0] wgsr_align_offset_w, wgsr_rnd_w;
wire [2:0] wgsr_align_offset_w2, wgsr_rnd_w2;

wire [36:0] wsr_data_m;
wire [36:0] wsr_data_w;
|
225 |
|
|
|
226 |
|
|
|
227 |
|
|
//////////////////////////////////////
// VIS PIPELINE
//------------------------------------
// Note: rs2_ce, rs2_ue, rs1_ue will kill vis instruction
//       in addition to any traps, etc.
//       These are incorporated into the "kill" signals
// E:  ren rs2
// M:  ren rs1
// W:  rs2 data ready, check rs2 ecc
// W2: rs1 data ready, check rs1 ecc
// W3: execute vis operation (result written to rs2/rd flop)
// W4: gen ecc and write to frf
//
// Each stage flop takes the previous stage's *qualified* valid, so a kill
// seen in W, W2 or W3 stops the op from advancing any further.

// A VIS op enters the pipe either fresh from E or re-issued on rollback_c3.
// ('&' binds tighter than '|': visop_e | (visop & rollback_c3).)
assign issue_visop_e = visop_e | visop & rollback_c3;

dff_s visop_e2m(.din(issue_visop_e), .clk(clk), .q(visop_m), .si(), .so(), .se(se));
dff_s visop_m2w(.din(visop_m), .clk(clk), .q(visop_w), .si(), .so(), .se(se));
dff_s visop_w2w2(.din(visop_w_vld), .clk(clk), .q(visop_w2), .si(), .so(), .se(se));
dff_s visop_w22w3(.din(visop_w2_vld), .clk(clk), .q(visop_w3), .si(), .so(), .se(se));

// only check kills in w since they are accumulated into kill_w
assign visop_w_vld = visop_w & ~kill_w;
assign visop_w2_vld = visop_w2 & ~flush_w2 & ~rollback_rs2_w2;
assign visop_w3_vld = visop_w3 & ~ue_trap_w3 & ~rollback_rs1_w3;

// vis_result: op survived to W3. vis_wen_next additionally excludes siam and
// alignaddr (the same two classes that assert vis_nofrf_e).
assign vis_result = visop_w3_vld;
assign vis_wen_next = vis_result & ~siam & ~alignaddr;
|
253 |
|
|
|
254 |
|
|
////////////////////////////////////
// Decode opf
////////////////////////////////////
// Class decodes of the 9-bit opf field:
//   add       : opf[8:3] = 001010            (opf[2:0] free)
//   align     : opf[8:0] = 001001000
//   logic     : opf[8:5] = 0011              (opf[4:0] free)
//   siam      : opf[8:0] = 010000001
//   alignaddr : opf[8:2] = 0000110, opf[0]=0 (opf[1] free; see align_addr_data_w2)
assign add = ~opf[8] & ~opf[7] & opf[6] & ~opf[5] & opf[4] & ~opf[3];
assign align = ~opf[8] & ~opf[7] & opf[6] & ~opf[5] & ~opf[4] & opf[3] & ~opf[2] & ~opf[1] & ~opf[0];
assign logic = ~opf[8] & ~opf[7] & opf[6] & opf[5];
assign siam = ~opf[8] & opf[7] & ~opf[6] & ~opf[5] & ~opf[4] & ~opf[3] & ~opf[2] & ~opf[1] & opf[0];
assign alignaddr = ~opf[8] & ~opf[7] & ~opf[6] & ~opf[5] & opf[4] & opf[3] & ~opf[2] & ~opf[0]; //alignaddress

// Illegal if the opf matches none of the classes above, or if a register
// specifier that must be zero for this op is non-zero.
assign illegal_vis_e = (visop_e & ~(add | align | logic | siam | alignaddr) |
                        illegal_rs1_e | illegal_rs2_e | illegal_siam_e);

// rs1 must be zero for siam and for logical ops zero/one/src2/not2;
// rs2 must be zero for logical ops zero/one/src1/not1.
assign rs1_check_nonzero_e = visop_e & (siam | (logic & (opf_log_zero | opf_log_one | opf_log_src2 | opf_log_not2)));
assign rs2_check_nonzero_e = visop_e & logic & (opf_log_zero | opf_log_one | opf_log_src1 | opf_log_not1);
assign illegal_rs1_e = (frs1_e[4:0] != 5'b00000) & rs1_check_nonzero_e;
assign illegal_rs2_e = (frs2_e[4:0] != 5'b00000) & rs2_check_nonzero_e;
// siam additionally requires rd == 0 and frs2_e[4:3] == 0.
assign illegal_siam_e = ((frd_e[4:0] != 5'b00000) | frs2_e[4] | frs2_e[3]) & siam & visop_e;

// Note: opf_log_zero / opf_log_one are used here without the "logic" class
// qualifier; they look at opf[4:1] only.
assign vis_nofrf_e = visop_e & (siam | alignaddr | opf_log_zero | opf_log_one);
|
272 |
|
|
|
273 |
|
|
// controls for add
// opf[2] drives both subtract and carry-in; opf[1] drives the add32 control.
// Make subtract come out of its own flop for loading purposes (very critical timing)
// Note that ctl_vis_subtract is therefore one cycle behind ctl_vis_cin,
// which is taken straight from opf[2].
dff_s sub_dff(.din(opf[2]), .clk(clk), .q(ctl_vis_subtract), .se(se), .si(), .so());
assign ctl_vis_cin = opf[2];
assign ctl_vis_add32 = opf[1];
|
278 |
|
|
|
279 |
|
|
// controls for logic
// One-hot decode of opf[4:1] into the sixteen logical operations.
// opf[0] is not examined, and the decodes are not qualified by "logic".
assign opf_log_zero    = ~opf[4] & ~opf[3] & ~opf[2] & ~opf[1];   // 0000
assign opf_log_nor     = ~opf[4] & ~opf[3] & ~opf[2] &  opf[1];   // 0001
assign opf_log_andnot2 = ~opf[4] & ~opf[3] &  opf[2] & ~opf[1];   // 0010
assign opf_log_not2    = ~opf[4] & ~opf[3] &  opf[2] &  opf[1];   // 0011
assign opf_log_andnot1 = ~opf[4] &  opf[3] & ~opf[2] & ~opf[1];   // 0100
assign opf_log_not1    = ~opf[4] &  opf[3] & ~opf[2] &  opf[1];   // 0101
assign opf_log_xor     = ~opf[4] &  opf[3] &  opf[2] & ~opf[1];   // 0110
assign opf_log_nand    = ~opf[4] &  opf[3] &  opf[2] &  opf[1];   // 0111
assign opf_log_and     =  opf[4] & ~opf[3] & ~opf[2] & ~opf[1];   // 1000
assign opf_log_xnor    =  opf[4] & ~opf[3] & ~opf[2] &  opf[1];   // 1001
assign opf_log_src1    =  opf[4] & ~opf[3] &  opf[2] & ~opf[1];   // 1010
assign opf_log_ornot2  =  opf[4] & ~opf[3] &  opf[2] &  opf[1];   // 1011
assign opf_log_src2    =  opf[4] &  opf[3] & ~opf[2] & ~opf[1];   // 1100
assign opf_log_ornot1  =  opf[4] &  opf[3] & ~opf[2] &  opf[1];   // 1101
assign opf_log_or      =  opf[4] &  opf[3] &  opf[2] & ~opf[1];   // 1110
assign opf_log_one     =  opf[4] &  opf[3] &  opf[2] &  opf[1];   // 1111

// selects for logic mux
// Every operation maps onto one of four selects (nand / xor / nor / pass),
// combined with optional inversion of either source: or/and select the
// nand/nor path with both inverts set, xnor selects xor with rs2 inverted.
assign ctl_vis_log_sel_nand = opf_log_or | opf_log_nand | opf_log_ornot1 | opf_log_ornot2;
assign ctl_vis_log_sel_xor = opf_log_xor | opf_log_xnor;
assign ctl_vis_log_sel_nor = opf_log_and | opf_log_nor | opf_log_andnot1 | opf_log_andnot2;
assign ctl_vis_log_sel_pass = (opf_log_zero | opf_log_one | opf_log_src1 | opf_log_src2 |
                               opf_log_not1 | opf_log_not2);

assign invert_rs1_next = (opf_log_not1 | opf_log_or | opf_log_and |
                          opf_log_ornot2 | opf_log_andnot2);
assign invert_rs2_next = (opf_log_not2 | opf_log_or | opf_log_and |
                          opf_log_ornot1 | opf_log_andnot1 | opf_log_xnor);
// The invert controls are registered; the sel_* selects above are not.
dff_s invert_rs1_dff(.din(invert_rs1_next), .clk(clk), .q(ctl_vis_log_invert_rs1),
                     .se(se), .si(), .so());
dff_s invert_rs2_dff(.din(invert_rs2_next), .clk(clk), .q(ctl_vis_log_invert_rs2),
                     .se(se), .si(), .so());

// precalculate to help timing
assign log_pass_rs1_next = opf_log_src1 | opf_log_not1;
assign log_pass_rs2_next = opf_log_src2 | opf_log_not2;
dff_s #(2) log_pass_dff(.din({log_pass_rs1_next,log_pass_rs2_next}), .clk(clk),
                        .q({log_pass_rs1,log_pass_rs2}), .se(se), .si(), .so());

// Pass-mux selects: rs1 has priority over rs2, and the constant is chosen
// when neither is passed, so exactly one of the three is always high.
assign ctl_vis_log_pass_rs1 = log_pass_rs1;
assign ctl_vis_log_pass_rs2 = log_pass_rs2 & ~log_pass_rs1;
assign ctl_vis_log_constant = opf_log_one;
assign ctl_vis_log_pass_const = ~(ctl_vis_log_pass_rs1 | ctl_vis_log_pass_rs2);
|
322 |
|
|
|
323 |
|
|
// controls for falign
// gsr_align_d1 is the flopped GSR align field of the thread selected by
// thr_fp. Bits [2:1] are decoded one-hot into align0/2/4/6; bit [0] is sent
// separately as the "odd" control.
assign ctl_vis_align0 = ~gsr_align_d1[2] & ~gsr_align_d1[1];
assign ctl_vis_align2 = ~gsr_align_d1[2] & gsr_align_d1[1];
assign ctl_vis_align4 = gsr_align_d1[2] & ~gsr_align_d1[1];
assign ctl_vis_align6 = gsr_align_d1[2] & gsr_align_d1[1];
assign ctl_vis_align_odd = gsr_align_d1[0];

// controls for output mux
// The logic result is the default: it is selected whenever the op is
// neither add nor align. add and align have disjoint opf decodes, so the
// three selects are one-hot.
assign ctl_vis_sel_add = add;
assign ctl_vis_sel_align = align;
assign ctl_vis_sel_log = ~(add | align);
|
334 |
|
|
|
335 |
|
|
///////////////////////////////////////////////////////////
// GSR.alignaddr_offset, GSR.IM, GSR.IRND
///////////////////////////////////////////////////////////

// Select the {rnd, align} pair of the thread indicated by thr_fp (from tid).
mux4ds #(6) curr_gsr_mux(.dout({gsr_rnd[2:0], gsr_align[2:0]}),
                         .in0({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}),
                         .in1({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}),
                         .in2({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}),
                         .in3({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}),
                         .sel0(thr_fp[0]),
                         .sel1(thr_fp[1]),
                         .sel2(thr_fp[2]),
                         .sel3(thr_fp[3]));

// Same four sources, selected by the E-stage thread (from tid_e); this copy
// feeds the GSR read data sent out on ffu_exu_rsr_data_*.
mux4ds #(6) gsr_e_mux(.dout({gsr_rnd_e[2:0], gsr_align_e[2:0]}),
                      .in0({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}),
                      .in1({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}),
                      .in2({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}),
                      .in3({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}),
                      .sel0(ctl_dp_thr_e[0]),
                      .sel1(ctl_dp_thr_e[1]),
                      .sel2(ctl_dp_thr_e[2]),
                      .sel3(ctl_dp_thr_e[3]));

// E->M flop of the 43-bit read value:
//   hi[31:0] = mask, mid[2:0] = rnd, lo[7:0] = {scale[4:0], align[2:0]}
dff_s #(43) gsr_e2m(.din({dp_ctl_gsr_mask_e[31:0],gsr_rnd_e[2:0],
                          dp_ctl_gsr_scale_e[4:0],gsr_align_e[2:0]}), .clk(clk),
                    .q({ffu_exu_rsr_data_hi_m[31:0],ffu_exu_rsr_data_mid_m[2:0], ffu_exu_rsr_data_lo_m[7:0]}),
                    .se(se), .si(), .so());

// Registered align field used by the ctl_vis_align* decodes.
dff_s #(3) gsr_align_dff(.din(gsr_align[2:0]), .clk(clk), .q(gsr_align_d1[2:0]), .se(se), .si(), .so());

// put in to help timing for sending to lsu
dff_s #(2) fpu_rnd_dff(.din(fpu_rnd_next[1:0]), .clk(clk), .q(fpu_rnd[1:0]), .si(), .so(), .se(se));
// gsr_rnd[2] chooses the source: GSR rnd[1:0] when set, otherwise the FSR
// rounding mode.
assign fpu_rnd_next[1:0] = (gsr_rnd[2])? gsr_rnd[1:0]: dp_ctl_fsr_rnd[1:0];
|
366 |
|
|
|
367 |
|
|
// if alignaddress_little then write the 2's complement
// (opf[1] is the bit left free by the alignaddr decode; ~x + 1 is truncated
// to 3 bits by the assignment width.)
assign align_addr_data_w2[2:0] = (opf[1])? (~wgsr_align_offset_w2[2:0] + 3'b001):
                                            wgsr_align_offset_w2[2:0];

// Detect a write to the GSR: the state-register address is matched in D,
// flopped to E, and combined there with the WSR instruction indication.
assign gsr_addr_d = (ifu_tlu_sraddr_d[6:0] == 7'b0010011);
dff_s gsr_addr_d2e(.din(gsr_addr_d), .clk(clk), .q(gsr_addr_e), .se(se), .si(), .so());
assign wgsr_e = exu_ffu_wsr_inst_e & gsr_addr_e;

// need independent kill checks because this isn't killed by new fpop
// M and W are killed by flush_w2 only when the thread-match input for that
// stage is set; W also requires ifu_tlu_inst_vld_w; W2 is killed by
// flush_w2 directly.
assign wgsr_vld_m = wgsr_m & ~(thr_match_mw2 & flush_w2);
assign wgsr_vld_w = wgsr_w & ifu_tlu_inst_vld_w & ~(thr_match_ww2 & flush_w2);
assign wgsr_vld_w2 = wgsr_w2 & ~flush_w2;
dff_s wgsr_e2m(.din(wgsr_e), .clk(clk), .q(wgsr_m), .si(), .so(), .se(se));
dff_s wgsr_m2w(.din(wgsr_vld_m), .clk(clk), .q(wgsr_w), .si(), .so(), .se(se));
dff_s wgsr_w2w2(.din(wgsr_vld_w), .clk(clk), .q(wgsr_w2), .si(), .so(), .se(se));
|
382 |
|
|
|
383 |
|
|
// 2-bit thread id -> 4-bit one-hot, for three different id sources:
//   thr_w2       from tid_w2 (qualifies WSR writes to the GSR)
//   ctl_dp_thr_e from tid_e  (E-stage GSR read select, also an output)
//   thr_fp       from tid    (current GSR select; siam/alignaddr writes)
assign thr_w2[3] = (tid_w2[1:0] == 2'b11);
assign thr_w2[2] = (tid_w2[1:0] == 2'b10);
assign thr_w2[1] = (tid_w2[1:0] == 2'b01);
assign thr_w2[0] = (tid_w2[1:0] == 2'b00);

assign ctl_dp_thr_e[3] = (tid_e[1:0] == 2'b11);
assign ctl_dp_thr_e[2] = (tid_e[1:0] == 2'b10);
assign ctl_dp_thr_e[1] = (tid_e[1:0] == 2'b01);
assign ctl_dp_thr_e[0] = (tid_e[1:0] == 2'b00);

assign thr_fp[3] = (tid[1:0] == 2'b11);
assign thr_fp[2] = (tid[1:0] == 2'b10);
assign thr_fp[1] = (tid[1:0] == 2'b01);
assign thr_fp[0] = (tid[1:0] == 2'b00);
|
395 |
|
|
|
396 |
|
|
// Per-thread GSR write controls. For each thread tN:
//   tN_siam_w2      : siam valid in W2 for this thread      -> writes rnd
//   tN_alignaddr_w2 : alignaddr valid in W2 for this thread -> writes align
//   tN_gsr_wsr_w2   : WSR to GSR valid in W2 for this thread-> writes both
//   tN_rnd_wen_l / tN_align_wen_l : no write this cycle (hold select)
// siam/alignaddr are qualified with thr_fp (from tid); WSR writes with
// thr_w2 (from tid_w2).

// thread 0
assign t0_siam_w2 = thr_fp[0] & siam & visop_w2_vld;
assign t0_gsr_wsr_w2 = thr_w2[0] & wgsr_vld_w2;
assign t0_alignaddr_w2 = thr_fp[0] & alignaddr & visop_w2_vld;
assign t0_rnd_wen_l = ~(t0_gsr_wsr_w2 | t0_siam_w2);
assign t0_align_wen_l = ~(t0_gsr_wsr_w2 | t0_alignaddr_w2);

// thread 1
assign t1_siam_w2 = thr_fp[1] & siam & visop_w2_vld;
assign t1_gsr_wsr_w2 = thr_w2[1] & wgsr_vld_w2;
assign t1_alignaddr_w2 = thr_fp[1] & alignaddr & visop_w2_vld;
assign t1_rnd_wen_l = ~(t1_gsr_wsr_w2 | t1_siam_w2);
assign t1_align_wen_l = ~(t1_gsr_wsr_w2 | t1_alignaddr_w2);

// thread 2
assign t2_siam_w2 = thr_fp[2] & siam & visop_w2_vld;
assign t2_gsr_wsr_w2 = thr_w2[2] & wgsr_vld_w2;
assign t2_alignaddr_w2 = thr_fp[2] & alignaddr & visop_w2_vld;
assign t2_rnd_wen_l = ~(t2_gsr_wsr_w2 | t2_siam_w2);
assign t2_align_wen_l = ~(t2_gsr_wsr_w2 | t2_alignaddr_w2);

// thread 3
assign t3_siam_w2 = thr_fp[3] & siam & visop_w2_vld;
assign t3_gsr_wsr_w2 = thr_w2[3] & wgsr_vld_w2;
assign t3_alignaddr_w2 = thr_fp[3] & alignaddr & visop_w2_vld;
assign t3_rnd_wen_l = ~(t3_gsr_wsr_w2 | t3_siam_w2);
assign t3_align_wen_l = ~(t3_gsr_wsr_w2 | t3_alignaddr_w2);

// Per-thread WSR strobes exported alongside ctl_dp_wsr_data_w2.
assign ctl_dp_gsr_wsr_w2[3:0] = {t3_gsr_wsr_w2,t2_gsr_wsr_w2,t1_gsr_wsr_w2,t0_gsr_wsr_w2};
|
418 |
|
|
|
419 |
|
|
// Storage flops and muxes
// For every thread there is a 3-bit rnd field and a 3-bit align field.
// Each field has a 3-input next-value mux:
//   in0 / sel0 : hold the current value (the *_wen_l "no write" select)
//   in1 / sel1 : WSR write data staged to W2
//   in2 / sel2 : siam_rnd (rnd field) or align_addr_data_w2 (align field)
// NOTE(review): sel1 and sel2 are not made mutually exclusive in this
// module; presumably a WSR and a siam/alignaddr for the same thread cannot
// both be valid in W2 - confirm against the mux3ds definition and the
// pipeline rules.

// ---- thread 0 ----
mux3ds #(3) t0_rnd_mux(.dout(t0_gsr_rnd_next[2:0]),
                       .in0(t0_gsr_rnd[2:0]),
                       .in1(wgsr_rnd_w2[2:0]),
                       .in2(siam_rnd[2:0]),
                       .sel0(t0_rnd_wen_l),
                       .sel1(t0_gsr_wsr_w2),
                       .sel2(t0_siam_w2));
mux3ds #(3) t0_align_mux(.dout(t0_gsr_align_next[2:0]),
                         .in0(t0_gsr_align[2:0]),
                         .in1(wgsr_align_offset_w2[2:0]),
                         .in2(align_addr_data_w2[2:0]),
                         .sel0(t0_align_wen_l),
                         .sel1(t0_gsr_wsr_w2),
                         .sel2(t0_alignaddr_w2));

// ---- thread 1 ----
mux3ds #(3) t1_rnd_mux(.dout(t1_gsr_rnd_next[2:0]),
                       .in0(t1_gsr_rnd[2:0]),
                       .in1(wgsr_rnd_w2[2:0]),
                       .in2(siam_rnd[2:0]),
                       .sel0(t1_rnd_wen_l),
                       .sel1(t1_gsr_wsr_w2),
                       .sel2(t1_siam_w2));
mux3ds #(3) t1_align_mux(.dout(t1_gsr_align_next[2:0]),
                         .in0(t1_gsr_align[2:0]),
                         .in1(wgsr_align_offset_w2[2:0]),
                         .in2(align_addr_data_w2[2:0]),
                         .sel0(t1_align_wen_l),
                         .sel1(t1_gsr_wsr_w2),
                         .sel2(t1_alignaddr_w2));

// ---- thread 2 ----
mux3ds #(3) t2_rnd_mux(.dout(t2_gsr_rnd_next[2:0]),
                       .in0(t2_gsr_rnd[2:0]),
                       .in1(wgsr_rnd_w2[2:0]),
                       .in2(siam_rnd[2:0]),
                       .sel0(t2_rnd_wen_l),
                       .sel1(t2_gsr_wsr_w2),
                       .sel2(t2_siam_w2));
mux3ds #(3) t2_align_mux(.dout(t2_gsr_align_next[2:0]),
                         .in0(t2_gsr_align[2:0]),
                         .in1(wgsr_align_offset_w2[2:0]),
                         .in2(align_addr_data_w2[2:0]),
                         .sel0(t2_align_wen_l),
                         .sel1(t2_gsr_wsr_w2),
                         .sel2(t2_alignaddr_w2));

// ---- thread 3 ----
mux3ds #(3) t3_rnd_mux(.dout(t3_gsr_rnd_next[2:0]),
                       .in0(t3_gsr_rnd[2:0]),
                       .in1(wgsr_rnd_w2[2:0]),
                       .in2(siam_rnd[2:0]),
                       .sel0(t3_rnd_wen_l),
                       .sel1(t3_gsr_wsr_w2),
                       .sel2(t3_siam_w2));
mux3ds #(3) t3_align_mux(.dout(t3_gsr_align_next[2:0]),
                         .in0(t3_gsr_align[2:0]),
                         .in1(wgsr_align_offset_w2[2:0]),
                         .in2(align_addr_data_w2[2:0]),
                         .sel0(t3_align_wen_l),
                         .sel1(t3_gsr_wsr_w2),
                         .sel2(t3_alignaddr_w2));

// Resettable state flops, one 6-bit {rnd, align} register per thread.
dffr_s #(6) t0_gsr_dff(.din({t0_gsr_rnd_next[2:0], t0_gsr_align_next[2:0]}), .clk(clk),
                       .q({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}), .se(se),
                       .si(), .so(), .rst(reset));
dffr_s #(6) t1_gsr_dff(.din({t1_gsr_rnd_next[2:0], t1_gsr_align_next[2:0]}), .clk(clk),
                       .q({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}), .se(se),
                       .si(), .so(), .rst(reset));
dffr_s #(6) t2_gsr_dff(.din({t2_gsr_rnd_next[2:0], t2_gsr_align_next[2:0]}), .clk(clk),
                       .q({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}), .se(se),
                       .si(), .so(), .rst(reset));
dffr_s #(6) t3_gsr_dff(.din({t3_gsr_rnd_next[2:0], t3_gsr_align_next[2:0]}), .clk(clk),
                       .q({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}), .se(se),
                       .si(), .so(), .rst(reset));
|
490 |
|
|
|
491 |
|
|
// Rounding field for siam: captured from ifu_ffu_rnd_e only while a VIS op
// is in E (enable = visop_e), then held until the next VIS op.
dffre_s #(3) siam_rnd_dff(.din(ifu_ffu_rnd_e[2:0]), .clk(clk),
                          .q(siam_rnd), .se(se), .si(), .so(),
                          .rst(reset), .en(visop_e));

// WSR write data is delivered in M and consumed in W2, so each field is
// staged through two flops (M -> W -> W2).
dff_s #(3) align_offset_dff1(.din(exu_ffu_gsr_align_m[2:0]), .clk(clk),
                             .q(wgsr_align_offset_w[2:0]), .se(se), .si(), .so());
dff_s #(3) align_offset_dff2(.din(wgsr_align_offset_w[2:0]), .clk(clk),
                             .q(wgsr_align_offset_w2[2:0]), .se(se), .si(), .so());
dff_s #(3) rnd_dff1(.din(exu_ffu_gsr_rnd_m[2:0]), .clk(clk),
                    .q(wgsr_rnd_w[2:0]), .se(se), .si(), .so());
dff_s #(3) rnd_dff2(.din(wgsr_rnd_w[2:0]), .clk(clk),
                    .q(wgsr_rnd_w2[2:0]), .se(se), .si(), .so());

// Mask and scale are not stored in this block; they are staged to W2 as
// {mask[31:0], scale[4:0]} and driven out on ctl_dp_wsr_data_w2.
assign wsr_data_m[36:0] = {exu_ffu_gsr_mask_m[31:0], exu_ffu_gsr_scale_m[4:0]};
dff_s #(37) wsr_data_m2w(.din(wsr_data_m[36:0]), .clk(clk), .q(wsr_data_w[36:0]),
                         .se(se), .si(), .so());
dff_s #(37) wsr_data_w2w2(.din(wsr_data_w[36:0]), .clk(clk), .q(ctl_dp_wsr_data_w2[36:0]),
                          .se(se), .si(), .so());

endmodule // sparc_ffu_ctl_visctl
|