1 |
2 |
dmitryr |
// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T1 Processor File: sparc_exu_eclccr.v
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
////////////////////////////////////////////////////////////////////////
/*
// Module Name: sparc_exu_eclccr
// Description: Per-thread 8-bit condition code registers ({xcc, icc},
//              4 bits each) with forwarding. Takes the e_stage result
//              and writes on the w stage.
*/

// sparc_exu_eclccr
//
// Holds one 8-bit condition code register (CCR) per thread and supplies the
// D-stage condition codes with forwarding from the E, M and W stages.
//
// Each CCR value is {xcc[3:0], icc[3:0]} (see alu_cc_e below).
//
// Write sources, per thread, in priority order:
//   1. w2 write : divcntl_ccr_cc_w2, when setcc_w2 is set for that thread.
//   2. W  write : exu_ifu_cc_w, i.e. WRCCR data when wb_ccr_wrccr_w is set,
//                 otherwise the pipelined value (ALU codes, or tlu_exu_ccr_m
//                 merged in during the M stage).
//   3. hold     : the register recirculates its current value.
//
// Read/bypass priority for exu_ifu_cc_d: E stage, then M stage, then
// W stage, then the stored CCR of the thread selected by thrdec_d.
//
// NOTE(review): dff_s, mux2ds, mux3ds and mux4ds are defined outside this
// file; the select inputs driven here are mutually exclusive, which
// presumably matches what those decoded-select muxes expect -- confirm
// against the cell library.
module sparc_exu_eclccr (/*AUTOARG*/
   // Outputs
   exu_ifu_cc_d, exu_tlu_ccr0_w, exu_tlu_ccr1_w, exu_tlu_ccr2_w,
   exu_tlu_ccr3_w,
   // Inputs
   clk, se, alu_xcc_e, alu_icc_e, tid_d, thrdec_d, thr_match_dm,
   thr_match_de, tid_w, thr_w, ifu_exu_kill_e, ifu_exu_setcc_d,
   byp_ecl_wrccr_data_w, wb_ccr_wrccr_w, wb_ccr_setcc_g,
   divcntl_ccr_cc_w2, wb_ccr_thr_g, tlu_exu_cwpccr_update_m,
   tlu_exu_ccr_m, ifu_exu_inst_vld_w, ifu_tlu_flush_w, early_flush_w
   ) ;

   input        clk;
   input        se;                       // scan enable, passed to every flop
   input [3:0]  alu_xcc_e;                // condition codes from the alu
   input [3:0]  alu_icc_e;
   input [1:0]  tid_d;                    // thread for each stage
   input [3:0]  thrdec_d;                 // decoded tid_d for mux select
   input        thr_match_dm;             // qualifies the M-stage bypass
   input        thr_match_de;             // qualifies the E-stage bypass
   input [1:0]  tid_w;
   input [3:0]  thr_w;                    // decoded tid_w
   input        ifu_exu_kill_e;           // cancels the E-stage setcc
   input        ifu_exu_setcc_d;          // D-stage instruction sets the ccs
   input [7:0]  byp_ecl_wrccr_data_w;     // WRCCR data (LSBs of ALU result)
   input        wb_ccr_wrccr_w;           // write enable for the WRCCR data
   input        wb_ccr_setcc_g;           // flopped to form the w2 write enable
   input [7:0]  divcntl_ccr_cc_w2;        // data for the w2 write
   input [1:0]  wb_ccr_thr_g;             // flopped to form the w2 thread id
   input        tlu_exu_cwpccr_update_m;  // selects tlu_exu_ccr_m in M stage
   input [7:0]  tlu_exu_ccr_m;
   input        ifu_exu_inst_vld_w;
   input        ifu_tlu_flush_w;
   input        early_flush_w;

   output [7:0] exu_ifu_cc_d;             // condition codes for current thread
   output [7:0] exu_tlu_ccr0_w;
   output [7:0] exu_tlu_ccr1_w;
   output [7:0] exu_tlu_ccr2_w;
   output [7:0] exu_tlu_ccr3_w;

   wire [7:0]   partial_cc_d;             // partial bypassed ccr
   wire [7:0]   alu_cc_e;                 // alu combined condition codes
   wire [7:0]   alu_cc_m;                 // m stage alu ccs
   wire [7:0]   alu_cc_w;                 // flopped ccr_m (alu or tlu value)
   wire [7:0]   exu_ifu_cc_w;             // writeback data
   wire         setcc_e;                  // from previous stage
   wire         setcc_m;
   wire         setcc_w;
   wire         valid_setcc_e;            // after comparing with kill
   wire         valid_setcc_m;
   wire         valid_setcc_w;
   wire         setcc_w2;
   wire [7:0]   ccrin_thr0;               // next-state value of each ccr
   wire [7:0]   ccrin_thr1;
   wire [7:0]   ccrin_thr2;
   wire [7:0]   ccrin_thr3;
   wire [7:0]   ccr_d;                    // stored ccr of the D-stage thread
   wire [7:0]   ccr_thr0;                 // stored ccr of each thread
   wire [7:0]   ccr_thr1;
   wire [7:0]   ccr_thr2;
   wire [7:0]   ccr_thr3;
   wire         use_alu_cc;
   wire         use_ccr;
   wire         use_cc_e;
   wire         use_cc_m;
   wire         use_cc_w;
   wire [1:0]   tid_dxorw;
   wire         thrmatch_w;
   wire [1:0]   thr_w2;
   wire         thr0_w2;
   wire         thr1_w2;
   wire         thr2_w2;
   wire         thr3_w2;
   wire         wen_thr0_w;               // write enable for each input/thread
   wire         wen_thr0_w2;
   wire         wen_thr1_w;
   wire         wen_thr1_w2;
   wire         wen_thr2_w;
   wire         wen_thr2_w2;
   wire         wen_thr3_w;
   wire         wen_thr3_w2;
   wire         wen_thr0_l;               // hold select: high when no write
   wire         wen_thr1_l;
   wire         wen_thr2_l;
   wire         wen_thr3_l;
   wire         bypass_cc_w;

   wire [7:0]   ccr_m;                    // M-stage value after the tlu mux

   /////////////////////////
   // D2E flops
   /////////////////////////
   dff_s dff_setcc_d2e(.din(ifu_exu_setcc_d), .clk(clk), .q(setcc_e),
                       .se(se), .si(), .so());

   /////////////////////////
   // E stage
   /////////////////////////
   assign       alu_cc_e = {alu_xcc_e, alu_icc_e};
   // a killed E-stage instruction must neither bypass nor write its ccs
   assign       valid_setcc_e = setcc_e & ~ifu_exu_kill_e;

   dff_s #(8) dff_cc_e2m(.din(alu_cc_e[7:0]), .clk(clk), .q(alu_cc_m[7:0]),
                         .se(se), .si(), .so());
   dff_s dff_setcc_e2m(.din(valid_setcc_e), .clk(clk), .q(setcc_m),
                       .se(se), .si(), .so());

   /////////////////////////
   // M stage
   /////////////////////////
   // The tlu update shares the W-stage write path: it raises the write
   // request and replaces the alu ccs with tlu_exu_ccr_m.
   assign       valid_setcc_m = setcc_m | tlu_exu_cwpccr_update_m;
   mux2ds #(8) mux_ccr_m(.dout(ccr_m[7:0]),
                         .in0(alu_cc_m[7:0]),
                         .in1(tlu_exu_ccr_m[7:0]),
                         .sel0(~tlu_exu_cwpccr_update_m),
                         .sel1(tlu_exu_cwpccr_update_m));

   dff_s #(8) dff_cc_m2w(.din(ccr_m[7:0]), .clk(clk), .q(alu_cc_w[7:0]),
                         .se(se), .si(), .so());
   dff_s dff_setcc_m2w(.din(valid_setcc_m), .clk(clk), .q(setcc_w),
                       .se(se), .si(), .so());

   /////////////////////////
   // W stage
   /////////////////////////
   // The bypass qualifier ignores the flush signals; the actual register
   // write (valid_setcc_w) is suppressed by either flush.
   assign       bypass_cc_w = ifu_exu_inst_vld_w & setcc_w;
   assign       valid_setcc_w = ~ifu_tlu_flush_w & ~early_flush_w &
                                ifu_exu_inst_vld_w &
                                (setcc_w | wb_ccr_wrccr_w);

   // mux with wrccr
   assign       use_alu_cc = ~(wb_ccr_wrccr_w);
   mux2ds #(8) mux_ccrin_cc(.dout(exu_ifu_cc_w[7:0]), .sel0(wb_ccr_wrccr_w),
                            .sel1(use_alu_cc),
                            .in0(byp_ecl_wrccr_data_w[7:0]),
                            .in1(alu_cc_w[7:0]));

   // w2 write request and its thread id
   dff_s #(3) setcc_g2w2 (.din({wb_ccr_setcc_g, wb_ccr_thr_g[1:0]}), .clk(clk),
                          .q({setcc_w2, thr_w2[1:0]}),
                          .se(se), .si(), .so());

   /////////////////////////
   // Storage of ccr
   /////////////////////////
`ifdef FPGA_SYN_1THREAD

   assign       thr0_w2 = ~thr_w2[1] & ~thr_w2[0];
   // the w2 write wins when both writes target the thread in one cycle
   assign       wen_thr0_w = (thr_w[0] & valid_setcc_w & ~wen_thr0_w2);
   assign       wen_thr0_w2 = thr0_w2 & setcc_w2;
   assign       wen_thr0_l = ~(wen_thr0_w | wen_thr0_w2);
   // mux between cc_w, cc_w2 and the old value (the tlu value was already
   // merged into cc_w by mux_ccr_m in the M stage)
   mux3ds #(8) mux_ccrin0(.dout(ccrin_thr0[7:0]), .sel0(wen_thr0_w),
                          .sel1(wen_thr0_w2), .sel2(wen_thr0_l),
                          .in0(exu_ifu_cc_w[7:0]),
                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr0[7:0]));
   // store new value
   dff_s #(8) dff_ccr_thr0(.din(ccrin_thr0[7:0]), .clk(clk), .q(ccr_thr0[7:0]),
                           .se(se), .si(), .so());
   assign       ccr_d[7:0] = ccr_thr0[7:0];

   // No storage is built for threads 1-3 in this configuration.  Tie their
   // ccr nets low so exu_tlu_ccr1_w..exu_tlu_ccr3_w are driven rather than
   // left floating.
   assign       ccr_thr1[7:0] = 8'b0;
   assign       ccr_thr2[7:0] = 8'b0;
   assign       ccr_thr3[7:0] = 8'b0;

`else // !`ifdef FPGA_SYN_1THREAD

   // decode thr_w2 for mux select
   assign       thr0_w2 = ~thr_w2[1] & ~thr_w2[0];
   assign       thr1_w2 = ~thr_w2[1] &  thr_w2[0];
   assign       thr2_w2 =  thr_w2[1] & ~thr_w2[0];
   assign       thr3_w2 =  thr_w2[1] &  thr_w2[0];

   // enable input for each thread; the w2 write wins when both writes
   // target the same thread in one cycle, and _l holds the old value
   assign       wen_thr0_w = (thr_w[0] & valid_setcc_w & ~wen_thr0_w2);
   assign       wen_thr0_w2 = thr0_w2 & setcc_w2;
   assign       wen_thr0_l = ~(wen_thr0_w | wen_thr0_w2);
   assign       wen_thr1_w = (thr_w[1] & valid_setcc_w & ~wen_thr1_w2);
   assign       wen_thr1_w2 = (thr1_w2 & setcc_w2);
   assign       wen_thr1_l = ~(wen_thr1_w | wen_thr1_w2);
   assign       wen_thr2_w = (thr_w[2] & valid_setcc_w & ~wen_thr2_w2);
   assign       wen_thr2_w2 = (thr2_w2 & setcc_w2);
   assign       wen_thr2_l = ~(wen_thr2_w | wen_thr2_w2);
   assign       wen_thr3_w = (thr_w[3] & valid_setcc_w & ~wen_thr3_w2);
   assign       wen_thr3_w2 = (thr3_w2 & setcc_w2);
   assign       wen_thr3_l = ~(wen_thr3_w | wen_thr3_w2);

   // mux between cc_w, cc_w2 and the old value (the tlu value was already
   // merged into cc_w by mux_ccr_m in the M stage)
   mux3ds #(8) mux_ccrin0(.dout(ccrin_thr0[7:0]), .sel0(wen_thr0_w),
                          .sel1(wen_thr0_w2), .sel2(wen_thr0_l),
                          .in0(exu_ifu_cc_w[7:0]),
                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr0[7:0]));
   mux3ds #(8) mux_ccrin1(.dout(ccrin_thr1[7:0]), .sel0(wen_thr1_w),
                          .sel1(wen_thr1_w2), .sel2(wen_thr1_l),
                          .in0(exu_ifu_cc_w[7:0]),
                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr1[7:0]));
   mux3ds #(8) mux_ccrin2(.dout(ccrin_thr2[7:0]), .sel0(wen_thr2_w),
                          .sel1(wen_thr2_w2), .sel2(wen_thr2_l),
                          .in0(exu_ifu_cc_w[7:0]),
                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr2[7:0]));
   mux3ds #(8) mux_ccrin3(.dout(ccrin_thr3[7:0]), .sel0(wen_thr3_w),
                          .sel1(wen_thr3_w2), .sel2(wen_thr3_l),
                          .in0(exu_ifu_cc_w[7:0]),
                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr3[7:0]));

   // store new value
   dff_s #(8) dff_ccr_thr0(.din(ccrin_thr0[7:0]), .clk(clk), .q(ccr_thr0[7:0]),
                           .se(se), .si(), .so());
   dff_s #(8) dff_ccr_thr1(.din(ccrin_thr1[7:0]), .clk(clk), .q(ccr_thr1[7:0]),
                           .se(se), .si(), .so());
   dff_s #(8) dff_ccr_thr2(.din(ccrin_thr2[7:0]), .clk(clk), .q(ccr_thr2[7:0]),
                           .se(se), .si(), .so());
   dff_s #(8) dff_ccr_thr3(.din(ccrin_thr3[7:0]), .clk(clk), .q(ccr_thr3[7:0]),
                           .se(se), .si(), .so());

   // mux between the 4 sets of ccrs
   mux4ds #(8) mux_ccr_out(.dout(ccr_d[7:0]), .sel0(thrdec_d[0]),
                           .sel1(thrdec_d[1]), .sel2(thrdec_d[2]),
                           .sel3(thrdec_d[3]), .in0(ccr_thr0[7:0]),
                           .in1(ccr_thr1[7:0]), .in2(ccr_thr2[7:0]),
                           .in3(ccr_thr3[7:0]));
`endif // !`ifdef FPGA_SYN_1THREAD

   /////////////////////////
   // Bypass to the D stage
   /////////////////////////
   // bypass the ccs to the output.  Only alu result needs to be bypassed.
   // The E-stage value has the highest priority and is selected last.
   assign       exu_ifu_cc_d[7:0] = (use_cc_e)? alu_cc_e[7:0]: partial_cc_d[7:0];
   mux3ds #(8) mux_ccr_bypass1(.dout(partial_cc_d[7:0]),
                               .sel0(use_ccr),
                               .sel1(use_cc_m),
                               .sel2(use_cc_w),
                               .in0(ccr_d[7:0]),
                               .in1(alu_cc_m[7:0]),
                               .in2(alu_cc_w[7:0]));

   assign       use_cc_e = valid_setcc_e & thr_match_de;
   assign       use_cc_m = setcc_m & thr_match_dm;
   // M outranks W; use_ccr is the default when neither stage forwards
   assign       use_cc_w = bypass_cc_w & thrmatch_w & ~use_cc_m;
   assign       use_ccr = ~(use_cc_m | use_cc_w);

   // D-stage and W-stage thread ids match when no bit differs
   assign       tid_dxorw = tid_w ^ tid_d;
   assign       thrmatch_w = ~(tid_dxorw[1] | tid_dxorw[0]);

   // generate ccr_w for the tlu
   assign       exu_tlu_ccr0_w[7:0] = ccr_thr0[7:0];
   assign       exu_tlu_ccr1_w[7:0] = ccr_thr1[7:0];
   assign       exu_tlu_ccr2_w[7:0] = ccr_thr2[7:0];
   assign       exu_tlu_ccr3_w[7:0] = ccr_thr3[7:0];

endmodule // sparc_exu_eclccr