OpenCores
URL https://opencores.org/ocsvn/sparc64soc/sparc64soc/trunk

Subversion Repositories sparc64soc

[/] [sparc64soc/] [trunk/] [T1-CPU/] [lsu/] [lsu_stb_rwctl.v] - Blame information for rev 2

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 dmitryr
// ========== Copyright Header Begin ==========================================
2
// 
3
// OpenSPARC T1 Processor File: lsu_stb_rwctl.v
4
// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
5
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6
// 
7
// The above named program is free software; you can redistribute it and/or
8
// modify it under the terms of the GNU General Public
9
// License version 2 as published by the Free Software Foundation.
10
// 
11
// The above named program is distributed in the hope that it will be 
12
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
// General Public License for more details.
15
// 
16
// You should have received a copy of the GNU General Public
17
// License along with this work; if not, write to the Free Software
18
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19
// 
20
// ========== Copyright Header End ============================================
21
///////////////////////////////////////////////////////////////////
22
/*
23
//  Description:  Control for Unified STB CAM/DATA of LSU
24
*/
25
////////////////////////////////////////////////////////////////////////
26
// Global header file includes
27
////////////////////////////////////////////////////////////////////////
28
`include  "sys.h" // system level definition file which contains the 
29
          // time scale definition
30
 
31
`include "iop.h"
32
 
33
////////////////////////////////////////////////////////////////////////
34
// Local header file includes / local defines
35
////////////////////////////////////////////////////////////////////////
36
 
37
module lsu_stb_rwctl (/*AUTOARG*/
38
   // Outputs
39
   so, lsu_stbctl_flush_pipe_w, stb_cam_wr_no_ivld_m,
40
   ld_rawp_st_ced_w2, stb_data_wr_ptr, stb_data_wptr_vld,
41
   stb_data_rd_ptr, stb_data_rptr_vld, stb_wdata_ramd_b75_b64,
42
   stb_cam_cm_tid, stb_ldst_byte_msk, stb_ldst_byte_msk_min,
43
   stb_cam_rw_ptr, stb_cam_wptr_vld, stb_cam_rptr_vld,
44
   lsu_st_sz_bhww_m, lsu_st_sz_dw_m, lsu_st_sz_bhw_m,
45
   lsu_st_sz_wdw_m, lsu_st_sz_b_m, lsu_st_sz_w_m, lsu_st_sz_hw_m,
46
   lsu_st_sz_hww_m, ld_rawp_st_ackid_w2, stb_flush_st_g,
47
   stb_cam_wvld_m, lsu_st_rq_type_m, lsu_stb_data_early_sel_e,
48
   lsu_stb_data_final_sel_m, lsu_ldquad_inst_m, stb_thrd_en_g,
49
   flsh_inst_m, lsu_stb_va_m, lsu_stb_empty_buf, lsu_spu_stb_empty,
50
   ifu_tlu_inst_vld_m_bf1, ifu_tlu_inst_vld_m_bf2, lsu_ifu_stbcnt0,
51
   lsu_ifu_stbcnt1, lsu_ifu_stbcnt2, lsu_ifu_stbcnt3,
52
   lsu_ffu_stb_full0, lsu_ffu_stb_full1, lsu_ffu_stb_full2,
53
   lsu_ffu_stb_full3,
54
   // Inputs
55
   rclk, rst_tri_en, si, se, ld_inst_vld_e, ldst_sz_e, st_inst_vld_e,
56
   stb_pcx_rptr0, stb_wrptr0, stb_pcx_rptr1, stb_wrptr1,
57
   stb_pcx_rptr2, stb_wrptr2, stb_pcx_rptr3, stb_wrptr3,
58
   stb_cam_hit_ptr, stb_cam_hit, lsu_ldst_va_m, sta_internal_m,
59
   ifu_tlu_thrid_e, tlu_exu_early_flush_pipe_w, lsu_ttype_vld_m2,
60
   ifu_lsu_flush_w, lsu_defr_trp_taken_g, ifu_lsu_casa_e,
61
   ifu_lsu_ldstub_e, ifu_lsu_swap_e, ifu_lsu_ldst_dbl_e,
62
   stb_state_ced0, stb_state_ced1, stb_state_ced2, stb_state_ced3,
63
   stb_ld_full_raw, stb_ld_partial_raw, stb_wrptr0_prev,
64
   stb_wrptr1_prev, stb_wrptr2_prev, stb_wrptr3_prev,
65
   ifu_lsu_alt_space_e, ifu_lsu_ldst_fp_e, lsu_quad_asi_e,
66
   lsu_st_rmo_m, lsu_bst_in_pipe_m, ffu_lsu_kill_fst_w,
67
   ffu_lsu_blk_st_e, ffu_lsu_blk_st_tid_m, ffu_lsu_blk_st_va_e,
68
   lsu_snap_blk_st_m, tlb_pgnum_b39_g, lsu_stb_empty,
69
   ifu_tlu_flsh_inst_e, stb_cam_mhit, ifu_tlu_inst_vld_m,
70
   lsu_st_pcx_rq_pick, lsu_st_pcx_rq_vld, stb_rdata_ramc_b8t0,
71
   lsu_stbcnt0, lsu_stbcnt1, lsu_stbcnt2, lsu_stbcnt3
72
   ) ;
73
 
74
input     rclk ;
75
//input     grst_l ;   
76
//input     arst_l ;   
77
   input  rst_tri_en;
78
 
79
   input  si;
80
   input  se;
81
   output so;
82
 
83
 
84
input     ld_inst_vld_e ;   // load in pipe.
85
input [1:0]   ldst_sz_e ;   // size of load.
86
input     st_inst_vld_e ;   // store in pipe.
87
// Currently bypass flop make request 
88
//input [3:0]   pcx_rq_for_stb ;  // pcx request rd of dfq - threaded
89
//input [2:0]   stb_dfq_rptr0 ;   // dfq rptr for stb0
90
input [2:0]   stb_pcx_rptr0 ;   // pcx rptr for stb0
91
input [2:0]   stb_wrptr0 ;    // wrt ptr - stb0
92
//input [2:0]   stb_dfq_rptr1 ;   // dfq rptr for stb1
93
input [2:0]   stb_pcx_rptr1 ;   // pcx rptr for stb1
94
input [2:0]   stb_wrptr1 ;    // wrt ptr - stb1
95
//input [2:0]   stb_dfq_rptr2 ;   // dfq rptr for stb2
96
input [2:0]   stb_pcx_rptr2 ;   // pcx rptr for stb2
97
input [2:0]   stb_wrptr2 ;    // wrt ptr - stb2
98
//input [2:0]   stb_dfq_rptr3 ;   // dfq rptr for stb3
99
input [2:0]   stb_pcx_rptr3 ;   // pcx rptr for stb3
100
input [2:0]   stb_wrptr3 ;    // wrt ptr - stb3
101
input [2:0]     stb_cam_hit_ptr ; // entry which hit
102
input     stb_cam_hit ;   // hit has occurred
103
//input [7:0]     stb_state_vld0 ;  // valid bits - stb0
104
//input [7:0]     stb_state_vld1 ;  // valid bits - stb1
105
//input [7:0]     stb_state_vld2 ;  // valid bits - stb2
106
//input [7:0]     stb_state_vld3 ;  // valid bits - stb3
107
input [9:0]    lsu_ldst_va_m ;
108
input     sta_internal_m ;   // internal stxa
109
input [1:0]   ifu_tlu_thrid_e ; // thread-id.
110
 
111
//   output     lsu_stbrwctl_flush_pipe_w ;  // tmp for tso_mon
112
   input      tlu_exu_early_flush_pipe_w;
113
   input      lsu_ttype_vld_m2;
114
 
115
   input      ifu_lsu_flush_w;
116
   input      lsu_defr_trp_taken_g;
117
   output     lsu_stbctl_flush_pipe_w;
118
 
119
 
120
input                   ifu_lsu_casa_e ;        // compare-swap instr
121
input                   ifu_lsu_ldstub_e ;      // ldstub
122
input                   ifu_lsu_swap_e ;        // swap
123
input     ifu_lsu_ldst_dbl_e; // ldst dbl, specifically for stquad.
124
//input   [63:0]          lsu_stb_st_data_g ;     // data to be written to stb
125
input [7:0]   stb_state_ced0 ;
126
input [7:0]   stb_state_ced1 ;
127
input [7:0]   stb_state_ced2 ;
128
input [7:0]   stb_state_ced3 ;
129
input [7:0]   stb_ld_full_raw ;
130
input [7:0]   stb_ld_partial_raw ;
131
input   [2:0]   stb_wrptr0_prev ;
132
input   [2:0]   stb_wrptr1_prev ;
133
input   [2:0]     stb_wrptr2_prev ;
134
input   [2:0]   stb_wrptr3_prev ;
135
input     ifu_lsu_alt_space_e ; // alt_space inst
136
input     ifu_lsu_ldst_fp_e ;
137
//input     tlb_cam_hit ;   // tlb cam hit - mstage
138
input     lsu_quad_asi_e ;  // quad ldst asi
139
//input  [3:0]      lsu_st_ack_rq_stb ;
140
//input     lsu_dtlb_bypass_e ;
141
input   lsu_st_rmo_m ;  // rmo st in m cycle.
142
input   lsu_bst_in_pipe_m ;     // 1st helper for bst.
143
input           ffu_lsu_kill_fst_w ;    // ecc error on st.
144
input           ffu_lsu_blk_st_e ;      // blk st helper signalled by ffu
145
input   [1:0]    ffu_lsu_blk_st_tid_m ;  // blk st tid - from ffu_lsu_data
146
input   [5:3]   ffu_lsu_blk_st_va_e ;   // bits 5:3 of va from increment
147
input           lsu_snap_blk_st_m ;             // snap blk st state
148
input           tlb_pgnum_b39_g ;
149
 
150
input   [3:0]   lsu_stb_empty ;         // thread's stb is empty
151
input           ifu_tlu_flsh_inst_e;
152
input           stb_cam_mhit ;
153
input           ifu_tlu_inst_vld_m ;
154
//input   [3:0]   lsu_st_pcx_rq_kill_w2 ;
155
 
156
input [3:0]   lsu_st_pcx_rq_pick ;
157
 
158
input         lsu_st_pcx_rq_vld ;
159
 
160
input   [8:0]    stb_rdata_ramc_b8t0 ;   // scan-only
161
 
162
output          stb_cam_wr_no_ivld_m ;
163
 
164
//output      ld_rawp_st_ced_g ;
165
output      ld_rawp_st_ced_w2 ;
166
output  [4:0]   stb_data_wr_ptr ; // write ptr - stb data
167
output      stb_data_wptr_vld ; // wr vld for stb data
168
output  [4:0]   stb_data_rd_ptr ; // rd ptr for stb data
169
output      stb_data_rptr_vld ; // rptr vld for stb data
170
output  [75:64]    stb_wdata_ramd_b75_b64 ;  // write data for DATA RAM. 
171
 
172
// partial or full raw required
173
output  [1:0]   stb_cam_cm_tid ;  // cam tid - stb cam
174
//output  [7:0]   stb_cam_sqsh_msk ;  // squash spurious hits
175
//output      stb_cam_vld ;
176
output  [7:0]   stb_ldst_byte_msk ; // byte mask for write/cam
177
output  [7:0]   stb_ldst_byte_msk_min ; // byte mask for write/cam for min path
178
 
179
//output  [3:0]   stb_rd_for_pcx_sel ;    // stb's st selected for read for pcx
180
output  [4:0]   stb_cam_rw_ptr ;        // rw ptr for shared stb cam port
181
output          stb_cam_wptr_vld ;      // wr vld for stb write   
182
output          stb_cam_rptr_vld ;      // rd vld for stb write   
183
 
184
 
185
//output      lsu_stb_pcx_rvld_d1 ; // stb has been read-delayby1cycle
186
//output      lsu_stb_dfq_rvld ;  // wr to dfq stb bypass ff
187
 
188
output                  lsu_st_sz_bhww_m ;      // byte or hword or word
189
output                  lsu_st_sz_dw_m ;        // double word
190
output                  lsu_st_sz_bhw_m ;       // byte or hword
191
output                  lsu_st_sz_wdw_m ;       // word or dword
192
output                  lsu_st_sz_b_m ;         // byte
193
output                  lsu_st_sz_w_m ;         // word
194
output                  lsu_st_sz_hw_m ;        // hword
195
output                  lsu_st_sz_hww_m ;       // hword or word
196
 
197
//output     ld_stb_full_raw_g ;
198
//output     ld_stb_partial_raw_g ;
199
//output  [3:0]   ld_stb_full_raw_g ;
200
//output  [3:0]   ld_stb_partial_raw_g ;
201
 
202
output  [2:0]   ld_rawp_st_ackid_w2 ;
203
 
204
//output  [2:0]   stb_dfq_rd_id ;   // stb entry being read for current thread
205
 
206
output  [3:0]     stb_flush_st_g ;  // st is flushed in cycle g
207
output  [3:0]     stb_cam_wvld_m ;
208
 
209
output  [2:1]   lsu_st_rq_type_m ;
210
 
211
output  [3:0]   lsu_stb_data_early_sel_e ;// select source of stb data.
212
output      lsu_stb_data_final_sel_m ;// select source of stb data.
213
 
214
output      lsu_ldquad_inst_m ; // stquad inst
215
//output      lsu_stdbl_inst_m ;  // stdbl inst
216
 
217
//output  [1:0]   lsu_stb_rd_tid ;  // thread for which stb read occurs
218
 
219
output  [3:0]    stb_thrd_en_g ; // thread id for current stb access
220
 
221
   output     flsh_inst_m;
222
 
223
   output [9:3] lsu_stb_va_m;
224
 
225
output  [3:0]    lsu_stb_empty_buf ;
226
output  [3:0]    lsu_spu_stb_empty ;
227
 
228
   output     ifu_tlu_inst_vld_m_bf1;
229
   output     ifu_tlu_inst_vld_m_bf2;
230
 
231
   input [3:0] lsu_stbcnt0;
232
   input [3:0] lsu_stbcnt1;
233
   input [3:0] lsu_stbcnt2;
234
   input [3:0] lsu_stbcnt3;
235
 
236
   output [3:0] lsu_ifu_stbcnt0;
237
   output [3:0] lsu_ifu_stbcnt1;
238
   output [3:0] lsu_ifu_stbcnt2;
239
   output [3:0] lsu_ifu_stbcnt3;
240
 
241
   output       lsu_ffu_stb_full0;
242
   output       lsu_ffu_stb_full1;
243
   output       lsu_ffu_stb_full2;
244
   output       lsu_ffu_stb_full3;
245
 
246
/*AUTOWIRE*/
247
// Beginning of automatic wires (for undeclared instantiated-module outputs)
248
// End of automatics
249
// Beginning of automatic wires (for undeclared instantiated-module outputs)
250
// End of automatics
251
//wire  [4:0] stb_dequeue_ptr ;
252
wire  [2:0] stb_wptr_prev ;
253
wire  [1:0] st_thrid_m,st_thrid_g ;
254
wire  [7:0] ld_any_raw_vld ;
255
wire  [7:0] ld_any_raw_vld_d1 ;
256
//wire    ld_raw_mhit ;
257
wire  [2:0] st_rq_type_m,st_rq_type_g ;
258
 
259
wire  [1:0] ldst_sz_m,ldst_sz_g, pipe_ldst_sz_m ;
260
wire    ldst_byte, ldst_hwrd, ldst_word, ldst_dwrd ;
261
wire  [7:0] ldst_byte_mask ;
262
wire  [2:0] stb_wptr ;
263
wire  [1:0] thrid_m,thrid_g ;
264
wire    ld_inst_vld_m, st_inst_vld_m ;
265
 
266
wire    ldst_dbl_m;
267
wire    atomic_m ;
268
wire    ldstub_m ;
269
wire    casa_m, casa_g ;
270
wire    swap_m;
271
wire    flush_st_g ;
272
wire    cam_wptr_vld_g ;
273
wire  [2:0] cam_wptr_d1 ;
274
 
275
wire  [2:0] stb_rdptr0,stb_rdptr1 ;
276
wire  [2:0] stb_rdptr2,stb_rdptr3 ;
277
 
278
//wire  [3:0] stb_rd_mask ;
279
wire  [3:0] stb_select_rptr ;
280
wire  [1:0] stb_rd_thrid ;
281
//wire    cam_vld_g ;
282
wire  [9:0]  ldst_va_m, pipe_ldst_va_m ;
283
wire  [3:0]  ldst_va_g ;
284
wire  [2:0] cam_wr_ptr ;
285
wire  thread0_m, thread1_m, thread2_m, thread3_m ;
286
wire  thread0_g, thread1_g, thread2_g, thread3_g ;
287
wire  [2:0]   ld_rawp_stb_id ;
288
 
289
//wire  rd_for_dfq_granted ;
290
wire  [7:0] stb_state_ced,stb_state_ced_d1 ;
291
//wire    stq_wr_en ;
292
//wire  [3:0] stq_wr_en_g ;
293
//wire  [3:0] stquad_vld ;
294
//wire  [2:0] stquad_ptr0,stquad_ptr1,stquad_ptr2,stquad_ptr3 ;
295
//wire  [3:0] ld_stq_hit_g ;
296
//wire  ldq_hit_g ;
297
//wire  [3:0] ldq_hit_g ;
298
wire  ldst_fp_m;
299
wire  ldstub_e,casa_e,ldst_dbl_e;
300
//wire  stb_data_final_sel_e ;
301
wire  alt_space_e,alt_space_m ;
302
wire  quad_asi_m ;
303
//wire  stquad_e, stquad_m ;
304
wire  stdbl_e ;
305
//wire  dfq_any_rq_for_stb ;
306
//wire  [3:0]   stb_rd_for_dfq ;  // read rq for dfq - threaded
307
wire    blkst_m,blkst_g ;
308
wire    stb_not_empty ;
309
 
310
   wire       clk;
311
   assign     clk = rclk;
312
 
313
//   wire       rst_l;
314
//   wire       stb_rwctl_rst_l;
315
 
316
//   dffrl_async rstff(.din (grst_l),
317
//                     .q   (stb_rwctl_rst_l),
318
//                     .clk (clk), .se(se), .si(), .so(),
319
//                     .rst_l (arst_l));
320
 
321
//=========================================================================================
322
//  MISC
323
//=========================================================================================
324
 
325
// Scan-only flops.
326
 
327
wire    [8:0]    stb_rdata_ramc_b8t0_so ;
328
dff_s #(9)  scmscan_ff (
329
        .din    (stb_rdata_ramc_b8t0[8:0]),
330
        .q      (stb_rdata_ramc_b8t0_so[8:0]),
331
        .clk    (clk),
332
        .se   (se),       .si (),          .so ()
333
        );
334
 
335
//=========================================================================================
336
//  INST_VLD_W GENERATION
337
//=========================================================================================
338
 
339
wire    flush_w_inst_vld_m ;
340
wire    lsu_inst_vld_w ;
341
wire    lsu_stbrwctl_flush_pipe_w;
342
 
343
//=======================================
344
//instantiate buffers
345
//======================================
346
 
347
   wire   ifu_tlu_inst_vld_m_bf0;
348
 
349
bw_u1_buf_10x UZfix_ifu_tlu_inst_vld_m_bf0 ( .a(ifu_tlu_inst_vld_m), .z(ifu_tlu_inst_vld_m_bf0) );
350
bw_u1_buf_30x UZfix_ifu_tlu_inst_vld_m_bf1 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf1) );
351
bw_u1_buf_20x UZfix_ifu_tlu_inst_vld_m_bf2 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf2) );
352
 
353
assign  flush_w_inst_vld_m =
354
        ifu_tlu_inst_vld_m_bf0 &
355
        ~(lsu_stbrwctl_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
356
 
357
dff_s  stgw_ivld (
358
        .din    (flush_w_inst_vld_m),
359
        .q      (lsu_inst_vld_w),
360
        .clk    (clk),
361
        .se   (se),       .si (),          .so ()
362
        );
363
 
364
   wire other_flush_pipe_w;
365
   wire tlu_early_flush_pipe_w;
366
   assign tlu_early_flush_pipe_w = tlu_exu_early_flush_pipe_w;
367
 
368
assign  other_flush_pipe_w =
369
tlu_early_flush_pipe_w | (lsu_ttype_vld_m2 & lsu_inst_vld_w) |
370
lsu_defr_trp_taken_g ;
371
 
372
   wire lsu_flush_pipe_w;
373
 
374
assign  lsu_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;
375
assign  lsu_stbctl_flush_pipe_w = lsu_flush_pipe_w ;
376
assign  lsu_stbrwctl_flush_pipe_w = lsu_flush_pipe_w ;
377
 
378
//=========================================================================================
379
//  STB Array Addr/Ctl Generation
380
//=========================================================================================
381
 
382
assign  ldstub_e = ifu_lsu_ldstub_e ;
383
assign  casa_e   = ifu_lsu_casa_e ;
384
assign  ldst_dbl_e = ifu_lsu_ldst_dbl_e ;
385
 
386
assign  alt_space_e = ifu_lsu_alt_space_e ;
387
 
388
//assign  stdbl_e =  ldst_dbl_e & (~alt_space_e | (alt_space_e & ~lsu_quad_asi_e)) ;
389
assign  stdbl_e =  ldst_dbl_e ;
390
 
391
//   wire lsu_stdbl_inst_m;
392
 
393
//dff  stq_stgm (
394
//  .din  (stdbl_e), 
395
//  .q          (lsu_stdbl_inst_m),  
396
//  .clk  (clk), 
397
//  .se (se), .si (), .so ()
398
//  );
399
 
400
// This path can probably be eased.
401
assign  lsu_stb_data_early_sel_e[0] = ldstub_e  & ~rst_tri_en;
402
assign  lsu_stb_data_early_sel_e[1] = casa_e & ~rst_tri_en;
403
assign  lsu_stb_data_early_sel_e[2] = ~(ldstub_e | casa_e |  stdbl_e) | rst_tri_en;
404
assign  lsu_stb_data_early_sel_e[3] = stdbl_e & ~rst_tri_en ;
405
 
406
// modify for accepting bst data out of pipe.
407
//assign  stb_data_final_sel_e = ~(ldst_fp_e | ffu_lsu_blk_st_e) ;
408
 
409
/*dff  lsel_g (
410
  .din  (stb_data_final_sel_e),
411
  .q  (lsu_stb_data_final_sel_m),
412
  .clk  (clk),
413
  .se (se), .si (), .so ()
414
  );*/
415
 
416
assign  lsu_stb_data_final_sel_m = ~(ldst_fp_m | blkst_m) ;
417
 
418
wire    real_st_m ;
419
wire    flsh_inst_m, flsh_inst_g ;
420
// !!! could qualify st_inst_vld_e with stxa_internal !!!
421
dff_s #(13) stgm_vld  (
422
  .din  ({ld_inst_vld_e,st_inst_vld_e,ldst_sz_e[1:0],
423
    ifu_lsu_swap_e, ifu_lsu_ldstub_e, ifu_lsu_casa_e,ifu_lsu_ldst_dbl_e,
424
    ifu_tlu_thrid_e[1:0],ifu_lsu_ldst_fp_e,lsu_quad_asi_e,ifu_tlu_flsh_inst_e}),
425
  .q  ({ld_inst_vld_m,real_st_m,pipe_ldst_sz_m[1:0],
426
    swap_m,ldstub_m,casa_m,ldst_dbl_m,thrid_m[1:0],ldst_fp_m,quad_asi_m,flsh_inst_m}),
427
  .clk  (clk),
428
  .se   (se), .si (), .so ()
429
  );
430
 
431
assign  st_inst_vld_m = real_st_m | flsh_inst_m ;
432
 
433
// do we need ld/st unflushed ?
434
   wire sta_internal_g;
435
 
436
dff_s #(7) stgw_vld  (
437
  .din  ({sta_internal_m,
438
    casa_m, thrid_m[1:0],ldst_sz_m[1:0], flsh_inst_m}),
439
  .q    ({sta_internal_g,
440
    casa_g, thrid_g[1:0],ldst_sz_g[1:0], flsh_inst_g}),
441
  .clk  (clk),
442
  .se   (se), .si (), .so ()
443
  );
444
 
445
 
446
// stb-cam will be written by st at rising edge of g-stage.
447
// However, st can be flushed after write. To keep the stb state consistent,
448
// the valid and write ptr will not be updated until the rising edge of w2.
449
 
450
wire    early_flush_cond_g,partial_flush_st_g ;
451
assign early_flush_cond_g =
452
(sta_internal_g | ~(lsu_inst_vld_w | blkst_g) | ffu_lsu_kill_fst_w) ;
453
assign  flush_st_g = (early_flush_cond_g | lsu_stbrwctl_flush_pipe_w) & cam_wptr_vld_g ;
454
 
455
//timing, send to stb_ctl and qualified by stb_cam_wvld_g (thread version of cam_wptr_vld_g)   
456
//assign        partial_flush_st_g = early_flush_cond_g & cam_wptr_vld_g ;
457
assign  partial_flush_st_g = early_flush_cond_g ;
458
 
459
assign  atomic_m = (casa_m | ldstub_m | swap_m) & st_inst_vld_m ;
460
 
461
// WRITE PTR VALID GENERATION.
462
 
463
// meant specifically to squash pcx_rq_for_stb.
464
assign  stb_cam_wr_no_ivld_m
465
  = (st_inst_vld_m | casa_m | ldstub_m | swap_m | blkst_m) ;
466
 
467
//bug3610 - kill cam write vld(==stb data write vld next cycle) to avoid data read and write in the same cycle
468
//          to the same entry
469
wire  b2b_st_detect ;
470
 
471
assign  stb_cam_wptr_vld
472
  = (((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m) & ~(flush_st_g & b2b_st_detect) ;
473
  //= ((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m ;  // bug3610
474
  //= (st_inst_vld_m | atomic_m | (ldst_dbl_m & st_inst_vld_m) | blkst_m) ;
475
 
476
dff_s  wptr_g (
477
  .din  (stb_cam_wptr_vld), .q  (cam_wptr_vld_g),
478
  .clk  (clk),
479
  .se   (se), .si (), .so ()
480
  );
481
 
482
//flop move into mem cell (roll back)  
483
assign  stb_data_wptr_vld = cam_wptr_vld_g ;
484
 
485
// WRITE PTR GENERATION
486
 
487
// It is assumed that if there is a store in the pipe, there is a 
488
// free entry in the corresponding stb. Otherwise, the pipe would've
489
// stalled for the thread.      
490
 
491
// If a store-like inst has been flushed, then the old ptr has to be restored
492
// and used.  This is done within thread specific stb control
493
 
494
assign  thread0_m = ~st_thrid_m[1] & ~st_thrid_m[0] ;
495
assign  thread1_m = ~st_thrid_m[1] &  st_thrid_m[0] ;
496
assign  thread2_m =  st_thrid_m[1] & ~st_thrid_m[0] ;
497
assign  thread3_m =  st_thrid_m[1] &  st_thrid_m[0] ;
498
 
499
dff_s #(4) stgg_thrd (
500
  .din  ({thread0_m,thread1_m,thread2_m,thread3_m}),
501
  .q  ({thread0_g,thread1_g,thread2_g,thread3_g}),
502
  .clk  (clk),
503
  .se (se), .si (), .so ()
504
  );
505
 
506
assign  stb_thrd_en_g[0] = thread0_g ;
507
assign  stb_thrd_en_g[1] = thread1_g ;
508
assign  stb_thrd_en_g[2] = thread2_g ;
509
assign  stb_thrd_en_g[3] = thread3_g ;
510
 
511
//assign  stb_wptr[2:0] = 
512
//  thread0_m ? stb_wrptr0[2:0] :
513
//    thread1_m ? stb_wrptr1[2:0] :
514
//      thread2_m ? stb_wrptr2[2:0] :
515
//        thread3_m ? stb_wrptr3[2:0] : 3'bxxx ;
516
 
517
assign  stb_wptr[2:0] =
518
  (thread0_m ? stb_wrptr0[2:0] :  3'b000) |
519
  (thread1_m ? stb_wrptr1[2:0] :  3'b000) |
520
  (thread2_m ? stb_wrptr2[2:0] :  3'b000) |
521
  (thread3_m ? stb_wrptr3[2:0] :  3'b000) ;
522
 
523
assign  b2b_st_detect =   // detect back-to-back store
524
  (thread0_m & thread0_g) |
525
  (thread1_m & thread1_g) |
526
  (thread2_m & thread2_g) |
527
  (thread3_m & thread3_g) ;
528
 
529
assign  cam_wr_ptr[2:0] = (flush_st_g & b2b_st_detect) ? cam_wptr_d1[2:0] : stb_wptr[2:0] ;
530
 
531
dff_s #(3)  wptr_d1 (
532
  .din  (cam_wr_ptr[2:0]),  .q  (cam_wptr_d1[2:0]),
533
  .clk  (clk),
534
  .se (se), .si (), .so ()
535
  );
536
 
537
assign  stb_cam_wvld_m[0] = stb_cam_wptr_vld & thread0_m ;
538
assign  stb_cam_wvld_m[1] = stb_cam_wptr_vld & thread1_m ;
539
assign  stb_cam_wvld_m[2] = stb_cam_wptr_vld & thread2_m ;
540
assign  stb_cam_wvld_m[3] = stb_cam_wptr_vld & thread3_m ;
541
 
542
// contains potential flush conditions.
543
assign  stb_flush_st_g[0] = partial_flush_st_g ;
544
assign  stb_flush_st_g[1] = partial_flush_st_g ;
545
assign  stb_flush_st_g[2] = partial_flush_st_g ;
546
assign  stb_flush_st_g[3] = partial_flush_st_g ;
547
 
548
// stb-data has a delayed write in w2. Alignment of stb data will be done on write
549
// of 64b into stb. This allows write of stb cam and data to be done in the
550
// same cycle, and thus read can occur simultaneously for pcx. 
551
 
552
//mem cell change to bw_r_rf32x80, flop move into mem cell (roll back)
553
//flop outside mem cell
554
assign  stb_data_wr_ptr[4:0] =  {st_thrid_g[1:0],cam_wptr_d1[2:0]};
555
 
556
// RD PTR/VLD GENERATION
557
 
558
// stb read for dfq dumps data into a bypass flop. Thus a read for the dfq can occur
559
// if a thread's stb has an acked entry and the bypass flop is empty.
560
// stb read for pcx occurs on availability of queue entry. 
561
 
562
// Both dfq and pcx require a read of the cam and data. The reads
563
// can thus not happen when load that hits in the stb is in the w2 (change to W3)
564
// stage and a store is in the g-stage of the pipe. Both
565
// probabilities are low.
566
 
567
// ??Read for pcx takes priority over dfq. No deadlock can occur
568
// ??as at some point the pcx reads will be exhausted and the stb
569
// ??will have to drain itself. The stb is self-regulating in this regard.
570
 
571
// priority of stb read: ld_cam_hit (full raw bypass) > dfq > pcx 
572
 
573
//====================================================================================
574
//raw bypass timing 
575
//G/WB                          W2     W3                      W4
576
//cam_hit(from stb_cam output)  flop   stb_data rd_ptr/rd_vld  read STB_DATA/BYP
577
//====================================================================================
578
 
579
   wire [1:0] thrid_w2;
580
   wire [2:0] stb_cam_hit_ptr_w2;
581
   wire       stb_cam_hit_w2;
582
   wire       stb_cam_hit_w;
583
 
584
   //bug3503
585
   assign stb_cam_hit_w  =  stb_cam_hit & lsu_inst_vld_w & ~lsu_stbrwctl_flush_pipe_w;
586
 
587
dff_s #(6) stb_cam_hit_stg_w2 (
588
  .din  ({thrid_g[1:0],  stb_cam_hit_ptr[2:0],    stb_cam_hit_w   }),
589
  .q    ({thrid_w2[1:0], stb_cam_hit_ptr_w2[2:0], stb_cam_hit_w2}),
590
  .clk  (clk),
591
  .se   (se), .si (), .so ()
592
  );
593
 
594
// logic moved to qctl1
595
// pcx is making request for data in current cycle. Can be multi-hot.
596
//assign  pcx_any_rq_for_stb = |pcx_rq_for_stb[3:0] ;
597
//assign  pcx_any_rq_for_stb = 
598
//      (pcx_rq_for_stb[0] & ~lsu_st_pcx_rq_kill_w2[0]) | 
599
//      (pcx_rq_for_stb[1] & ~lsu_st_pcx_rq_kill_w2[1]) | 
600
//      (pcx_rq_for_stb[2] & ~lsu_st_pcx_rq_kill_w2[2]) | 
601
//      (pcx_rq_for_stb[3] & ~lsu_st_pcx_rq_kill_w2[3]) ; 
602
 
603
// ??ld-cam hit based read takes precedence
604
// ??Timing : This could be made pessimistic by using ld_inst_vld_g
605
 
606
//assign  stb_select_rptr[3:0] =  pcx_rq_for_stb[3:0] ;  // timing fix
607
assign  stb_select_rptr[3:0] =  lsu_st_pcx_rq_pick[3:0] ;
608
 
609
// This could be a critical path. Be careful !
610
//assign  stb_rdptr0[2:0] = ~dfq_any_rq_for_stb ? stb_pcx_rptr0[2:0] : stb_dfq_rptr0[2:0] ; 
611
assign  stb_rdptr0[2:0] = stb_pcx_rptr0[2:0] ;
612
assign  stb_rdptr1[2:0] = stb_pcx_rptr1[2:0] ;
613
assign  stb_rdptr2[2:0] = stb_pcx_rptr2[2:0] ;
614
assign  stb_rdptr3[2:0] = stb_pcx_rptr3[2:0] ;
615
 
616
// logic moved to qctl1
617
//wire  [1:0] stb_rd_tid ;
618
//
619
//assign  stb_rd_tid[0] = pcx_rq_for_stb[1] | pcx_rq_for_stb[3] ;
620
//assign  stb_rd_tid[1] = pcx_rq_for_stb[2] | pcx_rq_for_stb[3] ;
621
//   
622
//dff #(2) stbtid_stgd1 (
623
//  .din    (stb_rd_tid[1:0]),  .q  (lsu_stb_rd_tid[1:0]),
624
//  .clk    (clk), 
625
//  .se   (se), .si (), .so ()
626
//  );
627
 
628
//assign  stb_dfq_rd_id[2:0] = stb_data_rd_ptr[2:0] ; // or cam rd ptr
629
 
630
//timing fix:5/6/03
631
//bug4988 - change the priority from 0->3 to 3->0; the reason is when select_rptr=0, the
632
//          default thread id(rptr[4:3])=thread0 but the default rptr[2:0]=thread3. If
633
//          thread0 and thread3 rptr are the same and the thread0 write is occurring, the
634
//          rptr[4:0] is same as wptr[4:0]
635
wire  [2:0]  stb_rdptr ;
636
//assign  stb_rdptr[2:0] = 
637
//  stb_select_rptr[0] ? stb_rdptr0[2:0] :
638
//    stb_select_rptr[1] ? stb_rdptr1[2:0] :
639
//      stb_select_rptr[2] ? stb_rdptr2[2:0] :
640
//                             stb_rdptr3[2:0] ;
641
 
642
//assign  stb_rdptr[2:0] = 
643
//  stb_select_rptr[3] ? stb_rdptr3[2:0] :
644
//    stb_select_rptr[2] ? stb_rdptr2[2:0] :
645
//      stb_select_rptr[1] ? stb_rdptr1[2:0] :
646
//                             stb_rdptr0[2:0] ;
647
 
648
assign  stb_rdptr[2:0] =
649
  (stb_select_rptr[3] ? stb_rdptr3[2:0] : 3'b0) |
650
  (stb_select_rptr[2] ? stb_rdptr2[2:0] : 3'b0) |
651
  (stb_select_rptr[1] ? stb_rdptr1[2:0] : 3'b0) |
652
  (stb_select_rptr[0] ? stb_rdptr0[2:0] : 3'b0) ;
653
 
654
//timing fix: 8/29/03 - remove the default select logic for stb_select_rptr since synthesis is forced to replace 
655
//            4to1 mux w/ and-or mux or 2to1 mux
656
//wire   stb_select_rptr_b3;
657
//assign stb_select_rptr_b3 =  ~|stb_select_rptr[2:0];
658
 
659
wire  [2:0]  stb_rdptr_l;
660
 
661
assign stb_rdptr_l[2:0] =  ~stb_rdptr[2:0] ;
662
//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b0_mux(
663
//                  .z(stb_rdptr_l[0]), 
664
//                  .d0(stb_rdptr0[0]), 
665
//                  .d1(stb_rdptr1[0]), 
666
//                  .d2(stb_rdptr2[0]), 
667
//                  .d3(stb_rdptr3[0]), 
668
//                  .s0(stb_select_rptr[0]), 
669
//                  .s1(stb_select_rptr[1]), 
670
//                  .s2(stb_select_rptr[2]), 
671
//                  .s3(stb_select_rptr[3]));
672
//   
673
//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b1_mux(
674
//                  .z(stb_rdptr_l[1]), 
675
//                  .d0(stb_rdptr0[1]), 
676
//                  .d1(stb_rdptr1[1]), 
677
//                  .d2(stb_rdptr2[1]), 
678
//                  .d3(stb_rdptr3[1]), 
679
//                  .s0(stb_select_rptr[0]), 
680
//                  .s1(stb_select_rptr[1]), 
681
//                  .s2(stb_select_rptr[2]), 
682
//                  .s3(stb_select_rptr[3]));
683
//   
684
//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b2_mux(
685
//                  .z(stb_rdptr_l[2]), 
686
//                  .d0(stb_rdptr0[2]), 
687
//                  .d1(stb_rdptr1[2]), 
688
//                  .d2(stb_rdptr2[2]), 
689
//                  .d3(stb_rdptr3[2]), 
690
//                  .s0(stb_select_rptr[0]), 
691
//                  .s1(stb_select_rptr[1]), 
692
//                  .s2(stb_select_rptr[2]), 
693
//                  .s3(stb_select_rptr[3]));
694
//   
695
 
696
assign  stb_rd_thrid[0] = stb_select_rptr[1] | stb_select_rptr[3] ;
697
assign  stb_rd_thrid[1] = stb_select_rptr[2] | stb_select_rptr[3] ;
698
 
699
// read
700
// this mux will have to be accommodated in path !!! Talk to Satya. 
701
// Timing : This could be made pessimistic by using ld_inst_vld_g
702
 
703
// raw read STB at W3 (changed from W2)        
704
assign  stb_data_rd_ptr[4:0] = stb_cam_hit_w2 ?
705
        {thrid_w2[1:0],stb_cam_hit_ptr_w2[2:0]} :  // rd based on ld hit
706
        {stb_rd_thrid[1:0],~stb_rdptr_l[2:0]} ;       // rd for pcx or dfq
707
 
708
// Blk-st modification for thread.
709
assign  st_thrid_m[1:0] = blkst_m ? ffu_lsu_blk_st_tid_m[1:0] : thrid_m[1:0] ;
710
dff_s #(2)  stid_stgg (
711
  .din  (st_thrid_m[1:0]),
712
  .q    (st_thrid_g[1:0]),
713
  .clk  (clk),
714
  .se (se), .si (), .so ()
715
  );
716
 
717
//timing fix: 5/6/03
718
//assign  stb_cam_rw_ptr[4:0]  = stb_cam_wptr_vld ? 
719
//        {st_thrid_m[1:0],cam_wr_ptr[2:0]} :  // write
720
//        {stb_rd_thrid[1:0],stb_rdptr[2:0]} ;  // read
721
 
722
wire [2:0] cam_wr_ptr_l;
723
wire [1:0] stb_rd_thrid_l;
724
wire [1:0] st_thrid_m_l;
725
 
726
assign cam_wr_ptr_l[2:0]  =  ~cam_wr_ptr[2:0];
727
assign stb_rd_thrid_l[1:0]  =  ~stb_rd_thrid[1:0];
728
assign st_thrid_m_l[1:0]  =  ~st_thrid_m[1:0];
729
 
730
bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b0_mux(
731
                  .z(stb_cam_rw_ptr[0]),
732
                  .d0(stb_rdptr_l[0]),
733
                  .d1(cam_wr_ptr_l[0]),
734
                  .s(stb_cam_wptr_vld));
735
 
736
bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b1_mux(
737
                  .z(stb_cam_rw_ptr[1]),
738
                  .d0(stb_rdptr_l[1]),
739
                  .d1(cam_wr_ptr_l[1]),
740
                  .s(stb_cam_wptr_vld));
741
 
742
bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b2_mux(
743
                  .z(stb_cam_rw_ptr[2]),
744
                  .d0(stb_rdptr_l[2]),
745
                  .d1(cam_wr_ptr_l[2]),
746
                  .s(stb_cam_wptr_vld));
747
 
748
bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b3_mux(
749
                  .z(stb_cam_rw_ptr[3]),
750
                  .d0(stb_rd_thrid_l[0]),
751
                  .d1(st_thrid_m_l[0]),
752
                  .s(stb_cam_wptr_vld));
753
 
754
bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b4_mux(
755
                  .z(stb_cam_rw_ptr[4]),
756
                  .d0(stb_rd_thrid_l[1]),
757
                  .d1(st_thrid_m_l[1]),
758
                  .s(stb_cam_wptr_vld));
759
 
760
 
761
 
762
//raw read STB at W3 (not W2)
763
//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
764
//                     stb_cam_rptr_vld and stb_data_rptr_vld
765
 
766
//bug4988 - qual lsu_st_pcx_rq_vld w/ no write vld to stb_data. use stb_cam_wr_no_ivld_m instead of write vld.
767
//          this is the same signal used to kill pcx_rq_for_stb
768
//          stb_cam_rptr_vld is not set if stb_cam_wptr_vld=1
769
 
770
// Read-valid for the STB data array.
//   - stb_cam_hit_w2 : a cam hit requires a read, whether single or multiple.
//   - pcx/dfq read   : lsu_st_pcx_rq_vld, suppressed while stb_cam_wr_no_ivld_m
//                      is set (bug4988).
// History: earlier versions used (|stb_select_rptr[3:0]) (replaced as a timing
// fix) and then the unqualified lsu_st_pcx_rq_vld (replaced for bug4988).
assign  stb_data_rptr_vld =
    stb_cam_hit_w2 | (~stb_cam_wr_no_ivld_m & lsu_st_pcx_rq_vld) ;
775
 
776
//raw read STB at W3 (not W2)      
777
//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
778
//                     stb_cam_rptr_vld and stb_data_rptr_vld
779
// Read-valid for the STB cam: a pcx read request only, and only when there is
// neither a cam hit in w2 nor a valid cam write pointer this cycle.
// History: the request term used to be (|stb_select_rptr[3:0]); it was replaced
// by lsu_st_pcx_rq_vld as a timing fix.
assign  stb_cam_rptr_vld =
    lsu_st_pcx_rq_vld & ~(stb_cam_hit_w2 | stb_cam_wptr_vld) ;
783
 
784
// lsu_stb_rd_vld_d1 - not used
785
//dff  stbrd_stgd1  (
786
//  .din    (stb_cam_rptr_vld), .q  (lsu_stb_rd_vld_d1),
787
//  .clk    (clk), 
788
//  .se   (se), .si (), .so ()
789
//  );
790
 
791
// logic moved to qctl1
792
//dff #(1)  prvld_stgd1 (
793
//  .din  (pcx_any_rq_for_stb), 
794
//  .q  (lsu_stb_pcx_rvld_d1),
795
//  .clk  (clk), 
796
//  .se (se), .si (), .so ()
797
//  );
798
 
799
// The thread id presented to the cam for compare is thrid_m, passed through unchanged.
assign  stb_cam_cm_tid[1:0] = {thrid_m[1], thrid_m[0]} ;
800
 
801
 
802
//=========================================================================================
803
//  BYTE MASK FORMATTING
804
//=========================================================================================
805
 
806
 
807
// Write/CAM Data for CAM RAM.
808
// Physical dword aligned addr - PA[39:3] (37b)
809
// Byte Mask - (8b)
810
// Total - 45b
811
 
812
//  | b7  |  b6 | b5  | b4  | b3  | b2  | b1  | b0  |
813
//  |   hw3 |   hw2 |   hw1 |   hw0 |
814
//  |     w1    |   w0    |
815
//  |       dw        | 
816
 
817
 
818
 
819
//dff  #(11) va_m (
820
//  .din    (exu_lsu_ldst_va_e[10:0]),  .q  (pipe_ldst_va_m[10:0]),
821
//  .clk    (clk), 
822
//  .se   (se), .si (), .so ()
823
//  );
824
 
825
// pipe_ldst_va_m is a straight alias of lsu_ldst_va_m[9:0]; the local va_m
// flop that used to produce it is commented out above.
assign pipe_ldst_va_m[9:0] = lsu_ldst_va_m[9:0];

// One-hot decode of the access size ldst_sz_m[1:0].
// (ldst_byte may not be needed.)
assign ldst_dwrd =  &ldst_sz_m[1:0] ;                 // 11
assign ldst_word = ~ldst_sz_m[0] &  ldst_sz_m[1] ;    // 10
assign ldst_hwrd =  ldst_sz_m[0] & ~ldst_sz_m[1] ;    // 01
assign ldst_byte = ~|ldst_sz_m[1:0] ;                 // 00
832
 
833
// Note : dword term is common. 
834
// Byte-mask generation from the byte offset ldst_va_m[2:0] and the decoded size.
// Mask bit 7 corresponds to offset 0 and mask bit 0 to offset 7.
//
// bmsk_off[n] is the one-hot decode "ldst_va_m[2:0] == n". Each mask bit is set
//   - unconditionally when the access starts at that byte, or
//   - when a wider access starts at an earlier offset that is aligned for its
//     size and spans this byte.
// The dword term (offset 0) is common to mask bits 6..0.
wire [7:0] bmsk_off ;
assign bmsk_off[0] = ~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] ;
assign bmsk_off[1] = ~ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0] ;
assign bmsk_off[2] = ~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] ;
assign bmsk_off[3] = ~ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0] ;
assign bmsk_off[4] =  ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] ;
assign bmsk_off[5] =  ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0] ;
assign bmsk_off[6] =  ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] ;
assign bmsk_off[7] =  ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0] ;

// Upper word (offsets 0..3)
assign ldst_byte_mask[7]  =
    bmsk_off[0] ;
assign ldst_byte_mask[6]  =
    bmsk_off[1] |
   (bmsk_off[0] & (ldst_dwrd | ldst_word | ldst_hwrd)) ;
assign ldst_byte_mask[5]  =
    bmsk_off[2] |
   (bmsk_off[0] & (ldst_dwrd | ldst_word)) ;
assign ldst_byte_mask[4]  =
    bmsk_off[3] |
   (bmsk_off[2] & ldst_hwrd) |
   (bmsk_off[0] & (ldst_dwrd | ldst_word)) ;

// Lower word (offsets 4..7)
assign ldst_byte_mask[3]  =
    bmsk_off[4] |
   (bmsk_off[0] & ldst_dwrd) ;
assign ldst_byte_mask[2]  =
    bmsk_off[5] |
   (bmsk_off[4] & (ldst_hwrd | ldst_word)) |
   (bmsk_off[0] & ldst_dwrd) ;
assign ldst_byte_mask[1]  =
    bmsk_off[6] |
   (bmsk_off[4] & ldst_word) |
   (bmsk_off[0] & ldst_dwrd) ;
assign ldst_byte_mask[0]  =
    bmsk_off[7] |
   (bmsk_off[6] & ldst_hwrd) |
   (bmsk_off[4] & ldst_word) |
   (bmsk_off[0] & ldst_dwrd) ;
862
 
863
// Two copies of the byte mask leave this block:
//   stb_ldst_byte_msk     - direct alias of ldst_byte_mask
//   stb_ldst_byte_msk_min - the same bits, each through its own bw_u1_minbuf_5x cell
// NOTE(review): the minbuf cells are presumably min-delay buffers -- confirm
// against the cell library.
assign  stb_ldst_byte_msk[7:0]  = ldst_byte_mask[7:0];

   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b0 (
      .z (stb_ldst_byte_msk_min[0]),
      .a (ldst_byte_mask[0]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b1 (
      .z (stb_ldst_byte_msk_min[1]),
      .a (ldst_byte_mask[1]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b2 (
      .z (stb_ldst_byte_msk_min[2]),
      .a (ldst_byte_mask[2]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b3 (
      .z (stb_ldst_byte_msk_min[3]),
      .a (ldst_byte_mask[3]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b4 (
      .z (stb_ldst_byte_msk_min[4]),
      .a (ldst_byte_mask[4]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b5 (
      .z (stb_ldst_byte_msk_min[5]),
      .a (ldst_byte_mask[5]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b6 (
      .z (stb_ldst_byte_msk_min[6]),
      .a (ldst_byte_mask[6]));
   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b7 (
      .z (stb_ldst_byte_msk_min[7]),
      .a (ldst_byte_mask[7]));
873
 
874
 
875
// Generate selects to format st data
876
// Size-class selects for formatting store data, built from the one-hot
// ldst_byte / ldst_hwrd / ldst_word / ldst_dwrd decodes.

// single-size selects
assign  lsu_st_sz_b_m    = ldst_byte ;                          // byte
assign  lsu_st_sz_hw_m   = ldst_hwrd ;                          // hword
assign  lsu_st_sz_w_m    = ldst_word ;                          // word
assign  lsu_st_sz_dw_m   = ldst_dwrd ;                          // double word

// combined selects
assign  lsu_st_sz_bhw_m  = ldst_hwrd | ldst_byte ;              // byte or hword
assign  lsu_st_sz_hww_m  = ldst_word | ldst_hwrd ;              // hword or word
assign  lsu_st_sz_wdw_m  = ldst_dwrd | ldst_word ;              // word or dword
assign  lsu_st_sz_bhww_m = ldst_word | ldst_hwrd | ldst_byte ;  // byte or hword or word
884
 
885
//=========================================================================================
886
//  BLK-ST HANDLING
887
//=========================================================================================
888
 
889
// Block-store indication, staged e -> m -> g.
// blkst_m_tmp is the flopped copy of ffu_lsu_blk_st_e. blkst_m is that copy
// cancelled whenever real_st_m, flsh_inst_m or ld_inst_vld_m is asserted
// (Bug 3444). blkst_g is blkst_m flopped once more.
wire    blkst_m_tmp ;

dff_s  stgm_bst (
  .clk (clk),
  .din (ffu_lsu_blk_st_e),
  .q   (blkst_m_tmp),
  .se  (se),
  .si  (),
  .so  ()
);

assign  blkst_m =
    blkst_m_tmp & ~real_st_m & ~flsh_inst_m & ~ld_inst_vld_m ; // Bug 3444

dff_s  stgg_bst (
  .clk (clk),
  .din (blkst_m),
  .q   (blkst_g),
  .se  (se),
  .si  (),
  .so  ()
);
906
 
907
// Block-store size/address state.
// snap_blk_st_local_m (lsu_snap_blk_st_m qualified by ifu_tlu_inst_vld_m_bf0)
// enables a capture of ldst_sz_m[1:0] and ldst_va_m[9:6],[2:0] into
// bst_sz_m / bst_va_m. Address bits [5:3] are not captured there: they are
// flopped every cycle from ffu_lsu_blk_st_va_e[5:3].
wire    [9:0]    bst_va_m ;
wire    [1:0]    bst_sz_m ;
wire    snap_blk_st_local_m = ifu_tlu_inst_vld_m_bf0 & lsu_snap_blk_st_m ;

// output to be used in m-stage.
dffe_s #(9) bst_state_m (
        .clk    (clk),
        .en     (snap_blk_st_local_m),
        .din    ({ldst_sz_m[1:0],ldst_va_m[9:6],ldst_va_m[2:0]}),
        .q      ({bst_sz_m[1:0],bst_va_m[9:6],bst_va_m[2:0]}),
        .se     (se),
        .si     (),
        .so     ()
        );

dff_s #(3)  bsva_stgm (
        .clk    (clk),
        .din    (ffu_lsu_blk_st_va_e[5:3]),
        .q      (bst_va_m[5:3]),
        .se     (se),
        .si     (),
        .so     ()
        );
926
 
927
//assign        bst_va_m[5:3]   = ffu_lsu_blk_st_va_e[5:3] ;
928
 
929
//assign  ldst_va_m[10] =  pipe_ldst_va_m[10] ;
930
// Address and size used in this block: during a block store (blkst_m) they
// come from the bst_* state, otherwise from the pipe.
assign  ldst_sz_m[1:0] =
    blkst_m ? bst_sz_m[1:0]
            : pipe_ldst_sz_m[1:0] ;

assign  ldst_va_m[9:0] =
    blkst_m ? bst_va_m[9:0]
            : pipe_ldst_va_m[9:0] ;

// Bits [9:3] of the muxed address are exported as lsu_stb_va_m.
assign  lsu_stb_va_m[9:3] = ldst_va_m[9:3] ;
935
 
936
//=========================================================================================
937
//  WRITE DATA FOR DATA RAM
938
//=========================================================================================
939
 
940
// Write Data for DATA RAM.
941
// Data - (64b)
942
// (8b parity is generated on read)
943
// Rqtype - (3b)
944
// Size - (3b). 
945
// Addr - (3b). Lower 3b of 40b addr.
946
// (set index and way available from ctl state.
947
// Total - 73b.
948
 
949
// st-quad requires own encoding.
950
// assume does not have to be changed for blk-st
951
// Request-type encoding written with the store, in priority order:
//   casa_m             -> 3'b010  (cas pkt 1)
//   ldstub_m | swap_m  -> 3'b110  (ldstub/swap)
//   otherwise          -> 3'b001  (normal store or partial interrupt rq type)
// The stquad-pkt1 encoding (3'b111) is no longer generated here.
assign  st_rq_type_m[2:0] =
    casa_m               ? 3'b010 :
    (swap_m | ldstub_m)  ? 3'b110 :
                           3'b001 ;

// Only bits [2:1] are exported; the full [2:0] export is retired.
assign  lsu_st_rq_type_m[2:1] = st_rq_type_m[2:1] ;
959
 
960
// Need ASI decode
961
/*wire  lsu_stquad_inst_m ;
962
assign  lsu_stquad_inst_m = ldst_dbl_m & st_inst_vld_m & quad_asi_m ;
963
*/
964
 
965
// st_rmo_m is set for lsu_st_rmo_m or for a block store
// (binit and blk rmo stores).
wire    st_rmo_g ;
wire    st_rmo_m = blkst_m | lsu_st_rmo_m ;

// m -> g staging of the low address bits, request type, rmo flag and the
// bst-in-pipe flag (4 + 3 + 1 + 1 = 9 bits).
dff_s #(9)  stgg_etc  (
  .clk  (clk),
  .din  ({ldst_va_m[3:0],st_rq_type_m[2:0],st_rmo_m,lsu_bst_in_pipe_m}),
  .q    ({ldst_va_g[3:0],st_rq_type_g[2:0],st_rmo_g,bst_in_pipe_g}),
  .se   (se),
  .si   (),
  .so   ()
  );
973
 
974
// Either block-store flag in g marks the entry as a block-store helper.
wire    bst_any_helper = bst_in_pipe_g | blkst_g ; // Bug 3934

// Size will have to be changed to 2bits.
// 7 more bits could be added to data ram to save read of cam in providing dfq pkt !!!
// Control field written to data-ram bits [75:64], msb first
// (bit positions below assume flsh_inst_g is a single bit):
assign stb_wdata_ramd_b75_b64[75:64]   =
  { st_rmo_g,            // [75]
    st_rq_type_g[2:0],   // [74:72]
    flsh_inst_g,         // [71]
    bst_any_helper,      // [70]
    ldst_sz_g[1:0],      // [69:68]
    ldst_va_g[3:0]       // [67:64]
  };
981
        // Bug3395, 3934
982
 
983
//=========================================================================================
984
//  FULL/PARTIAL RAW CALCULATION
985
//=========================================================================================
986
 
987
// io load cannot bypass from stb. A stb hit results in an io-ld being treated
988
// as a partial-raw. (OR should it be serialized behind any io store ??)
989
// io_ld is taken directly from tlb_pgnum_b39_g (Bug 4362).
// io_ld_w2 is its flopped copy, driven by the ioldced_stgw2 flop further down.
wire    io_ld_w2 ;
wire    io_ld = tlb_pgnum_b39_g ; // Bug 4362
991
 
992
// full-raw is squashed on multiple hits in stb. Treated like partial raw.
993
// Ensure that all ld and ld-like instructions signal ld_inst_vld. We can then
994
// remove qualification with ld_inst_vld_g.
995
/*assign  ld_stb_full_raw_g =
996
        (|stb_ld_full_raw[7:0]) & ~(stb_cam_mhit | ldq_hit_g | io_ld) ;
997
assign  ld_stb_full_raw_g[0] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
998
          ~(stb_cam_mhit | ldq_hit_g[0] | io_ld) & thread0_g ;
999
          //~(ld_raw_mhit | ld_stq_hit_g[0] | io_ld) & thread0_g ;
1000
assign  ld_stb_full_raw_g[1] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1001
          ~(stb_cam_mhit | ldq_hit_g[1] | io_ld) & thread1_g ;
1002
assign  ld_stb_full_raw_g[2] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1003
          ~(stb_cam_mhit | ldq_hit_g[2] | io_ld) & thread2_g ;
1004
assign  ld_stb_full_raw_g[3] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1005
          ~(stb_cam_mhit | ldq_hit_g[3] | io_ld) & thread3_g ; */
1006
// Multiple full raws are also treated like a partial.
1007
/*assign  ld_stb_partial_raw_g =
1008
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g | (io_ld & stb_not_empty)) ;
1009
assign  ld_stb_partial_raw_g[0] =
1010
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[0] | (io_ld & stb_not_empty))
1011
          & ld_inst_vld_g & thread0_g ;
1012
assign  ld_stb_partial_raw_g[1] =
1013
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[1] | (io_ld & stb_not_empty))
1014
          & ld_inst_vld_g & thread1_g ;
1015
assign  ld_stb_partial_raw_g[2] =
1016
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[2] | (io_ld & stb_not_empty))
1017
          & ld_inst_vld_g & thread2_g ;
1018
assign  ld_stb_partial_raw_g[3] =
1019
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[3] | (io_ld & stb_not_empty))
1020
          & ld_inst_vld_g & thread3_g; */
1021
 
1022
//=========================================================================================
1023
//  STQ HANDLING
1024
//=========================================================================================
1025
 
1026
/*      REMOVE STQUAD */
1027
 
1028
//=========================================================================================
1029
//      LD QUAD HANDLING
1030
//=========================================================================================
1031
 
1032
// alt_space_m is the flopped copy of alt_space_e.
dff_s  altsp_stgm (
  .clk  (clk),
  .din  (alt_space_e),
  .q    (alt_space_m),
  .se   (se),
  .si   (),
  .so   ()
  );

// A load-quad is flagged when all four hold: a valid load, a double access,
// a quad asi, and alternate space.
assign  lsu_ldquad_inst_m =
    ld_inst_vld_m & ldst_dbl_m & alt_space_m & quad_asi_m ;
1039
 
1040
/*wire  ldquad_inst_g ;
1041
dff_s  ldq_stgg (
1042
  .din    (lsu_ldquad_inst_m), .q (ldquad_inst_g),
1043
  .clk    (clk),
1044
  .se   (se), .si (), .so ()
1045
  );
1046
 
1047
wire    ldq_stb_cam_hit ;
1048
assign  ldq_stb_cam_hit = stb_cam_hit & ldquad_inst_g ;
1049
// Terms can be made common.
1050
assign  ldq_hit_g = ldq_stb_cam_hit ; */
1051
/*assign  ldq_hit_g[0] = thread0_g & ldq_stb_cam_hit ;
1052
assign  ldq_hit_g[1] = thread1_g & ldq_stb_cam_hit ;
1053
assign  ldq_hit_g[2] = thread2_g & ldq_stb_cam_hit ;
1054
assign  ldq_hit_g[3] = thread3_g & ldq_stb_cam_hit ; */
1055
 
1056
//=========================================================================================
1057
//  STB MULTIPLE HIT GENERATION
1058
//=========================================================================================
1059
 
1060
// Multiple hits in stb is to be treated as a partial raw case. The ld however must wait
1061
// until the youngest store which hit exits the stb. A ptr needs to be calculated for this case.
1062
// A version of stb_wptr is used instead because it is easily available. (Would this have
1063
// any significant performance impact ? - No)
1064
 
1065
// Per-entry "any raw" vector: an entry is flagged if it reports either a
// partial or a full raw against the load.
assign  ld_any_raw_vld[7:0] = stb_ld_partial_raw[7:0] | stb_ld_full_raw[7:0] ;

// Delay the raw vector and the thread's ced state vector by one flop (_d1).
dff_s #(16)  stgw2_rvld (
        .clk    (clk),
        .din    ({ld_any_raw_vld[7:0],stb_state_ced[7:0]}),
        .q      ({ld_any_raw_vld_d1[7:0],stb_state_ced_d1[7:0]}),
        .se     (se),
        .si     (),
        .so     ()
        );
1073
 
1074
 
1075
// This equation can be optimized for the grape flow.
1076
// This can be obtained from stb.
1077
/*assign  ld_raw_mhit =
1078
  (ld_any_raw_vld[7] & |(ld_any_raw_vld[6:0])) |
1079
  (ld_any_raw_vld[6] & |(ld_any_raw_vld[5:0])) |
1080
  (ld_any_raw_vld[5] & |(ld_any_raw_vld[4:0])) |
1081
  (ld_any_raw_vld[4] & |(ld_any_raw_vld[3:0])) |
1082
  (ld_any_raw_vld[3] & |(ld_any_raw_vld[2:0])) |
1083
  (ld_any_raw_vld[2] & |(ld_any_raw_vld[1:0])) |
1084
  (ld_any_raw_vld[1] &   ld_any_raw_vld[0]) ; */
1085
 
1086
//=========================================================================================
1087
//  STB Partial Raw ptr generation
1088
//=========================================================================================
1089
 
1090
// The loading on the raw output of the stb cam will be significant if the signal 
1091
// has to fan out to all 4 ctl blocks. That's why the control has to be localized.
1092
 
1093
// Using the ack bit may result in pessimistic issue of partial raw loads.
1094
// For a single partial raw or multiple hit case, detecting whether there is any
1095
// unacked store is sufficient. Calculation is for no unacked store.
1096
// Can we use cam_hit ptr instead !!!
1097
 
1098
//assign  ld_rawp_st_ced_w2 = (~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;
1099
// 3-to-8 one-hot decode of the selected thread's previous write pointer.
// wptr_dcd[n] is set when stb_wptr_prev[2:0] == n. (Bug 4294)
wire [7:0] wptr_dcd ; // Bug 4294
wire [2:0] wptr_prev = stb_wptr_prev[2:0] ;

assign  wptr_dcd[0] = &{~wptr_prev[2], ~wptr_prev[1], ~wptr_prev[0]} ;
assign  wptr_dcd[1] = &{~wptr_prev[2], ~wptr_prev[1],  wptr_prev[0]} ;
assign  wptr_dcd[2] = &{~wptr_prev[2],  wptr_prev[1], ~wptr_prev[0]} ;
assign  wptr_dcd[3] = &{~wptr_prev[2],  wptr_prev[1],  wptr_prev[0]} ;
assign  wptr_dcd[4] = &{ wptr_prev[2], ~wptr_prev[1], ~wptr_prev[0]} ;
assign  wptr_dcd[5] = &{ wptr_prev[2], ~wptr_prev[1],  wptr_prev[0]} ;
assign  wptr_dcd[6] = &{ wptr_prev[2],  wptr_prev[1], ~wptr_prev[0]} ;
assign  wptr_dcd[7] = &{ wptr_prev[2],  wptr_prev[1],  wptr_prev[0]} ;
1110
 
1111
// iold_st_ced_g: ced state bit of the entry selected by wptr_dcd, i.e. the
// entry at the previous write pointer.
wire iold_st_ced_w2 ;
wire iold_st_ced_g = |(stb_state_ced[7:0] & wptr_dcd[7:0]) ;

// Flop the io-load flag and its ced bit together into w2.
dff_s #(2)   ioldced_stgw2  (
  .clk  (clk),
  .din  ({iold_st_ced_g,io_ld}),
  .q    ({iold_st_ced_w2,io_ld_w2}),
  .se   (se),
  .si   (),
  .so   ()
  );

// For an io load, use the ced bit of the previous-write-pointer entry.
// Otherwise the result is 1 only when no raw-hit entry (d1 vector) still has
// its ced bit clear.
assign  ld_rawp_st_ced_w2 =
        io_ld_w2 ? iold_st_ced_w2
                 : ~|(~stb_state_ced_d1[7:0] & ld_any_raw_vld_d1[7:0]) ;
1124
 
1125
// For the case of a single partial raw.
1126
// For a single partial raw the store id is simply the cam hit pointer.
assign  ld_rawp_stb_id[2:0] = stb_cam_hit_ptr[2:0] ;
1129
/*assign  ld_rawp_stb_id[0] = stb_ld_partial_raw[1] | stb_ld_partial_raw[3] |
1130
        stb_ld_partial_raw[5] | stb_ld_partial_raw[7] ;
1131
assign  ld_rawp_stb_id[1] = stb_ld_partial_raw[2] | stb_ld_partial_raw[3] |
1132
        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ;
1133
assign  ld_rawp_stb_id[2] = stb_ld_partial_raw[4] | stb_ld_partial_raw[5] |
1134
        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ; */
1135
 
1136
   // One-hot decode of thrid_g[1:0]: pipe_thread_g[n] is set when thrid_g == n.
   wire [3:0] pipe_thread_g;
   assign     pipe_thread_g[0] = &{~thrid_g[1], ~thrid_g[0]};
   assign     pipe_thread_g[1] = &{~thrid_g[1],  thrid_g[0]};
   assign     pipe_thread_g[2] = &{ thrid_g[1], ~thrid_g[0]};
   assign     pipe_thread_g[3] = &{ thrid_g[1],  thrid_g[0]};
1141
 
1142
// Per-thread select of STB state using the one-hot pipe_thread_g[3:0].
// Each thread's value is gated by its select bit and the four results are
// OR-ed; unselected threads contribute zero.

// ced state vector of the selected thread
assign  stb_state_ced[7:0] =
  ({8{pipe_thread_g[3]}} & stb_state_ced3[7:0]) |
  ({8{pipe_thread_g[2]}} & stb_state_ced2[7:0]) |
  ({8{pipe_thread_g[1]}} & stb_state_ced1[7:0]) |
  ({8{pipe_thread_g[0]}} & stb_state_ced0[7:0]) ;

// previous write pointer of the selected thread
assign  stb_wptr_prev[2:0] =
  ({3{pipe_thread_g[3]}} & stb_wrptr3_prev[2:0]) |
  ({3{pipe_thread_g[2]}} & stb_wrptr2_prev[2:0]) |
  ({3{pipe_thread_g[1]}} & stb_wrptr1_prev[2:0]) |
  ({3{pipe_thread_g[0]}} & stb_wrptr0_prev[2:0]) ;

// set when the selected thread's lsu_stb_empty bit is clear
assign  stb_not_empty  = |(pipe_thread_g[3:0] & ~lsu_stb_empty[3:0]) ;

// Unmodified copies of the per-thread empty flags under two other names.
assign  lsu_spu_stb_empty[3:0] = lsu_stb_empty[3:0] ;
assign  lsu_stb_empty_buf[3:0] = lsu_stb_empty[3:0] ;
1162
 
1163
//wire ldstdbl_g ;
1164
// stdbl should be qualified with quad_asi_g !!!
1165
//assign  ldstdbl_g = ldst_dbl_g & (ld_inst_vld_g | st_inst_vld_g) & ~ldst_fp_g ;
1166
 
1167
// casa_g and stdbl_g may not be required.
1168
//assign  ld_rawp_st_ackid_g[2:0] = 
1169
//  (casa_g | ldstdbl_g | stb_cam_mhit | (io_ld & stb_not_empty))
1170
//  ? stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;
1171
 
1172
//===================================================
1173
//casa: need st-st order
1174
//st cam mhit: cannot figure out the youngest
1175
//io: side effect
1176
//remove int ldd and quad ldd, why need ldstdbl?
1177
//===================================================
1178
// Store id whose ack the partial-raw load waits on.
// The previous write pointer is used for casa, for a multiple cam hit, and
// for an io load while the thread's stb is not empty. In all other cases the
// cam hit pointer (ld_rawp_stb_id) is used.
wire    [2:0]    ld_rawp_st_ackid_g =
  ((io_ld & stb_not_empty) | stb_cam_mhit | casa_g)
      ? stb_wptr_prev[2:0]
      : ld_rawp_stb_id[2:0] ;

// Flopped copy of the ack id (_w2).
dff_s #(3)  rawpackid_w2 (
  .clk  (clk),
  .din  (ld_rawp_st_ackid_g[2:0]),
  .q    (ld_rawp_st_ackid_w2[2:0]),
  .se   (se),
  .si   (),
  .so   ()
  );
1190
 
1191
 
1192
   // Per-thread store-buffer count outputs. Each lsu_ifu_stbcntN is the 4-bit
   // lsu_stbcntN passed through unchanged; each lsu_ffu_stb_fullN is bit 3 of
   // the same count.

   // thread 0
   assign lsu_ffu_stb_full0    = lsu_stbcnt0[3] ;
   assign lsu_ifu_stbcnt0[3:0] = lsu_stbcnt0[3:0] ;

   // thread 1
   assign lsu_ffu_stb_full1    = lsu_stbcnt1[3] ;
   assign lsu_ifu_stbcnt1[3:0] = lsu_stbcnt1[3:0] ;

   // thread 2
   assign lsu_ffu_stb_full2    = lsu_stbcnt2[3] ;
   assign lsu_ifu_stbcnt2[3:0] = lsu_stbcnt2[3:0] ;

   // thread 3
   assign lsu_ffu_stb_full3    = lsu_stbcnt3[3] ;
   assign lsu_ifu_stbcnt3[3:0] = lsu_stbcnt3[3:0] ;
1201
 
1202
endmodule
1203
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.