OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [orpsocv2/] [boards/] [xilinx/] [ml501/] [rtl/] [verilog/] [xilinx_ddr2/] [ddr2_phy_calib.v] - Blame information for rev 412

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 412 julius
//*****************************************************************************
2
// DISCLAIMER OF LIABILITY
3
//
4
// This file contains proprietary and confidential information of
5
// Xilinx, Inc. ("Xilinx"), that is distributed under a license
6
// from Xilinx, and may be used, copied and/or disclosed only
7
// pursuant to the terms of a valid license agreement with Xilinx.
8
//
9
// XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION
10
// ("MATERIALS") "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
11
// EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING WITHOUT
12
// LIMITATION, ANY WARRANTY WITH RESPECT TO NONINFRINGEMENT,
13
// MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE. Xilinx
14
// does not warrant that functions included in the Materials will
15
// meet the requirements of Licensee, or that the operation of the
16
// Materials will be uninterrupted or error-free, or that defects
17
// in the Materials will be corrected. Furthermore, Xilinx does
18
// not warrant or make any representations regarding use, or the
19
// results of the use, of the Materials in terms of correctness,
20
// accuracy, reliability or otherwise.
21
//
22
// Xilinx products are not designed or intended to be fail-safe,
23
// or for use in any application requiring fail-safe performance,
24
// such as life-support or safety devices or systems, Class III
25
// medical devices, nuclear facilities, applications related to
26
// the deployment of airbags, or any other applications that could
27
// lead to death, personal injury or severe property or
28
// environmental damage (individually and collectively, "critical
29
// applications"). Customer assumes the sole risk and liability
30
// of any use of Xilinx products in critical applications,
31
// subject only to applicable laws and regulations governing
32
// limitations on product liability.
33
//
34
// Copyright 2006, 2007, 2008 Xilinx, Inc.
35
// All rights reserved.
36
//
37
// This disclaimer and copyright notice must be retained as part
38
// of this file at all times.
39
//*****************************************************************************
40
//   ____  ____
41
//  /   /\/   /
42
// /___/  \  /    Vendor: Xilinx
43
// \   \   \/     Version: 3.0
44
//  \   \         Application: MIG
45
//  /   /         Filename: ddr2_phy_calib.v
46
// /___/   /\     Date Last Modified: $Date: 2008/12/23 14:26:00 $
47
// \   \  /  \    Date Created: Thu Aug 10 2006
48
//  \___\/\___\
49
//
50
//Device: Virtex-5
51
//Design Name: DDR2
52
//Purpose:
53
//   This module handles calibration after memory initialization.
54
//Reference:
55
//Revision History:
56
//*****************************************************************************
57
 
58
`timescale 1ns/1ps
59
 
60
module ddr2_phy_calib #
61
  (
62
   // Following parameters are for 72-bit RDIMM design (for ML561 Reference 
63
   // board design). Actual values may be different. Actual parameters values 
64
   // are passed from design top module ddr2_mig module. Please refer to
65
   // the ddr2_mig module for actual values.
66
   parameter DQ_WIDTH      = 72,
67
   parameter DQ_BITS       = 7,
68
   parameter DQ_PER_DQS    = 8,
69
   parameter DQS_BITS      = 4,
70
   parameter DQS_WIDTH     = 9,
71
   parameter ADDITIVE_LAT  = 0,
72
   parameter CAS_LAT       = 5,
73
   parameter REG_ENABLE    = 1,
74
   parameter CLK_PERIOD    = 3000,
75
   parameter SIM_ONLY      = 0,
76
   parameter DEBUG_EN      = 0
77
   )
78
  (
79
   input                                   clk,
80
   input                                   clkdiv,
81
   input                                   rstdiv,
82
   input [3:0]                             calib_start,
83
   input                                   ctrl_rden,
84
   input                                   phy_init_rden,
85
   input [DQ_WIDTH-1:0]                    rd_data_rise,
86
   input [DQ_WIDTH-1:0]                    rd_data_fall,
87
   input                                   calib_ref_done,
88
   output reg [3:0]                        calib_done,
89
   output reg                              calib_ref_req,
90
   output [DQS_WIDTH-1:0]                  calib_rden,
91
   output reg [DQS_WIDTH-1:0]              calib_rden_sel,
92
   output reg                              dlyrst_dq,
93
   output reg [DQ_WIDTH-1:0]               dlyce_dq,
94
   output reg [DQ_WIDTH-1:0]               dlyinc_dq,
95
   output reg                              dlyrst_dqs,
96
   output reg [DQS_WIDTH-1:0]              dlyce_dqs,
97
   output reg [DQS_WIDTH-1:0]              dlyinc_dqs,
98
   output reg [DQS_WIDTH-1:0]              dlyrst_gate,
99
   output reg [DQS_WIDTH-1:0]              dlyce_gate,
100
   output reg [DQS_WIDTH-1:0]              dlyinc_gate,
101
   output [DQS_WIDTH-1:0]                  en_dqs,
102
   output [DQS_WIDTH-1:0]                  rd_data_sel,
103
   // Debug signals (optional use)
104
   input                                   dbg_idel_up_all,
105
   input                                   dbg_idel_down_all,
106
   input                                   dbg_idel_up_dq,
107
   input                                   dbg_idel_down_dq,
108
   input                                   dbg_idel_up_dqs,
109
   input                                   dbg_idel_down_dqs,
110
   input                                   dbg_idel_up_gate,
111
   input                                   dbg_idel_down_gate,
112
   input [DQ_BITS-1:0]                     dbg_sel_idel_dq,
113
   input                                   dbg_sel_all_idel_dq,
114
   input [DQS_BITS:0]                      dbg_sel_idel_dqs,
115
   input                                   dbg_sel_all_idel_dqs,
116
   input [DQS_BITS:0]                      dbg_sel_idel_gate,
117
   input                                   dbg_sel_all_idel_gate,
118
   output [3:0]                            dbg_calib_done,
119
   output [3:0]                            dbg_calib_err,
120
   output [(6*DQ_WIDTH)-1:0]               dbg_calib_dq_tap_cnt,
121
   output [(6*DQS_WIDTH)-1:0]              dbg_calib_dqs_tap_cnt,
122
   output [(6*DQS_WIDTH)-1:0]              dbg_calib_gate_tap_cnt,
123
   output [DQS_WIDTH-1:0]                  dbg_calib_rd_data_sel,
124
   output [(5*DQS_WIDTH)-1:0]              dbg_calib_rden_dly,
125
   output [(5*DQS_WIDTH)-1:0]              dbg_calib_gate_dly
126
   );
127
 
128
  // minimum time (in IDELAY taps) for which capture data must be stable for
129
  // algorithm to consider
130
  localparam MIN_WIN_SIZE = 5;
131
  // IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
132
  // we only have to wait enough for input with new IDELAY value to
133
  // propagate through pipeline stages.
134
  localparam IDEL_SET_VAL = 3'b111;
135
  // # of clock cycles to delay read enable to determine if read data pattern
136
  // is correct for stage 3/4 (RDEN, DQS gate) calibration
137
  localparam CALIB_RDEN_PIPE_LEN = 31;
138
  // translate CAS latency into number of clock cycles for read valid delay
139
  // determination. Really only needed for CL = 2.5 (set to 2)
140
  localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
141
  // an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
142
  // is the minimum possible delay through the SRL32
143
  localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
144
  // an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
145
  // gate. This is min possible value the SRL32 delay can be:
146
  //  - Delay from end of deassertion of CTRL_RDEN to last falling edge of
147
  //    read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
148
  //  - Minimum time for DQS gate circuit to be generated:
149
  //      * 1 cyc to register CTRL_RDEN from controller
150
  //      * 1 cyc after RDEN_CTRL falling edge
151
  //      * 1 cyc min through SRL32
152
  //      * 1 cyc through SRL32 output flop
153
  //      * 0 (<1) cyc of synchronization to DQS domain via IDELAY
154
  //      * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
155
  //    Total = 5 cyc < 6.5 cycles
156
  //    The total should be less than 5.5 cycles to account for prop delays
157
  //    adding one cycle to the synchronization time via the IDELAY.
158
  //    NOTE: Value differs because of optional pipeline register added
159
  //      for case of RDEN_BASE_DELAY > 3 to improve timing
160
  localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
161
  localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
162
  // used for RDEN calibration: difference between shift value used during
163
  // calibration, and shift value for actual RDEN SRL. Only applies when
164
  // RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
165
  // of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
166
  localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
167
  // fix minimum value of DQS_BITS to be 1 to handle the case where there's only
168
  // one DQS group. We could also enforce that user always inputs minimum
169
  // value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
170
  // Assume we don't have to do this for DQ, DQ_WIDTH always > 1
171
  localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
172
  // how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
173
  // current calibration, but leave for debug
174
  localparam DQ_IDEL_INIT = 6'b000000;
175
  // # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
176
  localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
177
             CLK_PERIOD/150 : 63;
178
 
179
  // used in various places during stage 4 cal: (1) determines maximum taps
180
  // to increment when finding right edge, (2) amount to decrement after
181
  // finding left edge, (3) amount to increment after finding right edge
182
  localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
183
             6'b100000 : BIT_TIME_TAPS;
184
 
185
  localparam CAL1_IDLE                   = 4'h0;
186
  localparam CAL1_INIT                   = 4'h1;
187
  localparam CAL1_INC_IDEL               = 4'h2;
188
  localparam CAL1_FIND_FIRST_EDGE        = 4'h3;
189
  localparam CAL1_FIRST_EDGE_IDEL_WAIT   = 4'h4;
190
  localparam CAL1_FOUND_FIRST_EDGE_WAIT  = 4'h5;
191
  localparam CAL1_FIND_SECOND_EDGE       = 4'h6;
192
  localparam CAL1_SECOND_EDGE_IDEL_WAIT  = 4'h7;
193
  localparam CAL1_CALC_IDEL              = 4'h8;
194
  localparam CAL1_DEC_IDEL               = 4'h9;
195
  localparam CAL1_DONE                   = 4'hA;
196
 
197
  localparam CAL2_IDLE                    = 4'h0;
198
  localparam CAL2_INIT                    = 4'h1;
199
  localparam CAL2_INIT_IDEL_WAIT          = 4'h2;
200
  localparam CAL2_FIND_EDGE_POS           = 4'h3;
201
  localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
202
  localparam CAL2_FIND_EDGE_NEG           = 4'h5;
203
  localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
204
  localparam CAL2_DEC_IDEL                = 4'h7;
205
  localparam CAL2_DONE                    = 4'h8;
206
 
207
  localparam CAL3_IDLE                    = 3'h0;
208
  localparam CAL3_INIT                    = 3'h1;
209
  localparam CAL3_DETECT                  = 3'h2;
210
  localparam CAL3_RDEN_PIPE_CLR_WAIT      = 3'h3;
211
  localparam CAL3_DONE                    = 3'h4;
212
 
213
  localparam CAL4_IDLE                    = 3'h0;
214
  localparam CAL4_INIT                    = 3'h1;
215
  localparam CAL4_FIND_WINDOW             = 3'h2;
216
  localparam CAL4_FIND_EDGE               = 3'h3;
217
  localparam CAL4_IDEL_WAIT               = 3'h4;
218
  localparam CAL4_RDEN_PIPE_CLR_WAIT      = 3'h5;
219
  localparam CAL4_ADJ_IDEL                = 3'h6;
220
  localparam CAL4_DONE                    = 3'h7;
221
 
222
  integer                        i, j;
223
 
224
  reg [5:0]                      cal1_bit_time_tap_cnt;
225
  reg [1:0]                      cal1_data_chk_last;
226
  reg                            cal1_data_chk_last_valid;
227
  reg [1:0]                      cal1_data_chk_r;
228
  reg                            cal1_dlyce_dq;
229
  reg                            cal1_dlyinc_dq;
230
  reg                            cal1_dqs_dq_init_phase;
231
  reg                            cal1_detect_edge;
232
  reg                            cal1_detect_stable;
233
  reg                            cal1_found_second_edge;
234
  reg                            cal1_found_rising;
235
  reg                            cal1_found_window;
236
  reg                            cal1_first_edge_done;
237
  reg [5:0]                      cal1_first_edge_tap_cnt;
238
  reg [6:0]                      cal1_idel_dec_cnt;
239
  reg [5:0]                      cal1_idel_inc_cnt;
240
  reg [5:0]                      cal1_idel_max_tap;
241
  reg                            cal1_idel_max_tap_we;
242
  reg [5:0]                      cal1_idel_tap_cnt;
243
  reg                            cal1_idel_tap_limit_hit;
244
  reg [6:0]                      cal1_low_freq_idel_dec;
245
  reg                            cal1_ref_req;
246
  wire                           cal1_refresh;
247
  reg [3:0]                      cal1_state;
248
  reg [3:0]                      cal1_window_cnt;
249
  reg                            cal2_curr_sel;
250
  wire                           cal2_detect_edge;
251
  reg                            cal2_dlyce_dqs;
252
  reg                            cal2_dlyinc_dqs;
253
  reg [5:0]                      cal2_idel_dec_cnt;
254
  reg [5:0]                      cal2_idel_tap_cnt;
255
  reg [5:0]                      cal2_idel_tap_limit;
256
  reg                            cal2_idel_tap_limit_hit;
257
  reg                            cal2_rd_data_fall_last_neg;
258
  reg                            cal2_rd_data_fall_last_pos;
259
  reg                            cal2_rd_data_last_valid_neg;
260
  reg                            cal2_rd_data_last_valid_pos;
261
  reg                            cal2_rd_data_rise_last_neg;
262
  reg                            cal2_rd_data_rise_last_pos;
263
  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel;
264
  wire                           cal2_rd_data_sel_edge;
265
  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel_r;
266
  reg                            cal2_ref_req;
267
  reg [3:0]                      cal2_state;
268
  reg                            cal3_data_match;
269
  reg                            cal3_data_match_stgd;
270
  wire                           cal3_data_valid;
271
  wire                           cal3_match_found;
272
  wire [4:0]                     cal3_rden_dly;
273
  reg [4:0]                      cal3_rden_srl_a;
274
  reg [2:0]                      cal3_state;
275
  wire                           cal4_data_good;
276
  reg                            cal4_data_match;
277
  reg                            cal4_data_match_stgd;
278
  wire                           cal4_data_valid;
279
  reg                            cal4_dlyce_gate;
280
  reg                            cal4_dlyinc_gate;
281
  reg                            cal4_dlyrst_gate;
282
  reg [4:0]                      cal4_gate_srl_a;
283
  reg [5:0]                      cal4_idel_adj_cnt;
284
  reg                            cal4_idel_adj_inc;
285
  reg                            cal4_idel_bit_tap;
286
  reg [5:0]                      cal4_idel_tap_cnt;
287
  reg                            cal4_idel_max_tap;
288
  reg [4:0]                      cal4_rden_srl_a;
289
  reg                            cal4_ref_req;
290
  reg                            cal4_seek_left;
291
  reg                            cal4_stable_window;
292
  reg [2:0]                      cal4_state;
293
  reg [3:0]                      cal4_window_cnt;
294
  reg [3:0]                      calib_done_tmp;         // only for stg1/2/4
295
  reg                            calib_ctrl_gate_pulse_r;
296
  reg                            calib_ctrl_rden;
297
  reg                            calib_ctrl_rden_r;
298
  wire                           calib_ctrl_rden_negedge;
299
  reg                            calib_ctrl_rden_negedge_r;
300
  reg [3:0]                      calib_done_r;
301
  reg [3:0]                      calib_err;
302
  reg [1:0]                      calib_err_2;
303
  wire                           calib_init_gate_pulse;
304
  reg                            calib_init_gate_pulse_r;
305
  reg                            calib_init_gate_pulse_r1;
306
  reg                            calib_init_rden;
307
  reg                            calib_init_rden_r;
308
  reg [4:0]                      calib_rden_srl_a;
309
  wire [4:0]                     calib_rden_srl_a_r;
310
  reg [(5*DQS_WIDTH)-1:0]        calib_rden_dly;
311
  reg                            calib_rden_edge_r;
312
  reg [4:0]                      calib_rden_pipe_cnt;
313
  wire                           calib_rden_srl_out;
314
  wire                           calib_rden_srl_out_r;
315
  reg                            calib_rden_srl_out_r1;
316
  reg                            calib_rden_valid;
317
  reg                            calib_rden_valid_stgd;
318
  reg [DQ_BITS-1:0]              count_dq;
319
  reg [DQS_BITS_FIX-1:0]         count_dqs;
320
  reg [DQS_BITS_FIX-1:0]         count_gate;
321
  reg [DQS_BITS_FIX-1:0]         count_rden;
322
  reg                            ctrl_rden_r;
323
  wire                           dlyce_or;
324
  reg [(5*DQS_WIDTH)-1:0]        gate_dly;
325
  wire [(5*DQS_WIDTH)-1:0]       gate_dly_r;
326
  wire                           gate_srl_in;
327
  wire [DQS_WIDTH-1:0]           gate_srl_out;
328
  wire [DQS_WIDTH-1:0]           gate_srl_out_r;
329
  reg [2:0]                      idel_set_cnt;
330
  wire                           idel_set_wait;
331
  reg [DQ_BITS-1:0]              next_count_dq;
332
  reg [DQS_BITS_FIX-1:0]         next_count_dqs;
333
  reg [DQS_BITS_FIX-1:0]         next_count_gate;
334
  reg                            phy_init_rden_r;
335
  reg                            phy_init_rden_r1;
336
  reg [DQ_WIDTH-1:0]             rd_data_fall_1x_r;
337
  reg [DQS_WIDTH-1:0]            rd_data_fall_1x_r1;
338
  reg [DQS_WIDTH-1:0]            rd_data_fall_2x_r;
339
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q1;
340
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q2;
341
  reg [DQ_WIDTH-1:0]             rd_data_rise_1x_r;
342
  reg [DQS_WIDTH-1:0]            rd_data_rise_1x_r1;
343
  reg [DQS_WIDTH-1:0]            rd_data_rise_2x_r;
344
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q1;
345
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q2;
346
  reg                            rdd_fall_q1;
347
  reg                            rdd_fall_q1_r;
348
  reg                            rdd_fall_q1_r1;
349
  reg                            rdd_fall_q2;
350
  reg                            rdd_fall_q2_r;
351
  reg                            rdd_rise_q1;
352
  reg                            rdd_rise_q1_r;
353
  reg                            rdd_rise_q1_r1;
354
  reg                            rdd_rise_q2;
355
  reg                            rdd_rise_q2_r;
356
  reg [DQS_BITS_FIX-1:0]         rdd_mux_sel;
357
  reg                            rden_dec;
358
  reg [(5*DQS_WIDTH)-1:0]        rden_dly;
359
  wire [(5*DQS_WIDTH)-1:0]       rden_dly_r;
360
  reg [4:0]                      rden_dly_0;
361
  reg                            rden_inc;
362
  reg [DQS_WIDTH-1:0]            rden_mux;
363
  wire [DQS_WIDTH-1:0]           rden_srl_out;
364
 
365
  // Debug
366
  integer                        x;
367
  reg [5:0]                      dbg_dq_tap_cnt [DQ_WIDTH-1:0];
368
  reg [5:0]                      dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
369
  reg [5:0]                      dbg_gate_tap_cnt [DQS_WIDTH-1:0];
370
 
371
  //***************************************************************************
372
  // Debug output ("dbg_phy_calib_*")
373
  // NOTES:
374
  //  1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
375
  //     although they are also static after calibration is complete. This
376
  //     means the user can either connect them to a Chipscope ILA, or to
377
  //     either a sync/async VIO input block. Using an async VIO has the
378
  //     advantage of not requiring these paths to meet cycle-to-cycle timing.
379
  //  2. The widths of most of these debug buses are dependent on the # of
380
  //     DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits))
381
  // SIGNAL DESCRIPTION:
382
  //  1. calib_done:   4 bits - each one asserted as each phase of calibration
383
  //                   is completed.
384
  //  2. calib_err:    4 bits - each one asserted when a calibration error
385
  //                   encountered for that stage. Some of these bits may not
386
  //                   be used (not all cal stages report an error).
387
  //  3. dq_tap_cnt:   final IDELAY tap counts for all DQ IDELAYs
388
  //  4. dqs_tap_cnt:  final IDELAY tap counts for all DQS IDELAYs
389
  //  5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
390
  //                   synchronization IDELAYs
391
  //  6. rd_data_sel:  final read capture MUX (either "positive" or "negative"
392
  //                   edge capture) settings for all DQS groups
393
  //  7. rden_dly:     related to # of cycles after issuing a read until when
394
  //                   read data is valid - for all DQS groups
395
  //  8. gate_dly:     related to # of cycles after issuing a read until when
396
  //                   clock enable for all DQ's is deasserted to prevent
397
  //                   effect of DQS postamble glitch - for all DQS groups
398
  //***************************************************************************
399
 
400
  //*****************************************************************
401
  // Record IDELAY tap values by "snooping" IDELAY control signals
402
  //*****************************************************************
403
 
404
  // Per-DQ shadow tap counters: one 6-bit counter per DQ bit that follows the
  // CE/INC strobes sent to that bit's IDELAY, cleared on rstdiv or dlyrst_dq.
  genvar dbg_dq_tc_i;
  generate
    for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
         dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
      always @(posedge clkdiv) begin
        if (rstdiv | dlyrst_dq) begin
          dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'b000000;
        end else begin
          // counter only moves on cycles where this bit's CE is asserted;
          // INC picks the direction
          if (dlyce_dq[dbg_dq_tc_i]) begin
            if (dlyinc_dq[dbg_dq_tc_i]) begin
              dbg_dq_tap_cnt[dbg_dq_tc_i]
                <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 6'd1;
            end else begin
              dbg_dq_tap_cnt[dbg_dq_tc_i]
                <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 6'd1;
            end
          end
        end
      end

      // pack this counter into its 6-bit slice of the flat debug bus
      assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i) +: 6]
               = dbg_dq_tap_cnt[dbg_dq_tc_i];
    end
  endgenerate
424
 
425
  // Per-DQS shadow tap counters: one 6-bit counter per DQS that follows the
  // CE/INC strobes sent to that DQS IDELAY, cleared on rstdiv or dlyrst_dqs.
  genvar dbg_dqs_tc_i;
  generate
    for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
         dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
      always @(posedge clkdiv) begin
        if (rstdiv | dlyrst_dqs) begin
          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'b000000;
        end else begin
          // counter only moves on cycles where this DQS's CE is asserted;
          // INC picks the direction
          if (dlyce_dqs[dbg_dqs_tc_i]) begin
            if (dlyinc_dqs[dbg_dqs_tc_i]) begin
              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 6'd1;
            end else begin
              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 6'd1;
            end
          end
        end
      end

      // pack this counter into its 6-bit slice of the flat debug bus
      assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i) +: 6]
               = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
    end
  endgenerate
445
 
446
  // Per-DQS-gate shadow tap counters: one 6-bit counter per DQS group that
  // follows the CE/INC strobes sent to that gate IDELAY. Unlike the DQ/DQS
  // counters, the clear term uses the per-group bit of dlyrst_gate.
  genvar dbg_gate_tc_i;
  generate
    for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
         dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
      always @(posedge clkdiv) begin
        if (rstdiv | dlyrst_gate[dbg_gate_tc_i]) begin
          dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'b000000;
        end else begin
          // counter only moves on cycles where this gate's CE is asserted;
          // INC picks the direction
          if (dlyce_gate[dbg_gate_tc_i]) begin
            if (dlyinc_gate[dbg_gate_tc_i]) begin
              dbg_gate_tap_cnt[dbg_gate_tc_i]
                <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 6'd1;
            end else begin
              dbg_gate_tap_cnt[dbg_gate_tc_i]
                <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 6'd1;
            end
          end
        end
      end

      // pack this counter into its 6-bit slice of the flat debug bus
      assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i) +: 6]
               = dbg_gate_tap_cnt[dbg_gate_tc_i];
    end
  endgenerate
466
 
467
  // Export internal calibration status and settings on the debug ports.
  // These are plain pass-throughs of the corresponding internal signals.
  assign dbg_calib_err         = calib_err;
  assign dbg_calib_done        = calib_done;
  assign dbg_calib_gate_dly    = gate_dly;
  assign dbg_calib_rden_dly    = rden_dly;
  assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
472
 
473
  //***************************************************************************
474
  // Read data pipelining, and read data "ISERDES" data width expansion
475
  //***************************************************************************
476
 
477
  // For all data bits, register incoming capture data to slow clock to improve
478
  // timing. Adding single pipeline stage does not affect functionality (as
479
  // long as we make sure to wait extra clock cycle after changing DQ IDELAY)
480
  // Also note in this case that we're "missing" every other clock cycle's
481
  // worth of data capture since we're sync'ing to the slow clock. This is
482
  // fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
483
  // for different circuit to handle those stages)
484
  // One pipeline stage on clkdiv for the full-width rise/fall capture buses
  always @(posedge clkdiv) begin
    rd_data_fall_1x_r <= rd_data_fall;
    rd_data_rise_1x_r <= rd_data_rise;
  end
488
 
489
  // For every DQ_PER_DQS bit, generate what is essentially a ISERDES-type
490
  // data width expander. Will need this for stage 3 and 4 cal, where we need
491
  // to compare data over consecutive clock cycles. We can also use this for
492
  // stage 2 as well (stage 2 doesn't require every bit to be looked at, only
493
  // one bit per DQS group)
494
  genvar rdd_i;
  generate
    for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
      // Only the lowest-indexed DQ of each DQS group (bit rdd_i*DQ_PER_DQS)
      // is tapped here.
      //
      // Stage 1 (fast clock): sample that bit on every clk edge so the value
      // from the "other" fast cycle is available to the slow domain.
      always @(posedge clk) begin
        rd_data_fall_2x_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
        rd_data_rise_2x_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
      end

      // Stage 2 (slow clock): re-register the fast-clock samples on clkdiv.
      // Together with the rd_data_*_1x_r flops this forms the second stage.
      always @(posedge clkdiv) begin
        rd_data_fall_1x_r1[rdd_i] <= rd_data_fall_2x_r[rdd_i];
        rd_data_rise_1x_r1[rdd_i] <= rd_data_rise_2x_r[rdd_i];
      end

      // Four check outputs per group: rise/fall over the last two fast clock
      // cycles. The Q1/Q2 time ordering is not fixed. It is either
      //   (1) Q2 = data @ n,     Q1 = data @ n+1, or
      //   (2) Q2 = data @ n - 1, Q1 = data @ n  ([Q1,Q2] "staggered"),
      // and it is left to the consuming calibration stage to work out which
      // applies, if it matters at all (stage 2 does not care).
      //
      // Q1 comes from the direct clkdiv capture path
      assign rd_data_fall_chk_q1[rdd_i]
               = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
      assign rd_data_rise_chk_q1[rdd_i]
               = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
      // Q2 comes from the clk -> clkdiv two-flop path
      assign rd_data_fall_chk_q2[rdd_i]
               = rd_data_fall_1x_r1[rdd_i];
      assign rd_data_rise_chk_q2[rdd_i]
               = rd_data_rise_1x_r1[rdd_i];
    end
  endgenerate
527
 
528
  //*****************************************************************
529
  // Outputs of these simplified ISERDES circuits then feed MUXes based on
530
  // which DQ the current calibration algorithm needs to look at
531
  //*****************************************************************
532
 
533
  // generate MUX control; assume that adding an extra pipeline stage isn't
534
  // an issue - whatever stage cal logic is using output of MUX will wait
535
  // enough time after changing it
536
  // Registered select for the per-DQS-group read-data check MUX. The source
  // of the select depends on which calib_done[2:0] bits are currently set:
  //   3'b001 -> next_count_dqs, 3'b011 -> count_rden, 3'b111 -> next_count_gate
  // NOTE(review): there is no default arm. In simulation any other value of
  // calib_done[2:0] leaves rdd_mux_sel unchanged, while the full_case
  // attribute lets synthesis treat those values as don't-care - confirm this
  // mismatch is acceptable (the MUX output is presumably unused until
  // calib_done[0] is set).
  always @(posedge clkdiv) begin
537
    (* full_case, parallel_case *) case (calib_done[2:0])
538
      // only bit 0 set
      3'b001: rdd_mux_sel <= next_count_dqs;
539
      // bits 1:0 set
      3'b011: rdd_mux_sel <= count_rden;
540
      // bits 2:0 set
      3'b111: rdd_mux_sel <= next_count_gate;
541
    endcase
542
  end
543
 
544
  // Registered MUX: pick the bit addressed by rdd_mux_sel out of each of the
  // four check vectors (rise/fall x Q1/Q2)
  always @(posedge clkdiv) begin
    rdd_fall_q2 <= rd_data_fall_chk_q2[rdd_mux_sel];
    rdd_fall_q1 <= rd_data_fall_chk_q1[rdd_mux_sel];
    rdd_rise_q2 <= rd_data_rise_chk_q2[rdd_mux_sel];
    rdd_rise_q1 <= rd_data_rise_chk_q1[rdd_mux_sel];
  end
550
 
551
  //***************************************************************************
552
  // Demultiplexor to control (reset, increment, decrement) IDELAY tap values
553
  //   For DQ:
554
  //     STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per-bit
555
  //       deskew, increment all bits in the current DQS set
556
  //     STG2: inc/dec all DQ's in the current DQS set.
557
  // NOTE: Nice to add some error checking logic here (or elsewhere in the
558
  //       code) to check if logic attempts to overflow tap value
559
  //***************************************************************************
560
 
561
  // don't use DLYRST to reset value of IDELAY after reset. Need to change this
562
  // if we want to allow user to recalibrate after initial reset
563
  // DQ/DQS IDELAY reset: asserted on the clkdiv edge where rstdiv is seen
  // high and deasserted otherwise. The unconditional 0 is overridden by
  // the later assignment when rstdiv is set.
  always @(posedge clkdiv) begin
    dlyrst_dqs <= 1'b0;
    dlyrst_dq  <= 1'b0;
    if (rstdiv) begin
      dlyrst_dqs <= 1'b1;
      dlyrst_dq  <= 1'b1;
    end
  end
571
 
572
  // Demultiplexer for the DQ and DQS IDELAY CE/INC strobes.
  // Every clkdiv cycle all four vectors are first defaulted to zero, then at
  // most one source overrides selected bits (priority order):
  //   1. cal1_dlyce_dq  - stage 1 request
  //   2. cal2_dlyce_dqs - stage 2 request
  //   3. dbg_* inputs   - only when DEBUG_EN != 0
  // The overrides rely on the last nonblocking assignment to a bit winning,
  // so statement order inside this block matters.
  always @(posedge clkdiv) begin
573
    if (rstdiv) begin
574
      dlyce_dq   <= 'b0;
575
      dlyinc_dq  <= 'b0;
576
      dlyce_dqs  <= 'b0;
577
      dlyinc_dqs <= 'b0;
578
    end else begin
579
      // defaults for this cycle; selectively overridden below
      dlyce_dq   <= 'b0;
580
      dlyinc_dq  <= 'b0;
581
      dlyce_dqs  <= 'b0;
582
      dlyinc_dqs <= 'b0;
583
 
584
      // stage 1 cal: change only specified DQ
585
      if (cal1_dlyce_dq) begin
586
        if (SIM_ONLY == 0) begin
587
          // only the DQ addressed by count_dq is stepped
          dlyce_dq[count_dq] <= 1'b1;
588
          dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
589
        end else begin
590
          // if simulation, then calibrate only first DQ, apply results
591
          // to all DQs (i.e. assume delay on all DQs is the same)
592
          for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
593
            dlyce_dq[i] <= 1'b1;
594
            dlyinc_dq[i] <= cal1_dlyinc_dq;
595
          end
596
        end
597
      end else if (cal2_dlyce_dqs) begin
598
        // stage 2 cal: change DQS and all corresponding DQ's
599
        if (SIM_ONLY == 0) begin
600
          dlyce_dqs[count_dqs] <= 1'b1;
601
          dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
602
          // DQ bits of group count_dqs sit at indices
          // DQ_PER_DQS*count_dqs .. DQ_PER_DQS*count_dqs + DQ_PER_DQS-1
          for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
603
            dlyce_dq[(DQ_PER_DQS*count_dqs)+i] <= 1'b1;
604
            dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
605
          end
606
        end else begin
607
          // simulation: the same step is applied to every DQS and every DQ
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
608
            // if simulation, then calibrate only first DQS
609
            dlyce_dqs[i] <= 1'b1;
610
            dlyinc_dqs[i] <= cal2_dlyinc_dqs;
611
            for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
612
              dlyce_dq[(DQ_PER_DQS*i)+j] <= 1'b1;
613
              dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
614
            end
615
          end
616
        end
617
      end else if (DEBUG_EN != 0) begin
618
        // DEBUG: allow user to vary IDELAY tap settings
619
        // For DQ IDELAY taps
620
        // "all" branch: taken for a global up/down request or when
        // dbg_sel_all_idel_dq is set; CE is still only asserted while one of
        // the up/down strobes is active
        if (dbg_idel_up_all || dbg_idel_down_all ||
621
            dbg_sel_all_idel_dq) begin
622
          for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
623
            dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
624
                           dbg_idel_up_dq  | dbg_idel_down_dq;
625
            dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
626
          end
627
        end else begin
628
          // single-DQ branch: step only the DQ addressed by dbg_sel_idel_dq
          // (the clear below repeats the default set at the top of this branch)
          // NOTE(review): dbg_sel_idel_dq is DQ_BITS wide and may address an
          // index >= DQ_WIDTH - confirm callers keep it in range
          dlyce_dq <= 'b0;
629
          dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
630
                                       dbg_idel_down_dq;
631
          dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
632
        end
633
        // For DQS IDELAY taps
634
        // same structure as the DQ debug control above, applied to DQS
        if (dbg_idel_up_all || dbg_idel_down_all ||
635
            dbg_sel_all_idel_dqs) begin
636
          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
637
            dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
638
                            dbg_idel_up_dqs | dbg_idel_down_dqs;
639
            dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
640
          end
641
        end else begin
642
          // single-DQS branch: step only the DQS addressed by dbg_sel_idel_dqs
          dlyce_dqs <= 'b0;
643
          dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
644
                                         dbg_idel_down_dqs;
645
          dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
646
        end
647
      end
648
    end
649
  end
650
 
651
  // GATE synchronization is handled directly by Stage 4 calibration FSM
652
  // Registered reset / clock-enable / increment strobes for the per-DQS gate
  // delay elements. While RSTDIV is high every gate delay is held in reset.
  // Otherwise all strobes default to 0 and are raised only on request from
  // stage 4 cal or, at lower priority and only when DEBUG_EN != 0, from the
  // debug port.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      dlyrst_gate <= {DQS_WIDTH{1'b1}};
      dlyce_gate  <= {DQS_WIDTH{1'b0}};
      dlyinc_gate <= {DQS_WIDTH{1'b0}};
    end else begin
      // defaults - selectively overridden below (last assignment wins)
      dlyrst_gate <= {DQS_WIDTH{1'b0}};
      dlyce_gate  <= {DQS_WIDTH{1'b0}};
      dlyinc_gate <= {DQS_WIDTH{1'b0}};

      // reset request from stage 4 (independent of the CE/INC request)
      if (cal4_dlyrst_gate) begin
        if (SIM_ONLY != 0) begin
          // simulation: reset every gate delay
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
            dlyrst_gate[i] <= 1'b1;
          end
        end else begin
          // hardware: reset only the gate delay selected by COUNT_GATE
          dlyrst_gate[count_gate] <= 1'b1;
        end
      end

      if (cal4_dlyce_gate) begin
        if (SIM_ONLY != 0) begin
          // simulation: the request is broadcast to every gate delay
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
            dlyce_gate[i]  <= 1'b1;
            dlyinc_gate[i] <= cal4_dlyinc_gate;
          end
        end else begin
          // hardware: adjust only the gate delay selected by COUNT_GATE
          dlyce_gate[count_gate]  <= 1'b1;
          dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
        end
      end else if (DEBUG_EN != 0) begin
        // debug port: adjust all gate delays, or the one picked by
        // DBG_SEL_IDEL_GATE
        if (dbg_idel_up_all || dbg_idel_down_all ||
            dbg_sel_all_idel_gate) begin
          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
            dlyce_gate[x]  <= dbg_idel_up_all  | dbg_idel_down_all |
                              dbg_idel_up_gate | dbg_idel_down_gate;
            dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
          end
        end else begin
          // unselected bits keep the all-zero default assigned above
          dlyce_gate[dbg_sel_idel_gate]  <= dbg_idel_up_gate |
                                            dbg_idel_down_gate;
          dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
        end
      end
    end
  end
699
 
700
  //***************************************************************************
701
  // signal to tell calibration state machines to wait and give IDELAY time to
702
  // settle after its value is changed (both time for IDELAY chain to settle,
703
  // and for settled output to propagate through ISERDES). For general use: use
704
  // for any calibration state machines that modify any IDELAY.
705
  // Should give at least enough time for IDELAY output to settle (technically
706
  // for V5, this should be "glitchless" when IDELAY taps are changed, so don't
707
  // need any time here), and also time for new data to propagate through both
708
  // ISERDES and the "RDD" MUX + associated pipelining
709
  // For now, give very "generous" delay - doesn't really matter since only
710
  // needed during calibration
711
  //***************************************************************************
712
 
713
  // determine if calibration polarity has changed
714
  // one-cycle delayed copy of CAL2_RD_DATA_SEL, used for change detection
  always @(posedge clkdiv) begin
    cal2_rd_data_sel_r <= cal2_rd_data_sel;
  end

  // high when any bit of CAL2_RD_DATA_SEL differs from its previous-cycle
  // value
  assign cal2_rd_data_sel_edge = |(cal2_rd_data_sel_r ^ cal2_rd_data_sel);
718
 
719
  // combine requests to modify any of the IDELAYs into one. Also when second
720
  // stage capture "edge" polarity is changed (IDELAY isn't changed in this
721
  // case, but use the same counter to stall cal logic)
722
  // single "something changed" request: any stage 1/2/4 delay adjustment,
  // a stage 4 gate delay reset, or a change of the stage 2 capture polarity
  assign dlyce_or = cal4_dlyrst_gate |
                    cal4_dlyce_gate |
                    cal2_rd_data_sel_edge |
                    cal2_dlyce_dqs |
                    cal1_dlyce_dq;

  // SYN_NOTE: Can later recode to avoid combinational path
  // Wait flag seen by the calibration FSMs: high in the same cycle as a
  // request, and until the settle counter below reaches IDEL_SET_VAL
  assign idel_set_wait = (idel_set_cnt != IDEL_SET_VAL) || dlyce_or;

  // Settle counter: restarted by reset or by any new request, then counts
  // up and saturates at IDEL_SET_VAL
  always @(posedge clkdiv) begin
    if (rstdiv || dlyce_or)
      idel_set_cnt <= 4'd0;
    else if (idel_set_cnt != IDEL_SET_VAL)
      idel_set_cnt <= idel_set_cnt + 1;
  end
738
 
739
  // generate request to PHY_INIT logic to issue auto-refresh
740
  // used by certain states to force prech/auto-refresh part way through
741
  // calibration to avoid a tRAS violation (which will happen if that
742
  // stage of calibration lasts long enough). This signal must meet the
743
  // following requirements: (1) only transition from 0->1 when the refresh
744
  // request is needed, (2) stay at 1 and only transition 1->0 when
745
  // CALIB_REF_DONE is asserted
746
  // registered OR of the per-stage refresh requests (stages 1, 2 and 4);
  // cleared while RSTDIV is high
  always @(posedge clkdiv) begin
    if (rstdiv)
      calib_ref_req <= 1'b0;
    else
      calib_ref_req <= cal4_ref_req | cal2_ref_req | cal1_ref_req;
  end
751
 
752
  // stage 1 calibration requests auto-refresh every 4 bits
753
  // CAL1_REFRESH is high whenever the two LSBs of NEXT_COUNT_DQ are zero,
  // i.e. on every 4th DQ index. When the DQ index is narrower than 2 bits
  // the [1:0] slice does not exist, so the flag is tied low instead.
  generate
    if (DQ_BITS >= 2) begin: gen_cal1_refresh_dq_gt4
      assign cal1_refresh = (next_count_dq[1:0] == 2'd0);
    end else begin: gen_cal1_refresh_dq_lte4
      assign cal1_refresh = 1'b0;
    end
  endgenerate
760
 
761
  //***************************************************************************
762
  // First stage calibration: DQ-DQS
763
  // Definitions:
764
  //  edge: detected when varying IDELAY, and current capture data != prev
765
  //    capture data
766
  //  valid bit window: detected when current capture data == prev capture
767
  //    data for more than half the bit time
768
  //  starting conditions for DQS-DQ phase:
769
  //    case 1: when DQS starts somewhere in rising edge bit window, or
770
  //      on the right edge of the rising bit window.
771
  //    case 2: when DQS starts somewhere in falling edge bit window, or
772
  //      on the right edge of the falling bit window.
773
  // Algorithm Description:
774
  //  1. Increment DQ IDELAY until we find an edge.
775
  //  2. While we're finding the first edge, note whether a valid bit window
776
  //     has been detected before we found an edge. If so, then figure out if
777
  //     this is the rising or falling bit window. If rising, then our starting
778
  //     DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
779
  //     a valid bit window, then we must have started on the edge of a window.
780
  //     Need to wait until later on to decide which case we are.
781
  //       - Store FIRST_EDGE IDELAY value
782
  //  3. Now look for second edge.
783
  //  4. While we're finding the second edge, note whether valid bit window
784
  //     is detected. If so, then use it, along with results from (2), to figure
785
  //     out what the starting case is. If in rising bit window, then we're in
786
  //     case 2. If falling, then case 1.
787
  //       - Store SECOND_EDGE IDELAY value
788
  //     NOTES:
789
  //       a. Finding two edges allows us to calculate the bit time (although
790
  //          not the "same" bit time polarity - need to investigate this
791
  //          more).
792
  //       b. If we run out of taps looking for the second edge, then the bit
793
  //       time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
794
  //       case 1).
795
  //  5. Calculate absolute amount to delay DQ as:
796
  //       If second edge found, and case 1:
797
  //         - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
798
  //       If second edge found, and case 2:
799
  //         - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
800
  //       If second edge not found, then need to make an approximation on
801
  //       how much to shift by (should be okay, because we have more timing
802
  //       margin):
803
  //         - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
804
  //     NOTE: Does this account for either case 1 or case 2?????
805
  //     NOTE: It's also possible even when we find the second edge, that
806
  //           to instead just use half the bit time to subtract from either
807
  //           FIRST or SECOND_EDGE. Finding the actual bit time (which is
808
  //           what (SECOND_EDGE - FIRST_EDGE) is, is slightly more accurate,
809
  //           since it takes into account duty cycle distortion.
810
  //  6. Repeat for each DQ in current DQS set.
811
  //***************************************************************************
812
 
813
  //*****************************************************************
814
  // for first stage calibration - used for checking if DQS is aligned to the
815
  // particular DQ, such that we're in the data valid window. Basically, this
816
  // is one giant MUX.
817
  //  = [falling data, rising data]
818
  //  = [0, 1] = rising DQS aligned in proper (rising edge) bit window
819
  //  = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
820
  //  = [0, 0], or [1,1] = in uncertain region between windows
821
  //*****************************************************************
822
 
823
  // SYN_NOTE: May have to split this up into multiple levels - MUX can get
824
  //  very wide - as wide as the data bus width
825
  // register {fall, rise} capture data for the DQ bit indexed by
  // NEXT_COUNT_DQ (fall data in the MSB, rise data in the LSB)
  always @(posedge clkdiv) begin
    cal1_data_chk_r <= {
      rd_data_fall_1x_r[next_count_dq],
      rd_data_rise_1x_r[next_count_dq]
    };
  end
828
 
829
  //*****************************************************************
830
  // determine when an edge has occurred - when either the current value
831
  // is different from the previous latched value or when the DATA_CHK
832
  // outputs are the same (rare, but indicates that we're at an edge)
833
  // This is only valid when the IDELAY output and propagation of the
834
  // data through the capture flops has had a chance to settle out.
835
  //*****************************************************************
836
 
837
  // write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that
838
  // if X's are captured on the bus during functional simulation, that
839
  // the logic will register this as an edge detected. Do this to allow
840
  // use of this HDL with Denali memory models (Denali models drive DQ
841
  // to X's on both edges of the data valid window to simulate jitter)
842
  // This is only done for functional simulation purposes. **Should not**
843
  // make the final synthesized logic more complicated, but it does make
844
  // the HDL harder to understand b/c we have to "phrase" the logic
845
  // slightly differently than when not worrying about X's
846
  always @(*) begin
    // Assume an edge, then clear the flag only when the data is provably
    // "no edge". Written as default + IF (rather than a conditional
    // expression) on purpose: an X anywhere in the condition makes the IF
    // fail, so X's on the bus are reported as an edge.
    cal1_detect_edge = 1'b1;

    // "no edge" means either:
    //  (1) a previous sample has been stored and the current sample is
    //      identical to it, or
    //  (2) no previous sample is stored yet, but the current {fall, rise}
    //      pair is [0,1] or [1,0] (i.e. not [0,0]/[1,1], which would mean
    //      we are in the middle of an edge, since normally rise != fall
    //      data for stg1)
    if ((cal1_data_chk_last_valid &&
         (cal1_data_chk_r == cal1_data_chk_last)) ||
        (!cal1_data_chk_last_valid &&
         ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10))))
      cal1_detect_edge = 1'b0;
  end
860
 
861
  // Combinational "stable sample" flag. Asserted only when all of the
  // following hold:
  //   - a previous sample has been stored (CAL1_DATA_CHK_LAST_VALID),
  //   - the current {fall, rise} sample equals that stored sample, and
  //   - the sample is one of the two valid patterns, [0,1] or [1,0].
  // Any X in the condition makes the IF fail, so the flag reads 0.
  // Blocking assignments are used because this is combinational logic
  // (same coding style as CAL1_DETECT_EDGE); non-blocking assignments in an
  // always @(*) block defer the update and are flagged by lint tools.
  always @(*) begin
    if ((cal1_data_chk_last_valid &&
         (cal1_data_chk_r == cal1_data_chk_last)) &&
        ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10)))
      cal1_detect_stable = 1'b1;
    else
      cal1_detect_stable = 1'b0;
  end
871
 
872
  //*****************************************************************
873
  // Find valid window: keep track of how long we've been in the same data
874
  // window. If it's been long enough, then declare that we've found a valid
875
  // window. Also returns whether we found a rising or falling window (only
876
  // valid when found_window is asserted)
877
  //*****************************************************************
878
 
879
  // Window tracker: counts consecutive "stable" samples seen at the end of
  // each IDELAY settling period while searching for the first/second edge.
  // Once MIN_WIN_SIZE of them have been seen in a row, FOUND_WINDOW is set
  // and FOUND_RISING records which pattern the window holds.
  always @(posedge clkdiv) begin
    if ((cal1_state == CAL1_INIT) || !cal1_data_chk_last_valid) begin
      // Start of a new bit, or no reference sample stored (never stored, or
      // invalidated because an edge was just found): restart the search so
      // that FOUND_WINDOW is deasserted on the following clock edge. This
      // avoids reporting a false window immediately after finding an edge.
      cal1_window_cnt   <= 4'd0;
      cal1_found_window <= 1'b0;
      cal1_found_rising <= 1'bx;
    end else if (!idel_set_wait &&
                 ((cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT) ||
                  (cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT))) begin
      // settling time for the latest tap change has elapsed - evaluate
      if (cal1_detect_stable) begin
        cal1_window_cnt <= cal1_window_cnt + 1;
        if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
          cal1_found_window <= 1'b1;
          // CAL1_DETECT_STABLE guarantees the sample is 2'b01 or 2'b10 here;
          // 2'b01 (fall = 0, rise = 1) is the rising window
          cal1_found_rising <= (cal1_data_chk_r == 2'b01);
        end
      end else begin
        // not in a data valid window: restart the count. This should
        // happen by design at least once after finding the first edge.
        cal1_window_cnt   <= 4'd0;
        cal1_found_window <= 1'b0;
        cal1_found_rising <= 1'bx;
      end
    end
  end
921
 
922
  //*****************************************************************
923
  // keep track of edge tap counts found, and whether we've
924
  // incremented to the maximum number of taps allowed
925
  //*****************************************************************
926
 
927
  // Shadow counter of the stage 1 tap position. Cleared in CAL1_INIT, then
  // moved up or down by one on every CAL1_DLYCE_DQ pulse according to
  // CAL1_DLYINC_DQ. LIMIT_HIT is set by the increment that leaves tap
  // count 62 (i.e. moves the count to 63), and cleared by any decrement.
  always @(posedge clkdiv) begin
    if (cal1_state == CAL1_INIT) begin
      cal1_idel_tap_cnt       <= 6'd0;
      cal1_idel_tap_limit_hit <= 1'b0;
    end else if (cal1_dlyce_dq) begin
      if (cal1_dlyinc_dq) begin
        cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'd62);
        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt + 1;
      end else begin
        cal1_idel_tap_limit_hit <= 1'b0;
        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt - 1;
      end
    end
  end
940
 
941
  //*****************************************************************
942
  // Pipeline for better timing - amount to decrement by if second
943
  // edge not found
944
  //*****************************************************************
945
  // if only one edge found (possible for low frequencies), then:
946
  //  1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
947
  //  2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
948
  //     (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
949
  //     DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
950
  //     of DQ window.
951
  //  3. Clamp the above value at 63 to ensure we don't underflow IDELAY
952
  //     (note: clamping happens in the CAL1 state machine)
953
  // registered every cycle: (63 - first edge tap count) + half a bit time,
  // both expressed in taps
  always @(posedge clkdiv) begin
    cal1_low_freq_idel_dec <=
      (7'd63 - {1'b0, cal1_first_edge_tap_cnt}) + (BIT_TIME_TAPS/2);
  end
957
 
958
  //*****************************************************************
959
  // Keep track of max taps used during stage 1, use this to limit
960
  // the number of taps that can be used in stage 2
961
  //*****************************************************************
962
 
963
  // Running maximum of the stage 1 tap count, sampled in CAL1_DONE.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      cal1_idel_max_tap_we <= 1'b0;
      cal1_idel_max_tap    <= 6'd0;
    end else begin
      // the compare is pipelined by one cycle: MAX_TAP_WE reflects the
      // previous cycle's values
      cal1_idel_max_tap_we <= (cal1_idel_tap_cnt > cal1_idel_max_tap);
      // latch a new maximum only while in CAL1_DONE
      if (cal1_idel_max_tap_we && (cal1_state == CAL1_DONE))
        cal1_idel_max_tap <= cal1_idel_tap_cnt;
    end
  end
976
 
977
  //*****************************************************************
978
 
979
  // Stage 1 calibration state machine. For each DQ bit in turn:
  //   INIT -> INC_IDEL -> search for first edge -> search for second edge
  //   -> CALC_IDEL -> DEC_IDEL -> DONE
  // CAL1_DLYCE_DQ / CAL1_DLYINC_DQ / CAL1_REF_REQ are pulse outputs: they
  // default to 0 every cycle and are raised only by the active state.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      cal1_state               <= CAL1_IDLE;
      calib_done[0]            <= 1'b0;
      calib_err[0]             <= 1'b0;
      count_dq                 <= {DQ_BITS{1'b0}};
      next_count_dq            <= {DQ_BITS{1'b0}};
      cal1_dlyce_dq            <= 1'b0;
      cal1_dlyinc_dq           <= 1'b0;
      cal1_ref_req             <= 1'b0;
      // the registers below are don't-care out of reset; each one is
      // written by the FSM before it is used
      calib_done_tmp[0]        <= 1'bx;
      cal1_bit_time_tap_cnt    <= 6'bx;
      cal1_data_chk_last       <= 2'bx;
      cal1_data_chk_last_valid <= 1'bx;
      cal1_dqs_dq_init_phase   <= 1'bx;
      cal1_first_edge_done     <= 1'bx;
      cal1_found_second_edge   <= 1'bx;
      cal1_first_edge_tap_cnt  <= 6'bx;
      cal1_idel_dec_cnt        <= 7'bx;
      cal1_idel_inc_cnt        <= 6'bx;
    end else begin
      // pulse outputs: default low
      cal1_ref_req   <= 1'b0;
      cal1_dlyce_dq  <= 1'b0;
      cal1_dlyinc_dq <= 1'b0;

      case (cal1_state)
        // wait for the start request; DQ index is held at 0
        CAL1_IDLE: begin
          count_dq      <= {DQ_BITS{1'b0}};
          next_count_dq <= {DQ_BITS{1'b0}};
          if (calib_start[0]) begin
            calib_done[0]     <= 1'b0;
            calib_done_tmp[0] <= 1'b0;
            cal1_state        <= CAL1_INIT;
          end
        end

        // per-bit initialization
        CAL1_INIT: begin
          cal1_data_chk_last_valid <= 1'b0;
          cal1_found_second_edge   <= 1'b0;
          cal1_dqs_dq_init_phase   <= 1'b0;
          cal1_idel_inc_cnt        <= 6'd0;
          cal1_state               <= CAL1_INC_IDEL;
        end

        // Apply the initial offset: issue DQ_IDEL_INIT increments (one per
        // cycle), then wait for the delay to settle before searching.
        CAL1_INC_IDEL: begin
          if (!idel_set_wait && (cal1_idel_inc_cnt == DQ_IDEL_INIT)) begin
            cal1_state <= CAL1_FIND_FIRST_EDGE;
          end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
            cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
            cal1_dlyce_dq     <= 1'b1;
            cal1_dlyinc_dq    <= 1'b1;
          end
        end

        // look for first edge
        CAL1_FIND_FIRST_EDGE: begin
          // a valid window seen before the first edge fixes the starting
          // DQS-DQ phase
          if (cal1_found_window)
            cal1_dqs_dq_init_phase <= ~cal1_found_rising;

          if (cal1_detect_edge) begin
            // first edge found: record its tap position and invalidate the
            // stored reference sample
            cal1_first_edge_tap_cnt  <= cal1_idel_tap_cnt;
            cal1_first_edge_done     <= 1'b0;
            cal1_data_chk_last_valid <= 1'b0;
            cal1_state               <= CAL1_FOUND_FIRST_EDGE_WAIT;
          end else begin
            // no edge yet: store the current sample as the new reference,
            // step the delay by one tap and check again after it settles
            cal1_data_chk_last       <= cal1_data_chk_r;
            cal1_data_chk_last_valid <= 1'b1;
            cal1_dlyce_dq            <= 1'b1;
            cal1_dlyinc_dq           <= 1'b1;
            cal1_state               <= CAL1_FIRST_EDGE_IDEL_WAIT;
          end
        end

        // wait for DQ IDELAY to settle
        CAL1_FIRST_EDGE_IDEL_WAIT: begin
          if (!idel_set_wait)
            cal1_state <= CAL1_FIND_FIRST_EDGE;
        end

        // one-cycle bubble between the two searches, so CAL1_FOUND_WINDOW
        // is invalidated before the second-edge search starts
        CAL1_FOUND_FIRST_EDGE_WAIT: begin
          cal1_state <= CAL1_FIND_SECOND_EDGE;
        end

        // look for second edge
        CAL1_FIND_SECOND_EDGE: begin
          // An edge only counts as the second edge after a valid window has
          // been seen past the first one (guards against false edges).
          if (cal1_found_window) begin
            cal1_first_edge_done   <= 1'b1;
            cal1_dqs_dq_init_phase <= cal1_found_rising;
          end

          if (cal1_idel_tap_limit_hit) begin
            // ran out of taps: finish with only one edge found
            cal1_state <= CAL1_CALC_IDEL;
          end else if (cal1_first_edge_done && cal1_detect_edge) begin
            // second edge found: record distance between the two edges
            cal1_found_second_edge <= 1'b1;
            cal1_bit_time_tap_cnt  <= cal1_idel_tap_cnt -
                                      cal1_first_edge_tap_cnt + 1;
            cal1_state             <= CAL1_CALC_IDEL;
          end else begin
            // keep stepping: store reference sample, increment one tap
            cal1_data_chk_last       <= cal1_data_chk_r;
            cal1_data_chk_last_valid <= 1'b1;
            cal1_dlyce_dq            <= 1'b1;
            cal1_dlyinc_dq           <= 1'b1;
            cal1_state               <= CAL1_SECOND_EDGE_IDEL_WAIT;
          end
        end

        // wait for DQ IDELAY to settle
        CAL1_SECOND_EDGE_IDEL_WAIT: begin
          if (!idel_set_wait)
            cal1_state <= CAL1_FIND_SECOND_EDGE;
        end

        // Compute how many taps to back off (a relative decrement, not an
        // absolute tap setting).
        CAL1_CALC_IDEL: begin
          if (cal1_found_second_edge) begin
            if (!cal1_dqs_dq_init_phase)
              // case 1 (DQS started inside the DQ window): back off by
              // 1.5 bit times. This can ask for more taps than are
              // available - CAL1_DEC_IDEL stops the decrement early.
              cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
                                   {1'b0, (cal1_bit_time_tap_cnt >> 1)};
            else
              // case 2 (DQS started in the wrong window): back off by
              // 0.5 bit time
              cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
          end else begin
            // only one edge found: use the precomputed low-frequency value
            // (treated as case 1)
            cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
          end
          cal1_state <= CAL1_DEC_IDEL;
        end

        // Final adjustment: one decrement pulse per cycle until the
        // requested count is exhausted. Underflow guard: also stop if a
        // decrement was issued last cycle while the tap counter still read
        // 1 (CAL1_IDEL_TAP_CNT lags CAL1_DLYCE_DQ by one clock cycle).
        CAL1_DEC_IDEL: begin
          if ((cal1_idel_dec_cnt == 7'd0) ||
              (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'd1))) begin
            cal1_state <= CAL1_DONE;
            // finished after the last DQ - or after the first one when
            // simulating
            if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0))
              calib_done_tmp[0] <= 1'b1;
            else
              // only advance when there is a next bit (needed for VHDL
              // simulation to prevent out-of-index error)
              next_count_dq <= count_dq + 1;
          end else begin
            cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
            cal1_dlyce_dq     <= 1'b1;
            cal1_dlyinc_dq    <= 1'b0;
          end
        end

        // Bit finished: once the delay has settled, move COUNT_DQ on to
        // the next bit and either finish, or start the next bit (after an
        // auto-refresh if one is due).
        CAL1_DONE: begin
          if (!idel_set_wait) begin
            count_dq <= next_count_dq;
            if (calib_done_tmp[0]) begin
              calib_done[0] <= 1'b1;
              cal1_state    <= CAL1_IDLE;
            end else if (cal1_refresh) begin
              // CAL1_REFRESH is asserted when NEXT_COUNT_DQ[1:0] == 0:
              // request an auto-refresh to avoid a tRAS violation, and hold
              // here until it has been completed
              cal1_ref_req <= 1'b1;
              if (calib_ref_done)
                cal1_state <= CAL1_INIT;
            end else begin
              // no refresh needed this time, proceed to next bit
              cal1_state <= CAL1_INIT;
            end
          end
        end
      endcase
    end
  end
1177
 
1178
  //***************************************************************************
1179
  // Second stage calibration: DQS-FPGA Clock
1180
  // Algorithm Description:
1181
  //  1. Assumes a training pattern that will produce a pattern oscillating at
1182
  //     half the core clock frequency each on rise and fall outputs, and such
1183
  //     that rise and fall outputs are 180 degrees out of phase from each
1184
  //     other. Note that since the calibration logic runs at half the speed
1185
  //     of the interface, expect that data sampled with the slow clock always
1186
  //     to be constant (either always = 1, or = 0, and rise data != fall data)
1187
  //     unless we cross the edge of the data valid window
1188
  //  2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
1189
  //     sync'ed to rising edge of core clock, and falling edge data sync'ed
1190
  //     to falling edge of core clock
1191
  //  3. Start looking for an edge. An edge is defined as either: (1) a
1192
  //     change in capture value or (2) an invalid capture value (e.g. rising
1193
  //     data != falling data for that same clock cycle).
1194
  //  4. If an edge is found, go to step (6). If edge hasn't been found, then
1195
  //     set RD_DATA_SEL = 1, and try again.
1196
  //  5. If no edge is found, then increment IDELAY and return to step (3)
1197
  //  6. If an edge is found, then invert RD_DATA_SEL - this shifts the
1198
  //     capture point 180 degrees from the edge of the window (minus duty
1199
  //     cycle distortion, delay skew between rising/falling edge capture
1200
  //     paths, etc.)
1201
  //  7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
1202
  //     stage 1 calibration), then decrement IDELAY (without reinverting
1203
  //     RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
1204
  //     have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
1205
  //     capture point (not optimal, but best we can do not having found an
1206
  //     edge of the window). This happens only for very low frequencies.
1207
  //  8. Repeat for each DQS group.
1208
  //  NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
1209
  //   algorithm might be to find both edges of the data valid window (using
1210
  //   the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
1211
  //***************************************************************************
1212
 
1213
  // RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
1214
  // UCF file to relax timing. This net is "pseudo-static" (after value is
1215
  // changed, FSM waits number of cycles before using the output).
1216
  // Note that we are adding one clock cycle of delay (to isolate it from
1217
  // the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
1218
  // enough to compensate (by default it does, it waits a few cycles more
1219
  // than minimum # of clock cycles)
1220
  // One FDRSE flop per DQS group: RD_DATA_SEL[n] is CAL2_RD_DATA_SEL[n]
  // delayed by one CLKDIV cycle. Clock enable is tied high and the
  // set/reset inputs are tied off. The synthesis attributes on the instance
  // must be kept as-is.
  genvar rd_i;
  generate
    for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i + 1) begin: gen_rd_data_sel
      FDRSE u_ff_rd_data_sel
        (
         .C   (clkdiv),
         .CE  (1'b1),
         .R   (1'b0),
         .S   (1'b0),
         .D   (cal2_rd_data_sel[rd_i]),
         .Q   (rd_data_sel[rd_i])
         ) /* synthesis syn_preserve = 1 */
           /* synthesis syn_replicate = 0 */;
    end
  endgenerate
1235
 
1236
  //*****************************************************************
1237
  // Max number of taps used for stg2 cal dependent on number of taps
1238
  // used for stg1 (give priority to stg1 cal - let it use as many
1239
  // taps as it needs - the remainder of the IDELAY taps can be used
1240
  // by stg2)
1241
  //*****************************************************************
1242
 
1243
  // stage 2 tap budget = 63 minus the maximum tap count used by stage 1
  // (registered every cycle)
  always @(posedge clkdiv) begin
    cal2_idel_tap_limit <= 6'd63 - cal1_idel_max_tap;
  end
1245
 
1246
  //*****************************************************************
1247
  // second stage calibration uses readback pattern of "1100" (i.e.
1248
  // 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
1249
  // only look at the first bit of each DQS group
1250
  //*****************************************************************
1251
 
1252
  // asserted when the captured data differs from the value last recorded
1253
  // for the current capture polarity (cal2_curr_sel = 0 uses the *_pos
1254
  // copy, = 1 uses the *_neg copy; each copy is only compared once its
  // last_valid flag is set), or when rise data != fall data.
1255
  assign cal2_detect_edge =
1256
    ((((rdd_rise_q1 != cal2_rd_data_rise_last_pos) ||
1257
       (rdd_fall_q1 != cal2_rd_data_fall_last_pos)) &&
1258
      cal2_rd_data_last_valid_pos && (!cal2_curr_sel)) ||
1259
     (((rdd_rise_q1 != cal2_rd_data_rise_last_neg) ||
1260
       (rdd_fall_q1 != cal2_rd_data_fall_last_neg)) &&
1261
      cal2_rd_data_last_valid_neg && (cal2_curr_sel)) ||
1262
     (rdd_rise_q1 != rdd_fall_q1));
1263
 
1264
  //*****************************************************************
1265
  // keep track of edge tap counts found, and whether we've
1266
  // incremented to the maximum number of taps allowed
1267
  // NOTE: Assume stage 2 cal always increments the tap count (never
1268
  //       decrements) when searching for edge of the data valid window
1269
  //*****************************************************************
1270
 
1271
  // Tap counter for the DQS group currently being calibrated.
  // Cleared (together with the limit flag) whenever the FSM is in CAL2_INIT.
  // Counts every cal2_dlyce_dqs pulse; cal2_dlyinc_dqs is not examined here.
  // The limit flag is re-evaluated on each pulse by comparing the
  // pre-increment count against (cal2_idel_tap_limit - 1).
  always @(posedge clkdiv)
1272
    if (cal2_state == CAL2_INIT) begin
1273
      cal2_idel_tap_limit_hit <= 1'b0;
1274
      cal2_idel_tap_cnt <= 6'b000000;
1275
    end else if (cal2_dlyce_dqs) begin
1276
      cal2_idel_tap_cnt <= cal2_idel_tap_cnt + 1;
1277
      cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
1278
                                  cal2_idel_tap_limit - 1);
1279
    end
1280
 
1281
  //*****************************************************************
1282
 
1283
  // Stage 2 calibration state machine (CLKDIV domain, synchronous reset via
  // rstdiv). Started by calib_start[1]; walks DQS groups 0..DQS_WIDTH-1
  // (only one pass when SIM_ONLY != 0) and raises calib_done[1] on return
  // to CAL2_IDLE. Outputs driven here: cal2_rd_data_sel (capture polarity
  // per group), cal2_dlyce_dqs / cal2_dlyinc_dqs (IDELAY step pulses) and
  // cal2_ref_req (refresh request between groups).
  // Note: calib_err[1] is only cleared in this block; no state sets it.
  always @(posedge clkdiv)
1284
    if (rstdiv) begin
1285
      calib_done[1]               <= 1'b0;
1286
      calib_done_tmp[1]           <= 1'bx;
1287
      calib_err[1]                <= 1'b0;
1288
      count_dqs                   <= 'b0;
1289
      next_count_dqs              <= 'b0;
1290
      cal2_dlyce_dqs              <= 1'b0;
1291
      cal2_dlyinc_dqs             <= 1'b0;
1292
      cal2_idel_dec_cnt           <= 6'bxxxxxx;
1293
      cal2_rd_data_last_valid_neg <= 1'bx;
1294
      cal2_rd_data_last_valid_pos <= 1'bx;
1295
      cal2_rd_data_sel            <= 'b0;
1296
      cal2_ref_req                <= 1'b0;
1297
      cal2_state                  <= CAL2_IDLE;
1298
    end else begin
      // defaults: these three are low unless a state below overrides them
      // in the same cycle, so each assertion lasts one CLKDIV cycle per
      // cycle spent in the asserting branch
1299
      cal2_ref_req      <= 1'b0;
1300
      cal2_dlyce_dqs    <= 1'b0;
1301
      cal2_dlyinc_dqs   <= 1'b0;
1302
 
1303
      case (cal2_state)
1304
        // Wait for start request; group counters are held at 0 while idle
        CAL2_IDLE: begin
1305
          count_dqs      <= 'b0;
1306
          next_count_dqs <= 'b0;
1307
          if (calib_start[1]) begin
1308
            cal2_rd_data_sel  <= {DQS_WIDTH{1'b0}};
1309
            calib_done[1]     <= 1'b0;
1310
            calib_done_tmp[1] <= 1'b0;
1311
            cal2_state        <= CAL2_INIT;
1312
          end
1313
        end
1314
 
1315
        // Pass through this state every time we calibrate a new DQS group
1316
        CAL2_INIT: begin
1317
          cal2_curr_sel <= 1'b0;
1318
          cal2_rd_data_last_valid_neg <= 1'b0;
1319
          cal2_rd_data_last_valid_pos <= 1'b0;
1320
          cal2_state <= CAL2_INIT_IDEL_WAIT;
1321
        end
1322
 
1323
        // Stall state only used if calibration run more than once. Can take
1324
        // this state out if design never runs calibration more than once.
1325
        // We need this state to give time for MUX'ed data to settle after
1326
        // resetting RD_DATA_SEL
1327
        CAL2_INIT_IDEL_WAIT:
1328
          if (!idel_set_wait)
1329
            cal2_state <= CAL2_FIND_EDGE_POS;
1330
 
1331
        // Look for an edge - first check "positive-edge" stage 2 capture
1332
        CAL2_FIND_EDGE_POS: begin
1333
          // if found an edge, then switch to the opposite edge stage 2
1334
          // capture and we're done - no need to decrement the tap count,
1335
          // since switching to the opposite edge will shift the capture
1336
          // point by 180 degrees
1337
          if (cal2_detect_edge) begin
1338
            cal2_curr_sel <= 1'b1;
1339
            cal2_state <= CAL2_DONE;
1340
            // set all DQS groups to be the same for simulation
1341
            if (SIM_ONLY != 0)
1342
              cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
1343
            else
1344
              cal2_rd_data_sel[count_dqs] <= 1'b1;
1345
            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1346
              calib_done_tmp[1] <= 1'b1;
1347
            else
1348
              // MIG 2.1: Fix for simulation out-of-bounds error when
1349
              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1350
              next_count_dqs <= count_dqs + 1;
1351
          end else begin
1352
            // otherwise, invert polarity of stage 2 capture and look for
1353
            // an edge with opposite capture clock polarity
            // (the current capture is saved as the "pos" reference first)
1354
            cal2_curr_sel <= 1'b1;
1355
            cal2_rd_data_sel[count_dqs] <= 1'b1;
1356
            cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
1357
            cal2_rd_data_rise_last_pos  <= rdd_rise_q1;
1358
            cal2_rd_data_fall_last_pos  <= rdd_fall_q1;
1359
            cal2_rd_data_last_valid_pos <= 1'b1;
1360
          end
1361
        end
1362
 
1363
        // Give time to switch from positive-edge to negative-edge second
1364
        // stage capture (need time for data to filter through pipe stages)
1365
        CAL2_FIND_EDGE_IDEL_WAIT_POS:
1366
          if (!idel_set_wait)
1367
            cal2_state <= CAL2_FIND_EDGE_NEG;
1368
 
1369
        // Look for an edge - check "negative-edge" stage 2 capture
1370
        CAL2_FIND_EDGE_NEG:
1371
          if (cal2_detect_edge) begin
1372
            cal2_curr_sel <= 1'b0;
1373
            cal2_state <= CAL2_DONE;
1374
            // set all DQS groups to be the same for simulation
1375
            if (SIM_ONLY != 0)
1376
              cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
1377
            else
1378
              cal2_rd_data_sel[count_dqs] <= 1'b0;
1379
            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1380
              calib_done_tmp[1] <= 1'b1;
1381
            else
1382
              // MIG 2.1: Fix for simulation out-of-bounds error when
1383
              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1384
              next_count_dqs <= count_dqs + 1;
1385
          end else if (cal2_idel_tap_limit_hit) begin
1386
            // otherwise, if we've run out of taps, then immediately
1387
            // back off by half # of taps used - that's our best estimate
1388
            // for optimal calibration point. Doesn't matter which
1389
            // polarity we're using for capture (we don't know which one is
1390
            // best to use)
            // (decrement count loaded below = cal2_idel_tap_limit / 2)
1391
            cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]};
1392
            cal2_state <= CAL2_DEC_IDEL;
1393
            if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
1394
              calib_done_tmp[1] <= 1'b1;
1395
            else
1396
              // MIG 2.1: Fix for simulation out-of-bounds error when
1397
              // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
1398
              next_count_dqs <= count_dqs + 1;
1399
          end else begin
1400
            // otherwise, increment IDELAY, and start looking for edge again
            // (the current capture is saved as the "neg" reference first)
1401
            cal2_curr_sel <= 1'b0;
1402
            cal2_rd_data_sel[count_dqs] <= 1'b0;
1403
            cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
1404
            cal2_rd_data_rise_last_neg  <= rdd_rise_q1;
1405
            cal2_rd_data_fall_last_neg  <= rdd_fall_q1;
1406
            cal2_rd_data_last_valid_neg <= 1'b1;
1407
            cal2_dlyce_dqs  <= 1'b1;
1408
            cal2_dlyinc_dqs <= 1'b1;
1409
          end
1410
 
1411
        // Wait for idel_set_wait to clear after the tap increment and
        // polarity switch, then resume the search with positive-edge capture
        CAL2_FIND_EDGE_IDEL_WAIT_NEG:
1412
          if (!idel_set_wait)
1413
            cal2_state <= CAL2_FIND_EDGE_POS;
1414
 
1415
        // if no edge found, then decrement by half # of taps used
        // (one decrement pulse per cycle until the counter reaches zero)
1416
        CAL2_DEC_IDEL: begin
1417
          if (cal2_idel_dec_cnt == 6'b000000)
1418
            cal2_state <= CAL2_DONE;
1419
          else begin
1420
            cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
1421
            cal2_dlyce_dqs  <= 1'b1;
1422
            cal2_dlyinc_dqs <= 1'b0;
1423
          end
1424
        end
1425
 
1426
        // delay state to allow count_dqs and ISERDES data to point to next
1427
        // DQ bit (DQS group) before going to INIT
1428
        CAL2_DONE:
1429
          if (!idel_set_wait) begin
1430
            count_dqs <= next_count_dqs;
1431
            if (calib_done_tmp[1]) begin
1432
              calib_done[1] <= 1'b1;
1433
              cal2_state <= CAL2_IDLE;
1434
            end else begin
1435
              // request auto-refresh after every DQS group calibrated to
1436
              // avoid tRAS violation
              // (request stays asserted until calib_ref_done is seen)
1437
              cal2_ref_req <= 1'b1;
1438
              if (calib_ref_done)
1439
                cal2_state <= CAL2_INIT;
1440
            end
1441
          end
1442
      endcase
1443
    end
1444
 
1445
  //***************************************************************************
1446
  // Stage 3 calibration: Read Enable
1447
  // Description:
1448
  // read enable calibration determines the "round-trip" time (in # of CLK0
1449
  // cycles) between when a read command is issued by the controller, and
1450
  // when the corresponding read data is synchronized into the CLK0 domain.
1451
  // this is a long delay chain to delay read enable signal from controller/
1452
  // initialization logic (i.e. this is used for both initialization and
1453
  // during normal controller operation). Stage 3 calibration logic decides
1454
  // which delayed version is appropriate to use (which is affected by the
1455
  // round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic
1456
  // when the captured data output from ISERDES is valid.
1457
  //***************************************************************************
1458
 
1459
  //*****************************************************************
1460
  // Delay chains: Use shift registers
1461
  // Two sets of delay chains are used:
1462
  //  1. One to delay RDEN from PHY_INIT module for calibration
1463
  //     purposes (delay required for RDEN for calibration is different
1464
  //     than during normal operation)
1465
  //  2. One per DQS group to delay RDEN from controller for normal
1466
  //     operation - the value to delay for each DQS group can be different
1467
  //     as is determined during calibration
1468
  //*****************************************************************
1469
 
1470
  //*****************************************************************
1471
  // First delay chain, use only for calibration
1472
  // input = asserted on rising edge of RDEN from PHY_INIT module
1473
  //*****************************************************************
1474
 
1475
  // CLK-domain registers (no reset):
  //  - ctrl_rden_r: ctrl_rden delayed by one cycle
  //  - phy_init_rden_r / _r1: phy_init_rden delayed by one / two cycles
  //  - calib_rden_edge_r: registered pulse, high when phy_init_rden_r is 1
  //    and phy_init_rden_r1 is 0 (rising edge of phy_init_rden)
  always @(posedge clk) begin
1476
    ctrl_rden_r       <= ctrl_rden;
1477
    phy_init_rden_r   <= phy_init_rden;
1478
    phy_init_rden_r1  <= phy_init_rden_r;
1479
    calib_rden_edge_r <= phy_init_rden_r & ~phy_init_rden_r1;
1480
  end
1481
 
1482
  // Calibration shift register used for both Stage 3 and Stage 4 cal
1483
  // (not strictly necessary for stage 4, but use as an additional check
1484
  // to make sure we're checking for correct data on the right clock cycle)
1485
  // Registered select of the calibration SRL address: the stage 3 value
  // (cal3_rden_srl_a) is used until calib_done[2] is set, the stage 4
  // value (cal4_rden_srl_a) afterwards.
  always @(posedge clkdiv)
1486
    if (!calib_done[2])
1487
      calib_rden_srl_a <= cal3_rden_srl_a;
1488
    else
1489
      calib_rden_srl_a <= cal4_rden_srl_a;
1490
 
1491
  // Flops for targeting of multi-cycle path in UCF
1492
  // Five explicit flops (one per SRL address bit): calib_rden_srl_a_r is
  // calib_rden_srl_a delayed by one CLKDIV cycle. CE tied high, R/S tied low.
  genvar cal_rden_ff_i;
1493
  generate
1494
    for (cal_rden_ff_i = 0; cal_rden_ff_i < 5;
1495
         cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly
1496
      FDRSE u_ff_cal_rden_dly
1497
        (
1498
         .Q   (calib_rden_srl_a_r[cal_rden_ff_i]),
1499
         .C   (clkdiv),
1500
         .CE  (1'b1),
1501
         .D   (calib_rden_srl_a[cal_rden_ff_i]),
1502
         .R   (1'b0),
1503
         .S   (1'b0)
1504
         ) /* synthesis syn_preserve = 1 */
1505
           /* synthesis syn_replicate = 0 */;
1506
    end
1507
  endgenerate
1508
 
1509
  // Calibration delay chain: shift register clocked by CLK that shifts in
  // calib_rden_edge_r every cycle (CE tied high). The 5-bit address
  // calib_rden_srl_a_r selects which tap drives calib_rden_srl_out; the
  // Q31 cascade output is left unconnected.
  SRLC32E u_calib_rden_srl
1510
    (
1511
     .Q   (calib_rden_srl_out),
1512
     .Q31 (),
1513
     .A   (calib_rden_srl_a_r),
1514
     .CE  (1'b1),
1515
     .CLK (clk),
1516
     .D   (calib_rden_edge_r)
1517
     );
1518
 
1519
  // Registers the SRL output in the CLK domain: calib_rden_srl_out_r is
  // calib_rden_srl_out delayed by one CLK cycle (CE high, R/S low).
  FDRSE u_calib_rden_srl_out_r
1520
    (
1521
         .Q   (calib_rden_srl_out_r),
1522
         .C   (clk),
1523
         .CE  (1'b1),
1524
         .D   (calib_rden_srl_out),
1525
         .R   (1'b0),
1526
         .S   (1'b0)
1527
     ) /* synthesis syn_preserve = 1 */;
1528
 
1529
  // convert to CLKDIV domain. Two versions are generated because we need
1530
  // to be able to tell exactly which fast (clk) clock cycle the read
1531
  // enable was asserted in. Only one of CALIB_DATA_VALID or
1532
  // CALIB_DATA_VALID_STGD will be asserted for any given shift value
1533
  // calib_rden_srl_out_r1: calib_rden_srl_out_r delayed one more CLK cycle
  always @(posedge clk)
1534
    calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
1535
 
1536
  // Sample both CLK-domain versions on CLKDIV: the undelayed one becomes
  // calib_rden_valid, the one-CLK-delayed one becomes calib_rden_valid_stgd
  always @(posedge clkdiv) begin
1537
    calib_rden_valid      <= calib_rden_srl_out_r;
1538
    calib_rden_valid_stgd <= calib_rden_srl_out_r1;
1539
  end
1540
 
1541
  //*****************************************************************
1542
  // Second set of delays chain, use for normal reads
1543
  // input = RDEN from controller
1544
  //*****************************************************************
1545
 
1546
  // Flops for targeting of multi-cycle path in UCF
1547
  // 5*DQS_WIDTH explicit flops: rden_dly_r is rden_dly delayed by one
  // CLKDIV cycle (five address bits per DQS group). CE tied high, R/S low.
  genvar rden_ff_i;
1548
  generate
1549
    for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH;
1550
         rden_ff_i = rden_ff_i+1) begin: gen_rden_dly
1551
      FDRSE u_ff_rden_dly
1552
        (
1553
         .Q   (rden_dly_r[rden_ff_i]),
1554
         .C   (clkdiv),
1555
         .CE  (1'b1),
1556
         .D   (rden_dly[rden_ff_i]),
1557
         .R   (1'b0),
1558
         .S   (1'b0)
1559
         ) /* synthesis syn_preserve = 1 */
1560
           /* synthesis syn_replicate = 0 */;
1561
    end
1562
  endgenerate
1563
 
1564
  // NOTE: Comment this section explaining purpose of SRL's
1565
  // Per-DQS-group read-enable delay chain (CLK domain):
  //  - u_rden_srl shifts in ctrl_rden_r every cycle; its tap address is the
  //    group's 5-bit slice of rden_dly_r, bits [rden_i*5+4 : rden_i*5]
  //    (MSB first in the concatenation below)
  //  - u_calib_rden_r registers the selected tap into calib_rden[rden_i]
  genvar rden_i;
1566
  generate
1567
    for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1) begin: gen_rden
1568
      SRLC32E u_rden_srl
1569
        (
1570
         .Q   (rden_srl_out[rden_i]),
1571
         .Q31 (),
1572
         .A   ({rden_dly_r[(rden_i*5)+4],
1573
                rden_dly_r[(rden_i*5)+3],
1574
                rden_dly_r[(rden_i*5)+2],
1575
                rden_dly_r[(rden_i*5)+1],
1576
                rden_dly_r[(rden_i*5)]}),
1577
         .CE  (1'b1),
1578
         .CLK (clk),
1579
         .D   (ctrl_rden_r)
1580
         );
1581
      FDRSE u_calib_rden_r
1582
        (
1583
         .Q   (calib_rden[rden_i]),
1584
         .C   (clk),
1585
         .CE  (1'b1),
1586
         .D   (rden_srl_out[rden_i]),
1587
         .R   (1'b0),
1588
         .S   (1'b0)
1589
         ) /* synthesis syn_preserve = 1 */;
1590
    end
1591
  endgenerate
1592
 
1593
  //*****************************************************************
1594
  // indicates that current received data is the correct pattern. Check both
1595
  // rising and falling data for first DQ in each DQS group. Note that
1596
  // we're checking using a pipelined version of read data, so need to take
1597
  // this inherent delay into account in determining final read valid delay
1598
  // Data is written to the memory in the following order (first -> last):
1599
  //   0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0xE, 0x1
1600
  // Looking just at LSb, expect data in sequence (in binary):
1601
  //   1, 0, 0, 1, 1, 0, 0, 1
1602
  // Check for the presence of the first 7 words, and compensate read valid
1603
  // delay accordingly. Don't check last falling edge data, it may be
1604
  // corrupted by the DQS tri-state glitch at end of read postamble
1605
  // (glitch protection not yet active until stage 4 cal)
1606
  //*****************************************************************
1607
 
1608
  // Read-data history registers (CLKDIV domain, no reset):
  //  *_r  = value from the previous CLKDIV cycle
  //  *_r1 = value from two cycles ago (kept for the Q1 rise/fall pair only)
  always @(posedge clkdiv) begin
1609
    rdd_rise_q1_r  <= rdd_rise_q1;
1610
    rdd_fall_q1_r  <= rdd_fall_q1;
1611
    rdd_rise_q2_r  <= rdd_rise_q2;
1612
    rdd_fall_q2_r  <= rdd_fall_q2;
1613
    rdd_rise_q1_r1 <= rdd_rise_q1_r;
1614
    rdd_fall_q1_r1 <= rdd_fall_q1_r;
1615
  end
1616
 
1617
  // Stage 3 pattern detectors. Both results are registered, so they lag
  // the compared data by one CLKDIV cycle. Each compares seven samples
  // against 1,0,0,1,1,0,0; the eighth (latest falling-edge) sample is
  // not part of either comparison.
  always @(posedge clkdiv) begin
1618
    // For the following sequence from memory:
1619
    //   rise[0], fall[0], rise[1], fall[1]
1620
    // if data is aligned out of fabric ISERDES:
1621
    //   RDD_RISE_Q2 = rise[0]
1622
    //   RDD_FALL_Q2 = fall[0]
1623
    //   RDD_RISE_Q1 = rise[1]
1624
    //   RDD_FALL_Q1 = fall[1]
1625
    cal3_data_match <= ((rdd_rise_q2_r == 1) &&
1626
                        (rdd_fall_q2_r == 0) &&
1627
                        (rdd_rise_q1_r == 0) &&
1628
                        (rdd_fall_q1_r == 1) &&
1629
                        (rdd_rise_q2   == 1) &&
1630
                        (rdd_fall_q2   == 0) &&
1631
                        (rdd_rise_q1   == 0));
1632
 
1633
    // if data is staggered out of fabric ISERDES:
1634
    //   RDD_RISE_Q1_R = rise[0]
1635
    //   RDD_FALL_Q1_R = fall[0]
1636
    //   RDD_RISE_Q2   = rise[1]
1637
    //   RDD_FALL_Q2   = fall[1]
1638
    cal3_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
1639
                             (rdd_fall_q1_r1 == 0) &&
1640
                             (rdd_rise_q2_r  == 0) &&
1641
                             (rdd_fall_q2_r  == 1) &&
1642
                             (rdd_rise_q1_r  == 1) &&
1643
                             (rdd_fall_q1_r  == 0) &&
1644
                             (rdd_rise_q2    == 0));
1645
  end
1646
 
1647
  // cal3_rden_dly:    SRL address under test minus the constant
  //                   CAL3_RDEN_SRL_DLY_DELTA (value written to rden_dly)
  // cal3_data_valid:  either the aligned or the staggered valid strobe
  // cal3_match_found: a valid strobe coincides with the pattern match of
  //                   the same flavor (aligned with aligned, stgd with stgd)
  assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;
1648
  assign cal3_data_valid = (calib_rden_valid | calib_rden_valid_stgd);
1649
  assign cal3_match_found
1650
    = ((calib_rden_valid && cal3_data_match) ||
1651
       (calib_rden_valid_stgd && cal3_data_match_stgd));
1652
 
1653
  // when calibrating, check to see which clock cycle (after the read is
1654
  // issued) does the expected data pattern arrive. Record this result
1655
  // NOTE: Can add error checking here in case valid data not found on any
1656
  //  of the available pipeline stages
1657
  // Stage 3 state machine (CLKDIV domain, synchronous reset via rstdiv).
  // Started by calib_start[2]. For each DQS group (only one pass when
  // SIM_ONLY != 0) it sweeps cal3_rden_srl_a upward from RDEN_BASE_DELAY
  // until cal3_match_found, then stores the result in rden_dly and
  // calib_rden_dly. Sets calib_done[2] after the last group, or sets
  // calib_err_2[0] and returns to idle if the address reaches 5'b11111
  // without a match (calib_done[2] is not set on that path).
  always @(posedge clkdiv) begin
1658
    if (rstdiv) begin
1659
      cal3_rden_srl_a <= 5'bxxxxx;
1660
      cal3_state      <= CAL3_IDLE;
1661
      calib_done[2]   <= 1'b0;
1662
      calib_err_2[0]  <= 1'b0;
1663
      count_rden      <= {DQS_WIDTH{1'b0}};
1664
      rden_dly        <= {5*DQS_WIDTH{1'b0}};
1665
    end else begin
1666
 
1667
      case (cal3_state)
1668
        // Wait for start request; group counter is held at 0 while idle
        CAL3_IDLE: begin
1669
          count_rden <= {DQS_WIDTH{1'b0}};
1670
          if (calib_start[2]) begin
1671
            calib_done[2] <= 1'b0;
1672
            cal3_state    <= CAL3_INIT;
1673
          end
1674
        end
1675
 
1676
        // Entered once per DQS group: restart the sweep at the base delay
        CAL3_INIT: begin
1677
          cal3_rden_srl_a <= RDEN_BASE_DELAY;
1678
          // let SRL pipe clear after loading initial shift value
1679
          cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
1680
        end
1681
 
1682
        // Nothing happens in this state until a valid strobe arrives
        CAL3_DETECT:
1683
          if (cal3_data_valid)
1684
            // if match found at the correct clock cycle
1685
            if (cal3_match_found) begin
1686
 
1687
              // For simulation, load SRL addresses for all DQS with same value
1688
              if (SIM_ONLY != 0) begin
1689
                for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
1690
                  rden_dly[(i*5)]   <= cal3_rden_dly[0];
1691
                  rden_dly[(i*5)+1] <= cal3_rden_dly[1];
1692
                  rden_dly[(i*5)+2] <= cal3_rden_dly[2];
1693
                  rden_dly[(i*5)+3] <= cal3_rden_dly[3];
1694
                  rden_dly[(i*5)+4] <= cal3_rden_dly[4];
1695
                end
1696
              end else begin
1697
                rden_dly[(count_rden*5)]   <= cal3_rden_dly[0];
1698
                rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
1699
                rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
1700
                rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
1701
                rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
1702
              end
1703
 
1704
              // Use for stage 4 calibration
              // (stores the raw SRL address, not the delta-adjusted value,
              // and only for the current group even when SIM_ONLY != 0)
1705
              calib_rden_dly[(count_rden*5)]   <= cal3_rden_srl_a[0];
1706
              calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
1707
              calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
1708
              calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
1709
              calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
1710
              cal3_state <= CAL3_DONE;
1711
            end else begin
1712
              // If we run out of stages to shift, without finding correct
1713
              // result, then stop and assert error
1714
              if (cal3_rden_srl_a == 5'b11111) begin
1715
                calib_err_2[0] <= 1'b1;
1716
                cal3_state   <= CAL3_IDLE;
1717
              end else begin
1718
                // otherwise, increase the shift value and try again
1719
                cal3_rden_srl_a <= cal3_rden_srl_a + 1;
1720
                cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
1721
              end
1722
            end
1723
 
1724
        // give additional time for RDEN_R pipe to clear from effects of
1725
        // previous pipeline or IDELAY tap change
        // (calib_rden_pipe_cnt is maintained outside this block)
1726
        CAL3_RDEN_PIPE_CLR_WAIT:
1727
          if (calib_rden_pipe_cnt == 5'b00000)
1728
              cal3_state <= CAL3_DETECT;
1729
 
1730
        // Current group finished: either finish stage 3 or move to the
        // next DQS group
        CAL3_DONE: begin
1731
          if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin
1732
            calib_done[2] <= 1'b1;
1733
            cal3_state    <= CAL3_IDLE;
1734
          end else begin
1735
            count_rden    <= count_rden + 1;
1736
            cal3_state    <= CAL3_INIT;
1737
          end
1738
        end
1739
      endcase
1740
    end
1741
  end
1742
 
1743
  //*****************************************************************
1744
  // Last part of stage 3 calibration - compensate for differences
1745
  // in delay between different DQS groups. Assume that in the worst
1746
  // case, DQS groups can only differ by one clock cycle. Data for
1747
  // certain DQS groups must be delayed by one clock cycle.
1748
  // NOTE: May need to increase allowable variation to greater than
1749
  //  one clock cycle in certain customer designs.
1750
  // Algorithm is:
1751
  //   1. Record shift delay value for DQS[0]
1752
  //   2. Compare each DQS[x] delay value to that of DQS[0]:
1753
  //     - If different, then record this fact (RDEN_MUX)
1754
  //     - If DQS[0] is greater than DQS[x], set RDEN_INC. Assume the
1755
  //       difference is one clock cycle only - this is a key assumption,
1756
  //       assume no more than a one clock cycle variation.
1757
  //     - If DQS[0] is less than DQS[x], set RDEN_DEC
1758
  //   3. After calibration is complete, set control for DQS group
1759
  //      delay (CALIB_RDEN_SEL):
1760
  //     - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
1761
  //       delay (and at least one other DQS group has a higher
1762
  //       delay).
1763
  //     - If RDEN_INC = 1, then assume that DQS[0] is the highest
1764
  //       delay (and that all other DQS groups have the same or
1765
  //       lower delay).
1766
  //     - If both RDEN_INC and RDEN_DEC = 1, then flag error
1767
  //       (variation is too high for this algorithm to handle)
1768
  //*****************************************************************
1769
 
1770
  // Inter-group read-enable alignment (CLKDIV domain, synchronous reset).
  // While calib_done[2] is low: on each stage 3 match, record DQS[0]'s SRL
  // address as the reference, or compare the current group's address with
  // it and update rden_mux / rden_inc / rden_dec.
  // Once calib_done[2] is high: continuously drive calib_rden_sel and
  // calib_err_2[1] from the recorded flags.
  always @(posedge clkdiv) begin
1771
    if (rstdiv) begin
1772
      calib_err_2[1] <= 1'b0;
1773
      calib_rden_sel <= {DQS_WIDTH{1'bx}};
1774
      rden_dec       <= 1'b0;
1775
      rden_dly_0     <= 5'bxxxxx;
1776
      rden_inc       <= 1'b0;
1777
      rden_mux       <= {DQS_WIDTH{1'b0}};
1778
    end else begin
1779
      // if a match is found, then store the value of rden_dly
1780
      if (!calib_done[2]) begin
1781
        if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
1782
          // store the value for DQS[0] as a reference
1783
          if (count_rden == 0) begin
1784
            // for simulation, RDEN calibration only happens for DQS[0]
1785
            // set RDEN_MUX for all DQS groups to be the same as DQS[0]
1786
            if (SIM_ONLY != 0)
1787
              rden_mux <= {DQS_WIDTH{1'b0}};
1788
            else begin
1789
              // otherwise, load values for DQS[0]
1790
              rden_dly_0  <= cal3_rden_srl_a;
1791
              rden_mux[0] <= 1'b0;
1792
            end
1793
          end else if (SIM_ONLY == 0) begin
1794
            // for all other DQS groups, compare RDEN_DLY delay value with
1795
            // that of DQS[0]
1796
            if (rden_dly_0 != cal3_rden_srl_a) begin
1797
              // record that current DQS group has a different delay
1798
              // than DQS[0] (the "reference" DQS group)
1799
              rden_mux[count_rden] <= 1'b1;
              // rden_inc: DQS[0] delay is larger than this group's delay
              // rden_dec: DQS[0] delay is smaller than this group's delay
1800
              if (rden_dly_0 > cal3_rden_srl_a)
1801
                rden_inc <= 1'b1;
1802
              else if (rden_dly_0 < cal3_rden_srl_a)
1803
                rden_dec <= 1'b1;
1804
              // otherwise, if current DQS group has same delay as DQS[0],
1805
              // then rden_mux[count_rden] remains at 0 (since rden_mux
1806
              // array contents initialized to 0)
1807
            end
1808
          end
1809
        end
1810
      end else begin
1811
        // Otherwise - if we're done w/ stage 3 calibration (calib_done[2]):
1812
        // set final value for RDEN data delay
1813
        // flag error if there's more than one cycle variation from DQS[0]
1814
        calib_err_2[1] <= (rden_inc && rden_dec);
1815
        if (rden_inc)
1816
          // if DQS[0] delay represents max delay
1817
          calib_rden_sel <= ~rden_mux;
1818
        else
1819
          // if DQS[0] delay represents min delay (or all the delays are
1820
          // the same between DQS groups)
1821
          calib_rden_sel <= rden_mux;
1822
      end
1823
    end
1824
  end
1825
 
1826
  // flag error for stage 3 if appropriate
1827
  // Registered OR of the two stage 3 error sources: calib_err_2[0] (no
  // match found during the SRL sweep) and calib_err_2[1] (both rden_inc
  // and rden_dec were set).
  always @(posedge clkdiv)
1828
    calib_err[2] <= calib_err_2[0] | calib_err_2[1];
1829
 
1830
  //***************************************************************************
1831
  // Stage 4 calibration: DQS gate
1832
  //***************************************************************************
1833
 
1834
  //*****************************************************************
1835
  // indicates that current received data is the correct pattern. Same as
1836
  // for READ VALID calibration, except that the expected data sequence is
1837
  // different since DQS gate is asserted after the 6th word.
1838
  // Data sequence:
1839
  //  Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
1840
  //  After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
1841
  //   5th word, 8th word = 6th word)
1842
  // What if the gate timing is off? Need to make sure we can distinguish
1843
  // between the results of correct vs. incorrect gate timing. We also use
1844
  // the "read_valid" signal from stage 3 calibration to help us determine
1845
  // when to check for a valid sequence for stage 4 calibration (i.e. use
1846
  // CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
1847
  // Note that since the gate signal from the CLK0 domain is synchronized
1848
  // to the falling edge of DQS, that the effect of the gate will only be
1849
  // seen starting with a rising edge data (although it is possible
1850
  // the GATE IDDR output could go metastable and cause an unexpected result
1851
  // on the first rising and falling edges after the gate is enabled). 
1852
  // Also note that the actual DQS glitch can come more than 0.5*tCK after 
1853
  // the last falling edge of DQS and the constraint for this path can
1854
  // be > 0.5*tCK; however, this means when calibrating, the output of the 
1855
  // GATE IDDR may miss the setup time requirement of the rising edge flop 
1856
  // and only meet it for the falling edge flop. Therefore the rising
1857
  // edge data immediately following the assertion of the gate can either
1858
  // be a 1 or 0 (can rely on either)
1859
  // As the timing on the gate is varied, we expect to see (sequence of
1860
  // captured read data shown below):
1861
  //       - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
1862
  //                          read burst even starts)
1863
  //       - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
1864
  //       - x y 0 1 1 0 0 1  sometime during the burst; x,y = 0, or 1, but 
1865
  //       - x y x 1 1 0 0 1  all bits that show an x are the same value, 
1866
  //       - x y x y 1 0 0 1  and y are the same value)
1867
  //       - x y x y x 0 0 1
1868
  //       - x y x y x y 0 1 (gate starts just before start of burst)
1869
  //       - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
1870
  //                          represents possibility that gate may not disable
1871
  //                          clock for 2nd rising word in time)
1872
  //       - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
1873
  //       - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
1874
  //       - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
1875
  //*****************************************************************
1876
 
1877
  // cal4_data_valid: either the aligned or the staggered valid strobe
  // cal4_data_good:  a valid strobe coincides with the stage 4 pattern
  //                  match of the same flavor (aligned or staggered)
  assign cal4_data_valid = calib_rden_valid | calib_rden_valid_stgd;
1878
  assign cal4_data_good  = (calib_rden_valid &
1879
                            cal4_data_match) |
1880
                           (calib_rden_valid_stgd &
1881
                            cal4_data_match_stgd);
1882
 
1883
  // Stage 4 pattern detectors (registered, so they lag the compared data
  // by one CLKDIV cycle). The first six samples are checked against
  // 1,0,0,1,1,0; the seventh (last rising-edge) sample is deliberately
  // not checked, and the eighth (last falling-edge) sample must be 0.
  always @(posedge clkdiv) begin
1884
    // if data is aligned out of fabric ISERDES:
1885
    cal4_data_match <= ((rdd_rise_q2_r == 1) &&
1886
                        (rdd_fall_q2_r == 0) &&
1887
                        (rdd_rise_q1_r == 0) &&
1888
                        (rdd_fall_q1_r == 1) &&
1889
                        (rdd_rise_q2   == 1) &&
1890
                        (rdd_fall_q2   == 0) &&
1891
                        // MIG 2.1: Last rising edge data value not
1892
                        // guaranteed to be certain value at higher
1893
                        // frequencies
1894
                        // (rdd_rise_q1   == 0) &&
1895
                        (rdd_fall_q1   == 0));
1896
    // if data is staggered out of fabric ISERDES:
1897
    cal4_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
1898
                             (rdd_fall_q1_r1 == 0) &&
1899
                             (rdd_rise_q2_r  == 0) &&
1900
                             (rdd_fall_q2_r  == 1) &&
1901
                             (rdd_rise_q1_r  == 1) &&
1902
                             (rdd_fall_q1_r  == 0) &&
1903
                             // MIG 2.1: Last rising edge data value not
1904
                             // guaranteed to be certain value at higher
1905
                             // frequencies
1906
                             // (rdd_rise_q2    == 0) &&
1907
                             (rdd_fall_q2    == 0));
1908
  end
1909
 
1910
  //*****************************************************************
1911
  // DQS gate enable generation:
1912
  // This signal gets synchronized to DQS domain, and drives IDDR
1913
  // register that in turn asserts/deasserts CE to all 4 or 8 DQ
1914
  // IDDR's in that DQS group.
1915
  //   1. During normal (post-cal) operation, this is only for 2 clock
1916
  //      cycles following the end of a burst. Check for falling edge
1917
  //      of RDEN. But must also make sure NOT to assert for a read-idle-
1918
  //      read (two non-consecutive reads, separated by exactly one
1919
  //      idle cycle) - in this case, don't assert the gate because:
1920
  //      (1) we don't have enough time to deassert the gate before the
1921
  //          first rising edge of DQS for second burst (b/c of fact
1922
  //          that DQS gate is generated in the fabric only off rising
1923
  //          edge of CLK0 - if we somehow had an ODDR in fabric, we
1924
  //          could pull this off), (2) assumption is that the DQS glitch
1925
  //          will not rise enough to cause a glitch because the
1926
  //          post-amble of the first burst is followed immediately by
1927
  //          the pre-amble of the next burst
1928
  //   2. During stage 4 calibration, assert for 3 clock cycles
1929
  //      (assert gate enable one clock cycle early), since we gate out
1930
  //      the last two words (in addition to the invalid data on the DQ bus after
1931
  //      the DQS read postamble).
1932
  // NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
1933
  //  to when they are asserted w/r to the start of the read burst
1934
  //  (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
1935
  //*****************************************************************
1936
 
1937
  // Re-register CALIB_DONE on CLK to ease timing on the fast clock path.
  // Only calib_done_r[2] is currently used.
  always @(posedge clk) begin
    calib_done_r <= calib_done;
  end
1941
 
1942
  // Read-enable sources for the gate logic:
  //  - init-path RDEN is only passed through once calib_done_r[2] is set
  //  - controller RDEN is passed through unchanged
  always @(*) begin
    calib_init_rden = phy_init_rden & calib_done_r[2];
    calib_ctrl_rden = ctrl_rden;
  end
1946
 
1947
  // falling edge of controller RDEN: high in the previous cycle, low now
  assign calib_ctrl_rden_negedge = calib_ctrl_rden_r & ~calib_ctrl_rden;

  // Gate pulse is raised one cycle after that falling edge, but only if
  // RDEN is still low - this suppresses the pulse for a read-idle-read
  // sequence
  assign calib_ctrl_gate_pulse   = ~calib_ctrl_rden &
                                   calib_ctrl_rden_negedge_r;

  // one-cycle delayed copies used by the expressions above and by the
  // gate SRL input
  always @(posedge clk) begin
    calib_ctrl_gate_pulse_r   <= calib_ctrl_gate_pulse;
    calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
    calib_ctrl_rden_r         <= calib_ctrl_rden;
  end
1956
 
1957
  // Init-path gate pulse: asserted directly on the falling edge of the
  // init-path RDEN (no read-idle-read check on this path)
  assign calib_init_gate_pulse = calib_init_rden_r & ~calib_init_rden;

  // previous-cycle RDEN plus two delayed copies of the pulse, which stretch
  // the gate to three cycles on the init path
  always @(posedge clk) begin
    calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r;
    calib_init_gate_pulse_r  <= calib_init_gate_pulse;
    calib_init_rden_r        <= calib_init_rden;
  end
1963
 
1964
  // Input to the per-group gate delay SRLs. The signal is active low: it is
  // driven to 0 while any gate pulse term is high, i.e.
  //  - controller path: the pulse and its one-cycle delayed copy (2 cycles)
  //  - init path: the pulse and its two delayed copies (3 cycles)
  assign gate_srl_in = ~calib_ctrl_gate_pulse    &
                       ~calib_ctrl_gate_pulse_r  &
                       ~calib_init_gate_pulse    &
                       ~calib_init_gate_pulse_r  &
                       ~calib_init_gate_pulse_r1;
1972
 
1973
  //*****************************************************************
1974
  // generate DQS enable signal for each DQS group
1975
  // There are differences between DQS gate signal for calibration vs. during
1976
  // normal operation:
1977
  //  * calibration gates the second to last clock cycle of the burst,
1978
  //    rather than after the last word (e.g. for a 8-word, 4-cycle burst,
1979
  //    cycle 4 is gated for calibration; during normal operation, cycle
1980
  //    5 (i.e. cycle after the last word) is gated)
1981
  // enable for DQS is deasserted for two clock cycles, except when
1982
  // we have the preamble for the next read immediately following
1983
  // the postamble of the current read - assume DQS does not glitch
1984
  // during this time, that it stays low. Also if we did have to gate
1985
  // the DQS for this case, then we don't have enough time to deassert
1986
  // the gate in time for the first rising edge of DQS for the second
1987
  // read
1988
  //*****************************************************************
1989
 
1990
  // Flops for targeting of the multi-cycle path in the UCF: every bit of
  // gate_dly (5 bits per DQS group) gets its own CLKDIV-clocked FDRSE with
  // CE held high and set/reset tied off. Instance and block names are kept
  // unchanged so existing constraints still match.
  genvar gate_ff_i;
  generate
    for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH; gate_ff_i = gate_ff_i+1)
    begin: gen_gate_dly
      FDRSE u_ff_gate_dly
        (
         .C   (clkdiv),
         .CE  (1'b1),
         .R   (1'b0),
         .S   (1'b0),
         .D   (gate_dly[gate_ff_i]),
         .Q   (gate_dly_r[gate_ff_i])
         ) /* synthesis syn_preserve = 1 */
           /* synthesis syn_replicate = 0 */;
    end
  endgenerate
2007
 
2008
  // Per-DQS-group gate path: gate_srl_in -> SRLC32E (tap selected by the
  // group's 5 registered gate_dly bits) -> optional pipeline flop -> EN_DQS
  // output flop. Everything in this path is clocked by CLK.
  genvar gate_i;
  generate
    for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate
      // variable-length delay of the gate input
      SRLC32E u_gate_srl
        (
         .CLK (clk),
         .CE  (1'b1),
         .D   (gate_srl_in),
         .A   ({gate_dly_r[(gate_i*5)+4],
                gate_dly_r[(gate_i*5)+3],
                gate_dly_r[(gate_i*5)+2],
                gate_dly_r[(gate_i*5)+1],
                gate_dly_r[(gate_i*5)]}),
         .Q   (gate_srl_out[gate_i]),
         .Q31 ()
         );

      // When GATE_BASE_DELAY > 0 there is one spare cycle, which is spent on
      // an extra register stage between the SRL and the EN_DQS flop. After
      // PAR the controller logic can sit far from the DQS gate logic (which
      // is placed near the DQS I/O's), so the extra stage (CL >= 4, above
      // 200MHz) cuts the resulting long net delay. Otherwise the SRL output
      // is wired straight through.
      if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
        // flop between SRL32 and the EN_DQS flop (which is located near the
        // DDR2 IOB's)
        FDRSE u_gate_srl_ff
          (
           .C   (clk),
           .CE  (1'b1),
           .R   (1'b0),
           .S   (1'b0),
           .D   (gate_srl_out[gate_i]),
           .Q   (gate_srl_out_r[gate_i])
           ) /* synthesis syn_preserve = 1 */;
      end else begin: gen_gate_base_dly_le3
        assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i];
      end

      // final EN_DQS register for this group
      FDRSE u_en_dqs_ff
        (
         .C   (clk),
         .CE  (1'b1),
         .R   (1'b0),
         .S   (1'b0),
         .D   (gate_srl_out_r[gate_i]),
         .Q   (en_dqs[gate_i])
         ) /* synthesis syn_preserve = 1 */
           /* synthesis syn_replicate = 0 */;
    end
  endgenerate
2060
 
2061
  //*****************************************************************
2062
  // Find valid window: keep track of how long we've been in the same data
2063
  // window. If it's been long enough, then declare that we've found a stable
2064
  // valid window - in particular, that we're past any region of instability
2065
  // associated with the edge of the window. Use only when finding left edge
2066
  //*****************************************************************
2067
 
2068
  // Stable-window tracker, active only while searching for the left edge.
  // cal4_window_cnt counts consecutive good samples; cal4_stable_window is
  // sticky once set and is only cleared again in CAL4_INIT.
  always @(posedge clkdiv) begin
    if (cal4_state == CAL4_INIT) begin
      // clear both before a new window search starts
      cal4_stable_window <= 1'b0;
      cal4_window_cnt    <= 4'b0000;
    end else if (cal4_seek_left && (cal4_state == CAL4_FIND_EDGE)) begin
      // enough consecutive in-window taps seen -> window is stable
      if (cal4_window_cnt == MIN_WIN_SIZE-1)
        cal4_stable_window <= 1'b1;

      // the count only changes on a valid sample: a good sample extends the
      // run, a bad one restarts it
      if (cal4_data_valid && cal4_data_good)
        cal4_window_cnt <= cal4_window_cnt + 1;
      else if (cal4_data_valid)
        cal4_window_cnt <= 4'b0000;
    end
  end
2086
 
2087
  //*****************************************************************
2088
  // keep track of edge tap counts found, and whether we've
2089
  // incremented to the maximum number of taps allowed
2090
  //*****************************************************************
2091
 
2092
  // Shadow counter for the gate IDELAY tap setting, plus two flags:
  //  - cal4_idel_bit_tap: an increment is taking the count to
  //    CAL4_IDEL_BIT_VAL-1
  //  - cal4_idel_max_tap: an increment is taking the count to 63
  // Both flags compare against the pre-increment count and are cleared by
  // any decrement.
  always @(posedge clkdiv) begin
    if (cal4_dlyrst_gate || (cal4_state == CAL4_INIT)) begin
      cal4_idel_tap_cnt <= 6'b000000;
      cal4_idel_bit_tap <= 1'b0;
      cal4_idel_max_tap <= 1'b0;
    end else if (cal4_dlyce_gate) begin
      if (cal4_dlyinc_gate) begin
        // tap increment
        cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110);
        cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2);
        cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1;
      end else begin
        // tap decrement
        cal4_idel_max_tap <= 1'b0;
        cal4_idel_bit_tap <= 1'b0;
        cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1;
      end
    end
  end
2108
 
2109
  // RDEN pipe-clear down-counter shared by stage 3 and stage 4: held at
  // CALIB_RDEN_PIPE_LEN-1 while neither state machine is in its
  // RDEN_PIPE_CLR_WAIT state, and decremented every CLKDIV cycle otherwise.
  always @(posedge clkdiv) begin
    if ((cal3_state != CAL3_RDEN_PIPE_CLR_WAIT) &&
        (cal4_state != CAL4_RDEN_PIPE_CLR_WAIT))
      calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1;
    else
      calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1;
  end
2115
 
2116
  //*****************************************************************
2117
  // Stage 4 cal state machine
2118
  //*****************************************************************
2119
 
2120
  // Stage 4 (DQS gate) calibration FSM, clocked by CLKDIV, synchronous reset
  // on RSTDIV. One pass through the states calibrates the DQS group selected
  // by COUNT_GATE. State flow as coded below:
  //   IDLE  -> INIT                        when calib_start[3]
  //   INIT  -> RDEN_PIPE_CLR_WAIT          load initial gate/RDEN SRL values
  //   RDEN_PIPE_CLR_WAIT -> FIND_WINDOW    when calib_rden_pipe_cnt hits 0
  //   FIND_WINDOW -> FIND_EDGE             on cal4_data_valid; picks search
  //                                        direction (cal4_seek_left)
  //   FIND_EDGE -> IDEL_WAIT               step IDELAY by one tap
  //             -> ADJ_IDEL                edge found (or taps exhausted)
  //             -> RDEN_PIPE_CLR_WAIT      bump gate SRL delay, reset IDELAY
  //             -> IDLE                    gate SRL delay maxed out (error)
  //   IDEL_WAIT -> FIND_EDGE               when idel_set_wait deasserts
  //   ADJ_IDEL  -> DONE                    final inc/dec of IDELAY finished
  //   DONE  -> IDLE                        all groups done (calib_done[3] set)
  //         -> INIT                        next group, after refresh done
  always @(posedge clkdiv)
2121
    if (rstdiv) begin
2122
      calib_done[3]      <= 1'b0;
2123
      calib_done_tmp[3]  <= 1'b0;
2124
      calib_err[3]       <= 1'b0;
2125
      count_gate         <= 'b0;
2126
      gate_dly           <= 'b0;
2127
      next_count_gate    <= 'b0;
2128
      cal4_idel_adj_cnt  <= 6'bxxxxxx;
2129
      cal4_dlyce_gate    <= 1'b0;
2130
      cal4_dlyinc_gate   <= 1'b0;
2131
      cal4_dlyrst_gate   <= 1'b0;    // reset handled elsewhere in code
2132
      cal4_gate_srl_a    <= 5'bxxxxx;
2133
      cal4_rden_srl_a    <= 5'bxxxxx;
2134
      cal4_ref_req       <= 1'b0;
2135
      cal4_seek_left     <= 1'bx;
2136
      cal4_state         <= CAL4_IDLE;
      // NOTE(review): cal4_idel_adj_inc is not assigned in this reset branch;
      // it is written in FIND_EDGE before ADJ_IDEL reads it
2137
    end else begin
      // defaults for this cycle: refresh request and IDELAY control strobes
      // are deasserted unless a state below re-asserts them
2138
      cal4_ref_req     <= 1'b0;
2139
      cal4_dlyce_gate  <= 1'b0;
2140
      cal4_dlyinc_gate <= 1'b0;
2141
      cal4_dlyrst_gate <= 1'b0;
2142
 
2143
      case (cal4_state)
2144
        CAL4_IDLE: begin
2145
          count_gate      <= 'b0;
2146
          next_count_gate <= 'b0;
2147
          if (calib_start[3]) begin
2148
            gate_dly      <= 'b0;
2149
            calib_done[3] <= 1'b0;
2150
            cal4_state    <= CAL4_INIT;
2151
          end
2152
        end
2153
 
2154
        CAL4_INIT: begin
2155
          // load: (1) initial value of gate delay SRL, (2) appropriate
2156
          // value of RDEN SRL (so that we get correct "data valid" timing)
2157
          cal4_gate_srl_a <= GATE_BASE_INIT;
2158
          cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
2159
                              calib_rden_dly[(count_gate*5)+3],
2160
                              calib_rden_dly[(count_gate*5)+2],
2161
                              calib_rden_dly[(count_gate*5)+1],
2162
                              calib_rden_dly[(count_gate*5)]};
2163
          // let SRL pipe clear after loading initial shift value
2164
          cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2165
        end
2166
 
2167
        // sort of an initial state - start checking to see whether we're
2168
        // already in the window or not
2169
        CAL4_FIND_WINDOW:
2170
          // decide right away if we start in the proper window - this
2171
          // determines if we are then looking for the left (trailing) or
2172
          // right (leading) edge of the data valid window
2173
          if (cal4_data_valid) begin
2174
            // if we find a match - then we're already in window, now look
2175
            // for left edge. Otherwise, look for right edge of window
2176
            cal4_seek_left  <= cal4_data_good;
2177
            cal4_state      <= CAL4_FIND_EDGE;
2178
          end
2179
 
2180
        CAL4_FIND_EDGE:
2181
          // don't do anything until the exact clock cycle when to check that
2182
          // readback data is valid or not
2183
          if (cal4_data_valid) begin
2184
            // we're currently in the window, look for left edge of window
2185
            if (cal4_seek_left) begin
2186
              // make sure we've passed the right edge before trying to detect
2187
              // the left edge (i.e. avoid any edge "instability") - else, we
2188
              // may detect a "false" edge too soon. By design, if we start in
2189
              // the data valid window, always expect at least
2190
              // MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid
2191
              // window before we hit the left edge (this is because when stage
2192
              // 4 calibration first begins (i.e., gate_dly = 00, and IDELAY =
2193
              // 00), we're guaranteed to NOT be in the window, and we always
2194
              // start searching for MIN(BIT_TIME_TAPS,32) for the right edge
2195
              // of window. If we don't find it, increment gate_dly, and if we
2196
              // now start in the window, we have at least approximately
2197
              // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps.
2198
              // It's approximately because jitter, noise, etc. can bring this
2199
              // value down slightly. Because of this (although VERY UNLIKELY),
2200
              // we have to protect against decrementing IDELAY below 0
2201
              // during adjustment phase).
2202
              if (cal4_stable_window && !cal4_data_good) begin
2203
                // found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
2204
                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2205
                cal4_idel_adj_inc <= 1'b0;
2206
                cal4_state        <= CAL4_ADJ_IDEL;
2207
              end else begin
2208
                // Otherwise, keep looking for left edge:
2209
                if (cal4_idel_max_tap) begin
2210
                  // ran out of taps looking for left edge (max=63) - happens
2211
                  // for low frequency case, decrement by 32
2212
                  cal4_idel_adj_cnt <= 6'b100000;
2213
                  cal4_idel_adj_inc <= 1'b0;
2214
                  cal4_state        <= CAL4_ADJ_IDEL;
2215
                end else begin
2216
                  cal4_dlyce_gate  <= 1'b1;
2217
                  cal4_dlyinc_gate <= 1'b1;
2218
                  cal4_state       <= CAL4_IDEL_WAIT;
2219
                end
2220
              end
2221
            end else begin
2222
              // looking for right edge of window:
2223
              // look for the first match - this means we've found the right
2224
              // (leading) edge of the data valid window, increment by
2225
              // MIN(BIT_TIME_TAPS,32)
2226
              if (cal4_data_good) begin
2227
                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2228
                cal4_idel_adj_inc <= 1'b1;
2229
                cal4_state        <= CAL4_ADJ_IDEL;
2230
              end else begin
2231
                // Otherwise, keep looking:
2232
                // only look for MIN(BIT_TIME_TAPS,32) taps for right edge,
2233
                // if we haven't found it, then inc gate delay, try again
2234
                if (cal4_idel_bit_tap) begin
2235
                  // if we're already maxed out on gate delay, then error out
2236
                  // (simulation only - calib_err isn't currently connected)
2237
                  if (cal4_gate_srl_a == 5'b11111) begin
2238
                    calib_err[3] <= 1'b1;
2239
                    cal4_state   <= CAL4_IDLE;
2240
                  end else begin
2241
                    // otherwise, increment gate delay count, and start
2242
                    // over again
2243
                    cal4_gate_srl_a <= cal4_gate_srl_a + 1;
2244
                    cal4_dlyrst_gate <= 1'b1;
2245
                    cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2246
                  end
2247
                end else begin
2248
                  // keep looking for right edge
2249
                  cal4_dlyce_gate  <= 1'b1;
2250
                  cal4_dlyinc_gate <= 1'b1;
2251
                  cal4_state       <= CAL4_IDEL_WAIT;
2252
                end
2253
              end
2254
            end
2255
          end
2256
 
2257
        // wait for GATE IDELAY to settle, after reset or increment
2258
        CAL4_IDEL_WAIT: begin
2259
          // For simulation, load SRL addresses for all DQS with same value
2260
          if (SIM_ONLY != 0) begin
2261
            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
2262
              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2263
              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2264
              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2265
              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2266
              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
2267
            end
2268
          end else begin
            // hardware: only the 5 bits of the group under calibration
2269
            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2270
            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2271
            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2272
            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2273
            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
2274
          end
2275
          // check to see if we've found edge of window
2276
          if (!idel_set_wait)
2277
            cal4_state <= CAL4_FIND_EDGE;
2278
        end
2279
 
2280
        // give additional time for RDEN_R pipe to clear from effects of
2281
        // previous pipeline (and IDELAY reset)
2282
        CAL4_RDEN_PIPE_CLR_WAIT: begin
2283
          // MIG 2.2: Bug fix - make sure to update GATE_DLY count, since
2284
          // possible for FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW
2285
          // transition (i.e. need to make sure the gate count updated in
2286
          // FIND_EDGE gets reflected in GATE_DLY by the time we reach
2287
          // state FIND_WINDOW) - previously GATE_DLY only being updated
2288
          // during state CAL4_IDEL_WAIT
2289
          if (SIM_ONLY != 0) begin
2290
            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
2291
              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2292
              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2293
              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2294
              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2295
              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
2296
            end
2297
          end else begin
2298
            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2299
            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2300
            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2301
            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2302
            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
2303
          end
2304
          // look for new window
2305
          if (calib_rden_pipe_cnt == 5'b00000)
2306
            cal4_state <= CAL4_FIND_WINDOW;
2307
        end
2308
 
2309
        // increment/decrement DQS/DQ IDELAY for final adjustment
2310
        CAL4_ADJ_IDEL:
2311
          // add underflow protection for corner case when left edge found
2312
          // using fewer than MIN(BIT_TIME_TAPS,32) taps
          // (second term: a decrement is in flight while the tap count is 1,
          // so stop before the count would go below 0)
2313
          if ((cal4_idel_adj_cnt == 6'b000000) ||
2314
              (cal4_dlyce_gate && !cal4_dlyinc_gate &&
2315
               (cal4_idel_tap_cnt == 6'b000001))) begin
2316
            cal4_state <= CAL4_DONE;
2317
            // stop when all gates calibrated, or gate[0] cal'ed (for sim)
2318
            if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
2319
              calib_done_tmp[3] <= 1'b1;
2320
            else
2321
              // need for VHDL simulation to prevent out-of-index error
2322
              next_count_gate <= count_gate + 1;
2323
          end else begin
2324
            cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
2325
            cal4_dlyce_gate  <= 1'b1;
2326
            // whether inc or dec depends on whether left or right edge found
2327
            cal4_dlyinc_gate <= cal4_idel_adj_inc;
2328
          end
2329
 
2330
        // wait for IDELAY output to settle after decrement. Check current
2331
        // COUNT_GATE value and decide if we're done
2332
        CAL4_DONE:
2333
          if (!idel_set_wait) begin
2334
            count_gate <= next_count_gate;
2335
            if (calib_done_tmp[3]) begin
2336
              calib_done[3] <= 1'b1;
2337
              cal4_state <= CAL4_IDLE;
2338
            end else begin
2339
              // request auto-refresh after every DQS group calibrated to
2340
              // avoid tRAS violation
2341
              cal4_ref_req <= 1'b1;
2342
              if (calib_ref_done)
2343
                cal4_state <= CAL4_INIT;
2344
            end
2345
          end
      // NOTE(review): no default arm - cal4_state keeps its value if it ever
      // holds a value not listed above
2346
      endcase
2347
    end
2348
 
2349
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.