OpenCores
URL https://opencores.org/ocsvn/genesys_ddr2/genesys_ddr2/trunk

Subversion Repositories genesys_ddr2

[/] [genesys_ddr2/] [trunk/] [rtl/] [ipcore_dir/] [MEMCtrl/] [user_design/] [rtl/] [ddr2_phy_calib.v] - Blame information for rev 3

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 3 oana.bonca
//*****************************************************************************
2
// (c) Copyright 2006-2009 Xilinx, Inc. All rights reserved.
3
//
4
// This file contains confidential and proprietary information
5
// of Xilinx, Inc. and is protected under U.S. and 
6
// international copyright and other intellectual property
7
// laws.
8
//
9
// DISCLAIMER
10
// This disclaimer is not a license and does not grant any
11
// rights to the materials distributed herewith. Except as
12
// otherwise provided in a valid license issued to you by
13
// Xilinx, and to the maximum extent permitted by applicable
14
// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
15
// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
16
// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
17
// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
18
// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
19
// (2) Xilinx shall not be liable (whether in contract or tort,
20
// including negligence, or under any other theory of
21
// liability) for any loss or damage of any kind or nature
22
// related to, arising under or in connection with these
23
// materials, including for any direct, or any indirect,
24
// special, incidental, or consequential loss or damage
25
// (including loss of data, profits, goodwill, or any type of
26
// loss or damage suffered as a result of any action brought
27
// by a third party) even if such damage or loss was
28
// reasonably foreseeable or Xilinx had been advised of the
29
// possibility of the same.
30
//
31
// CRITICAL APPLICATIONS
32
// Xilinx products are not designed or intended to be fail-
33
// safe, or for use in any application requiring fail-safe
34
// performance, such as life-support or safety devices or
35
// systems, Class III medical devices, nuclear facilities,
36
// applications related to the deployment of airbags, or any
37
// other applications that could lead to death, personal
38
// injury, or severe property or environmental damage
39
// (individually and collectively, "Critical
40
// Applications"). Customer assumes the sole risk and
41
// liability of any use of Xilinx products in Critical
42
// Applications, subject only to applicable laws and
43
// regulations governing limitations on product liability.
44
//
45
// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
46
// PART OF THIS FILE AT ALL TIMES.
47
//*****************************************************************************
48
//   ____  ____
49
//  /   /\/   /
50
// /___/  \  /    Vendor: Xilinx
51
// \   \   \/     Version: 3.6.1
52
//  \   \         Application: MIG
53
//  /   /         Filename: ddr2_phy_calib.v
54
// /___/   /\     Date Last Modified: $Date: 2010/11/26 18:26:02 $
55
// \   \  /  \    Date Created: Thu Aug 10 2006
56
//  \___\/\___\
57
//
58
//Device: Virtex-5
59
//Design Name: DDR2
60
//Purpose:
61
//   This module handles calibration after memory initialization.
62
//Reference:
63
//Revision History:
64
//   Rev 1.1 - Default statement is added for the CASE statement of
65
//             rdd_mux_sel logic. PK. 03/23/09
66
//   Rev 1.2 - Change training pattern detected for stage 3 calibration.
67
//             Use 2-bits per DQS group for stage 3 pattern detection.
68
//             RC. 09/21/09
69
//*****************************************************************************
70
 
71
`timescale 1ns/1ps
72
 
73
module ddr2_phy_calib #
74
  (
75
   // Following parameters are for 72-bit RDIMM design (for ML561 Reference
76
   // board design). Actual values may be different. Actual parameters values
77
   // are passed from design top module MEMCtrl module. Please refer to
78
   // the MEMCtrl module for actual values.
79
   parameter DQ_WIDTH      = 72,
80
   parameter DQ_BITS       = 7,
81
   parameter DQ_PER_DQS    = 8,
82
   parameter DQS_BITS      = 4,
83
   parameter DQS_WIDTH     = 9,
84
   parameter ADDITIVE_LAT  = 0,
85
   parameter CAS_LAT       = 5,
86
   parameter REG_ENABLE    = 1,
87
   parameter CLK_PERIOD    = 3000,
88
   parameter SIM_ONLY      = 0,
89
   parameter DEBUG_EN      = 0
90
   )
91
  (
92
   input                                   clk,
93
   input                                   clkdiv,
94
   input                                   rstdiv,
95
   input [3:0]                             calib_start,
96
   input                                   ctrl_rden,
97
   input                                   phy_init_rden,
98
   input [DQ_WIDTH-1:0]                    rd_data_rise,
99
   input [DQ_WIDTH-1:0]                    rd_data_fall,
100
   input                                   calib_ref_done,
101
   output reg [3:0]                        calib_done,
102
   output reg                              calib_ref_req,
103
   output [DQS_WIDTH-1:0]                  calib_rden,
104
   output reg [DQS_WIDTH-1:0]              calib_rden_sel,
105
   output reg                              dlyrst_dq,
106
   output reg [DQ_WIDTH-1:0]               dlyce_dq,
107
   output reg [DQ_WIDTH-1:0]               dlyinc_dq,
108
   output reg                              dlyrst_dqs,
109
   output reg [DQS_WIDTH-1:0]              dlyce_dqs,
110
   output reg [DQS_WIDTH-1:0]              dlyinc_dqs,
111
   output reg [DQS_WIDTH-1:0]              dlyrst_gate,
112
   output reg [DQS_WIDTH-1:0]              dlyce_gate,
113
   output reg [DQS_WIDTH-1:0]              dlyinc_gate,
114
   //(* XIL_PAR_NO_REG_ORDER = "TRUE", XIL_PAR_PATH="Q->u_iodelay_dq_ce.DATAIN", syn_keep = "1", keep = "TRUE"*)
115
   output [DQS_WIDTH-1:0]                  en_dqs,
116
   output [DQS_WIDTH-1:0]                  rd_data_sel,
117
   // Debug signals (optional use)
118
   input                                   dbg_idel_up_all,
119
   input                                   dbg_idel_down_all,
120
   input                                   dbg_idel_up_dq,
121
   input                                   dbg_idel_down_dq,
122
   input                                   dbg_idel_up_dqs,
123
   input                                   dbg_idel_down_dqs,
124
   input                                   dbg_idel_up_gate,
125
   input                                   dbg_idel_down_gate,
126
   input [DQ_BITS-1:0]                     dbg_sel_idel_dq,
127
   input                                   dbg_sel_all_idel_dq,
128
   input [DQS_BITS:0]                      dbg_sel_idel_dqs,
129
   input                                   dbg_sel_all_idel_dqs,
130
   input [DQS_BITS:0]                      dbg_sel_idel_gate,
131
   input                                   dbg_sel_all_idel_gate,
132
   output [3:0]                            dbg_calib_done,
133
   output [3:0]                            dbg_calib_err,
134
   output [(6*DQ_WIDTH)-1:0]               dbg_calib_dq_tap_cnt,
135
   output [(6*DQS_WIDTH)-1:0]              dbg_calib_dqs_tap_cnt,
136
   output [(6*DQS_WIDTH)-1:0]              dbg_calib_gate_tap_cnt,
137
   output [DQS_WIDTH-1:0]                  dbg_calib_rd_data_sel,
138
   output [(5*DQS_WIDTH)-1:0]              dbg_calib_rden_dly,
139
   output [(5*DQS_WIDTH)-1:0]              dbg_calib_gate_dly
140
   );
141
 
142
  // minimum time (in IDELAY taps) for which capture data must be stable for
143
  // the algorithm to consider it a valid data window
144
  localparam MIN_WIN_SIZE = 5;
145
  // IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
146
  // we only have to wait enough for input with new IDELAY value to
147
  // propagate through pipeline stages.
148
  localparam IDEL_SET_VAL = 3'b111;
149
  // # of clock cycles to delay read enable to determine if read data pattern
150
  // is correct for stage 3/4 (RDEN, DQS gate) calibration
151
  localparam CALIB_RDEN_PIPE_LEN = 31;
152
  // translate CAS latency into number of clock cycles for read valid delay
153
  // determination. Really only needed for CL = 2.5 (set to 2)
154
  localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
155
  // an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
156
  // is the minimum possible delay through the SRL32
157
  localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
158
  // an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
159
  // gate. This is min possible value the SRL32 delay can be:
160
  //  - Delay from end of deassertion of CTRL_RDEN to last falling edge of
161
  //    read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
162
  //  - Minimum time for DQS gate circuit to be generated:
163
  //      * 1 cyc to register CTRL_RDEN from controller
164
  //      * 1 cyc after RDEN_CTRL falling edge
165
  //      * 1 cyc min through SRL32
166
  //      * 1 cyc through SRL32 output flop
167
  //      * 0 (<1) cyc of synchronization to DQS domain via IDELAY
168
  //      * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
169
  //    Total = 5 cyc < 6.5 cycles
170
  //    The total should be less than 5.5 cycles to account for prop delays
171
  //    adding one cycle to the synchronization time via the IDELAY.
172
  //    NOTE: Value differs because of optional pipeline register added
173
  //      for case of RDEN_BASE_DELAY > 3 to improve timing
174
  localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
175
  localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
176
  // used for RDEN calibration: difference between shift value used during
177
  // calibration, and shift value for actual RDEN SRL. Only applies when
178
  // RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
179
  // of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
180
  localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
181
  // fix minimum value of DQS_BITS to be 1 to handle the case where there's only
182
  // one DQS group. We could also enforce that user always inputs minimum
183
  // value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
184
  // Assume we don't have to do this for DQ, DQ_WIDTH always > 1
185
  localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
186
  // how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
187
  // current calibration, but leave for debug
188
  localparam DQ_IDEL_INIT = 6'b000000;
189
  // # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
190
  localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
191
             CLK_PERIOD/150 : 63;
192
 
193
  // used in various places during stage 4 cal: (1) determines maximum taps
194
  // to increment when finding right edge, (2) amount to decrement after
195
  // finding left edge, (3) amount to increment after finding right edge
196
  localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
197
             6'b100000 : BIT_TIME_TAPS;
198
 
199
  localparam CAL1_IDLE                   = 4'h0;
200
  localparam CAL1_INIT                   = 4'h1;
201
  localparam CAL1_INC_IDEL               = 4'h2;
202
  localparam CAL1_FIND_FIRST_EDGE        = 4'h3;
203
  localparam CAL1_FIRST_EDGE_IDEL_WAIT   = 4'h4;
204
  localparam CAL1_FOUND_FIRST_EDGE_WAIT  = 4'h5;
205
  localparam CAL1_FIND_SECOND_EDGE       = 4'h6;
206
  localparam CAL1_SECOND_EDGE_IDEL_WAIT  = 4'h7;
207
  localparam CAL1_CALC_IDEL              = 4'h8;
208
  localparam CAL1_DEC_IDEL               = 4'h9;
209
  localparam CAL1_DONE                   = 4'hA;
210
 
211
  localparam CAL2_IDLE                    = 4'h0;
212
  localparam CAL2_INIT                    = 4'h1;
213
  localparam CAL2_INIT_IDEL_WAIT          = 4'h2;
214
  localparam CAL2_FIND_EDGE_POS           = 4'h3;
215
  localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
216
  localparam CAL2_FIND_EDGE_NEG           = 4'h5;
217
  localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
218
  localparam CAL2_DEC_IDEL                = 4'h7;
219
  localparam CAL2_DONE                    = 4'h8;
220
 
221
  localparam CAL3_IDLE                    = 3'h0;
222
  localparam CAL3_INIT                    = 3'h1;
223
  localparam CAL3_DETECT                  = 3'h2;
224
  localparam CAL3_RDEN_PIPE_CLR_WAIT      = 3'h3;
225
  localparam CAL3_DONE                    = 3'h4;
226
 
227
  localparam CAL4_IDLE                    = 3'h0;
228
  localparam CAL4_INIT                    = 3'h1;
229
  localparam CAL4_FIND_WINDOW             = 3'h2;
230
  localparam CAL4_FIND_EDGE               = 3'h3;
231
  localparam CAL4_IDEL_WAIT               = 3'h4;
232
  localparam CAL4_RDEN_PIPE_CLR_WAIT      = 3'h5;
233
  localparam CAL4_ADJ_IDEL                = 3'h6;
234
  localparam CAL4_DONE                    = 3'h7;
235
 
236
  integer                        i, j;
237
 
238
  reg [5:0]                      cal1_bit_time_tap_cnt;
239
  reg [1:0]                      cal1_data_chk_last;
240
  reg                            cal1_data_chk_last_valid;
241
  reg [1:0]                      cal1_data_chk_r;
242
  reg                            cal1_dlyce_dq;
243
  reg                            cal1_dlyinc_dq;
244
  reg                            cal1_dqs_dq_init_phase;
245
  reg                            cal1_detect_edge;
246
  reg                            cal1_detect_stable;
247
  reg                            cal1_found_second_edge;
248
  reg                            cal1_found_rising;
249
  reg                            cal1_found_window;
250
  reg                            cal1_first_edge_done;
251
  reg [5:0]                      cal1_first_edge_tap_cnt;
252
  reg [6:0]                      cal1_idel_dec_cnt;
253
  reg [5:0]                      cal1_idel_inc_cnt;
254
  reg [5:0]                      cal1_idel_max_tap;
255
  reg                            cal1_idel_max_tap_we;
256
  reg [5:0]                      cal1_idel_tap_cnt;
257
  reg                            cal1_idel_tap_limit_hit;
258
  reg [6:0]                      cal1_low_freq_idel_dec;
259
  reg                            cal1_ref_req;
260
  wire                           cal1_refresh;
261
  reg [3:0]                      cal1_state;
262
  reg [3:0]                      cal1_window_cnt;
263
  reg                            cal2_curr_sel;
264
  wire                           cal2_detect_edge;
265
  reg                            cal2_dlyce_dqs;
266
  reg                            cal2_dlyinc_dqs;
267
  reg [5:0]                      cal2_idel_dec_cnt;
268
  reg [5:0]                      cal2_idel_tap_cnt;
269
  reg [5:0]                      cal2_idel_tap_limit;
270
  reg                            cal2_idel_tap_limit_hit;
271
  reg                            cal2_rd_data_fall_last_neg;
272
  reg                            cal2_rd_data_fall_last_pos;
273
  reg                            cal2_rd_data_last_valid_neg;
274
  reg                            cal2_rd_data_last_valid_pos;
275
  reg                            cal2_rd_data_rise_last_neg;
276
  reg                            cal2_rd_data_rise_last_pos;
277
  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel;
278
  wire                           cal2_rd_data_sel_edge;
279
  reg [DQS_WIDTH-1:0]            cal2_rd_data_sel_r;
280
  reg                            cal2_ref_req;
281
  reg [3:0]                      cal2_state;
282
  reg                            cal3_data_match;
283
  reg                            cal3_data_match_stgd;
284
  wire                           cal3_data_valid;
285
  wire                           cal3_match_found;
286
  wire [4:0]                     cal3_rden_dly;
287
  reg [4:0]                      cal3_rden_srl_a;
288
  reg [2:0]                      cal3_state;
289
  wire                           cal4_data_good;
290
  reg                            cal4_data_match;
291
  reg                            cal4_data_match_stgd;
292
  wire                           cal4_data_valid;
293
  reg                            cal4_dlyce_gate;
294
  reg                            cal4_dlyinc_gate;
295
  reg                            cal4_dlyrst_gate;
296
  reg [4:0]                      cal4_gate_srl_a;
297
  reg [5:0]                      cal4_idel_adj_cnt;
298
  reg                            cal4_idel_adj_inc;
299
  reg                            cal4_idel_bit_tap;
300
  reg [5:0]                      cal4_idel_tap_cnt;
301
  reg                            cal4_idel_max_tap;
302
  reg [4:0]                      cal4_rden_srl_a;
303
  reg                            cal4_ref_req;
304
  reg                            cal4_seek_left;
305
  reg                            cal4_stable_window;
306
  reg [2:0]                      cal4_state;
307
  reg [3:0]                      cal4_window_cnt;
308
  reg [3:0]                      calib_done_tmp;         // only for stg1/2/4
309
  reg                            calib_ctrl_gate_pulse_r;
310
  reg                            calib_ctrl_rden;
311
  reg                            calib_ctrl_rden_r;
312
  wire                           calib_ctrl_rden_negedge;
313
  reg                            calib_ctrl_rden_negedge_r;
314
  reg [3:0]                      calib_done_r;
315
  reg [3:0]                      calib_err;
316
  reg [1:0]                      calib_err_2;
317
  wire                           calib_init_gate_pulse;
318
  reg                            calib_init_gate_pulse_r;
319
  reg                            calib_init_gate_pulse_r1;
320
  reg                            calib_init_rden;
321
  reg                            calib_init_rden_r;
322
  reg [4:0]                      calib_rden_srl_a;
323
  wire [4:0]                     calib_rden_srl_a_r;
324
  reg [(5*DQS_WIDTH)-1:0]        calib_rden_dly;
325
  reg                            calib_rden_edge_r;
326
  reg [4:0]                      calib_rden_pipe_cnt;
327
  wire                           calib_rden_srl_out;
328
  wire                           calib_rden_srl_out_r;
329
  reg                            calib_rden_srl_out_r1;
330
  reg                            calib_rden_valid;
331
  reg                            calib_rden_valid_stgd;
332
  reg [DQ_BITS-1:0]              count_dq;
333
  reg [DQS_BITS_FIX-1:0]         count_dqs;
334
  reg [DQS_BITS_FIX-1:0]         count_gate;
335
  reg [DQS_BITS_FIX-1:0]         count_rden;
336
  reg                            ctrl_rden_r;
337
  wire                           dlyce_or;
338
  reg [(5*DQS_WIDTH)-1:0]        gate_dly;
339
  wire [(5*DQS_WIDTH)-1:0]       gate_dly_r;
340
  wire                           gate_srl_in;
341
  wire [DQS_WIDTH-1:0]           gate_srl_out;
342
  wire [DQS_WIDTH-1:0]           gate_srl_out_r;
343
  reg [2:0]                      idel_set_cnt;
344
  wire                           idel_set_wait;
345
  reg [DQ_BITS-1:0]              next_count_dq;
346
  reg [DQS_BITS_FIX-1:0]         next_count_dqs;
347
  reg [DQS_BITS_FIX-1:0]         next_count_gate;
348
  reg                            phy_init_rden_r;
349
  reg                            phy_init_rden_r1;
350
  reg [DQS_WIDTH-1:0]            rd_data_fall_1x_bit1_r1;
351
  reg [DQ_WIDTH-1:0]             rd_data_fall_1x_r;
352
  reg [DQS_WIDTH-1:0]            rd_data_fall_1x_r1;
353
  reg [DQS_WIDTH-1:0]            rd_data_fall_2x_bit1_r;
354
  reg [DQS_WIDTH-1:0]            rd_data_fall_2x_r;
355
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q1;
356
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q1_bit1;
357
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q2;
358
  wire [DQS_WIDTH-1:0]           rd_data_fall_chk_q2_bit1;
359
  reg [DQS_WIDTH-1:0]            rd_data_rise_1x_bit1_r1;
360
  reg [DQ_WIDTH-1:0]             rd_data_rise_1x_r;
361
  reg [DQS_WIDTH-1:0]            rd_data_rise_1x_r1;
362
  reg [DQS_WIDTH-1:0]            rd_data_rise_2x_bit1_r;
363
  reg [DQS_WIDTH-1:0]            rd_data_rise_2x_r;
364
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q1;
365
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q1_bit1;
366
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q2;
367
  wire [DQS_WIDTH-1:0]           rd_data_rise_chk_q2_bit1;
368
  reg                            rdd_fall_q1;
369
  reg                            rdd_fall_q1_bit1;
370
  reg                            rdd_fall_q1_bit1_r;
371
  reg                            rdd_fall_q1_bit1_r1;
372
  reg                            rdd_fall_q1_r;
373
  reg                            rdd_fall_q1_r1;
374
  reg                            rdd_fall_q2;
375
  reg                            rdd_fall_q2_bit1;
376
  reg                            rdd_fall_q2_bit1_r;
377
  reg                            rdd_fall_q2_r;
378
  reg                            rdd_rise_q1;
379
  reg                            rdd_rise_q1_bit1;
380
  reg                            rdd_rise_q1_bit1_r;
381
  reg                            rdd_rise_q1_bit1_r1;
382
  reg                            rdd_rise_q1_r;
383
  reg                            rdd_rise_q1_r1;
384
  reg                            rdd_rise_q2;
385
  reg                            rdd_rise_q2_bit1;
386
  reg                            rdd_rise_q2_bit1_r;
387
  reg                            rdd_rise_q2_r;
388
  reg [DQS_BITS_FIX-1:0]         rdd_mux_sel;
389
  reg                            rden_dec;
390
  reg [(5*DQS_WIDTH)-1:0]        rden_dly;
391
  wire [(5*DQS_WIDTH)-1:0]       rden_dly_r;
392
  reg [4:0]                      rden_dly_0;
393
  reg                            rden_inc;
394
  reg [DQS_WIDTH-1:0]            rden_mux;
395
  wire [DQS_WIDTH-1:0]           rden_srl_out;
396
 
397
  // Debug
398
  integer                        x;
399
  reg [5:0]                      dbg_dq_tap_cnt [DQ_WIDTH-1:0];
400
  reg [5:0]                      dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
401
  reg [5:0]                      dbg_gate_tap_cnt [DQS_WIDTH-1:0];
402
 
403
  //***************************************************************************
404
  // Debug output ("dbg_phy_calib_*")
405
  // NOTES:
406
  //  1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
407
  //     although they are also static after calibration is complete. This
408
  //     means the user can either connect them to a Chipscope ILA, or to
409
  //     either a sync/async VIO input block. Using an async VIO has the
410
  //     advantage of not requiring these paths to meet cycle-to-cycle timing.
411
  //  2. The widths of most of these debug buses are dependent on the # of
412
  //     DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits))
413
  // SIGNAL DESCRIPTION:
414
  //  1. calib_done:   4 bits - each one asserted as each phase of calibration
415
  //                   is completed.
416
  //  2. calib_err:    4 bits - each one asserted when a calibration error
417
  //                   encountered for that stage. Some of these bits may not
418
  //                   be used (not all cal stages report an error).
419
  //  3. dq_tap_cnt:   final IDELAY tap counts for all DQ IDELAYs
420
  //  4. dqs_tap_cnt:  final IDELAY tap counts for all DQS IDELAYs
421
  //  5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
422
  //                   synchronization IDELAYs
423
  //  6. rd_data_sel:  final read capture MUX (either "positive" or "negative"
424
  //                   edge capture) settings for all DQS groups
425
  //  7. rden_dly:     related to # of cycles after issuing a read until when
426
  //                   read data is valid - for all DQS groups
427
  //  8. gate_dly:     related to # of cycles after issuing a read until when
428
  //                   clock enable for all DQ's is deasserted to prevent
429
  //                   effect of DQS postamble glitch - for all DQS groups
430
  //***************************************************************************
431
 
432
  //*****************************************************************
433
  // Record IDELAY tap values by "snooping" IDELAY control signals
434
  //*****************************************************************
435
 
436
  // record DQ IDELAY tap values
437
  genvar dbg_dq_tc_i;
438
  generate
439
    for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
440
         dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
441
      assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i)+5:(6*dbg_dq_tc_i)]
442
               = dbg_dq_tap_cnt[dbg_dq_tc_i];
443
      always @(posedge clkdiv)
444
        if (rstdiv | dlyrst_dq)
445
          dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'b000000;
446
        else
447
          if (dlyce_dq[dbg_dq_tc_i])
448
            if (dlyinc_dq[dbg_dq_tc_i])
449
              dbg_dq_tap_cnt[dbg_dq_tc_i]
450
                <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 1;
451
            else
452
              dbg_dq_tap_cnt[dbg_dq_tc_i]
453
                <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 1;
454
    end
455
  endgenerate
456
 
457
  // record DQS IDELAY tap values
458
  genvar dbg_dqs_tc_i;
459
  generate
460
    for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
461
         dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
462
      assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i)+5:(6*dbg_dqs_tc_i)]
463
               = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
464
      always @(posedge clkdiv)
465
        if (rstdiv | dlyrst_dqs)
466
          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'b000000;
467
        else
468
          if (dlyce_dqs[dbg_dqs_tc_i])
469
            if (dlyinc_dqs[dbg_dqs_tc_i])
470
              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
471
                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 1;
472
            else
473
              dbg_dqs_tap_cnt[dbg_dqs_tc_i]
474
                <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 1;
475
    end
476
  endgenerate
477
 
478
  // record DQS gate IDELAY tap values
479
  genvar dbg_gate_tc_i;
480
  generate
481
    for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
482
         dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
483
      assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i)+5:(6*dbg_gate_tc_i)]
484
               = dbg_gate_tap_cnt[dbg_gate_tc_i];
485
      always @(posedge clkdiv)
486
        if (rstdiv | dlyrst_gate[dbg_gate_tc_i])
487
          dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'b000000;
488
        else
489
          if (dlyce_gate[dbg_gate_tc_i])
490
            if (dlyinc_gate[dbg_gate_tc_i])
491
              dbg_gate_tap_cnt[dbg_gate_tc_i]
492
                <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 1;
493
            else
494
              dbg_gate_tap_cnt[dbg_gate_tc_i]
495
                <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 1;
496
    end
497
  endgenerate
498
 
499
  assign dbg_calib_done        = calib_done;
500
  assign dbg_calib_err         = calib_err;
501
  assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
502
  assign dbg_calib_rden_dly    = rden_dly;
503
  assign dbg_calib_gate_dly    = gate_dly;
504
 
505
  //***************************************************************************
506
  // Read data pipelining, and read data "ISERDES" data width expansion
507
  //***************************************************************************
508
 
509
  // For all data bits, register incoming capture data to slow clock to improve
510
  // timing. Adding single pipeline stage does not affect functionality (as
511
  // long as we make sure to wait extra clock cycle after changing DQ IDELAY)
512
  // Also note in this case that we're "missing" every other clock cycle's
513
  // worth of data capture since we're sync'ing to the slow clock. This is
514
  // fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
515
  // for different circuit to handle those stages)
516
  always @(posedge clkdiv) begin
517
    rd_data_rise_1x_r <= rd_data_rise;
518
    rd_data_fall_1x_r <= rd_data_fall;
519
  end
520
 
521
  // For every DQ_PER_DQS bit, generate what is essentially an ISERDES-type
522
  // data width expander. Will need this for stage 3 and 4 cal, where we need
523
  // to compare data over consecutive clock cycles. We can also use this for
524
  // stage 2 as well (stage 2 doesn't require every bit to be looked at, only
525
  // one bit per DQS group)
526
  // MIG 3.3: Expand to use lower two bits of each DQS group - use for stage
527
  //  3 calibration for added robustness, since we will be checking for the
528
  //  training pattern from the memory even when the data bus is 3-stated.
529
  //  Theoretically it is possible for whatever garbage data is on the bus
530
  //  to be interpreted as the training sequence, although this can be made
531
  //  very unlikely by the choice of training sequence (bit sequence, length)
532
  //  and the number of bits compared for each DQS group. 
533
  genvar rdd_i;
534
  generate
535
    for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
536
      // first stage: keep data in fast clk domain. Store data over two
537
      // consecutive clock cycles for rise/fall data for proper transfer
538
      // to slow clock domain
539
      always @(posedge clk) begin
540
        rd_data_rise_2x_r[rdd_i]      <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
541
        rd_data_fall_2x_r[rdd_i]      <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
542
        rd_data_rise_2x_bit1_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)+1];
543
        rd_data_fall_2x_bit1_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)+1];
544
      end
545
      // second stage, register first stage to slow clock domain, 2nd stage
546
      // consists of both these flops, and the rd_data_rise_1x_r flops
547
      always @(posedge clkdiv) begin
548
        rd_data_rise_1x_r1[rdd_i]      <= rd_data_rise_2x_r[rdd_i];
549
        rd_data_fall_1x_r1[rdd_i]      <= rd_data_fall_2x_r[rdd_i];
550
        rd_data_rise_1x_bit1_r1[rdd_i] <= rd_data_rise_2x_bit1_r[rdd_i];
551
        rd_data_fall_1x_bit1_r1[rdd_i] <= rd_data_fall_2x_bit1_r[rdd_i];
552
      end
553
      // now we have four outputs - representing rise/fall outputs over last
554
      // 2 fast clock cycles. However, the ordering these represent can either
555
      // be: (1) Q2 = data @ time = n, Q1 = data @ time = n+1, or (2)
556
      // Q2 = data @ time = n - 1, Q1 = data @ time = n (and data at [Q1,Q2]
557
      // is "staggered") - leave it up to the stage of calibration using this
558
      // to figure out which is which, if they care at all (e.g. stage 2 cal
559
      // doesn't care about the ordering)
560
      assign rd_data_rise_chk_q1[rdd_i]
561
               = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
562
      assign rd_data_rise_chk_q2[rdd_i]
563
               = rd_data_rise_1x_r1[rdd_i];
564
      assign rd_data_fall_chk_q1[rdd_i]
565
               = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
566
      assign rd_data_fall_chk_q2[rdd_i]
567
               = rd_data_fall_1x_r1[rdd_i];
568
    // MIG 3.3: Added comparison for second bit in DQS group for stage 3 cal
569
      assign rd_data_rise_chk_q1_bit1[rdd_i]
570
               = rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)+1];
571
      assign rd_data_rise_chk_q2_bit1[rdd_i]
572
               = rd_data_rise_1x_bit1_r1[rdd_i];
573
      assign rd_data_fall_chk_q1_bit1[rdd_i]
574
               = rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)+1];
575
      assign rd_data_fall_chk_q2_bit1[rdd_i]
576
               = rd_data_fall_1x_bit1_r1[rdd_i];
577
    end
578
  endgenerate
579
 
580
  //*****************************************************************
581
  // Outputs of these simplified ISERDES circuits then feed MUXes based on
582
  // which DQ the current calibration algorithm needs to look at
583
  //*****************************************************************
584
 
585
  // generate MUX control; assume that adding an extra pipeline stage isn't
586
  // an issue - whatever stage cal logic is using output of MUX will wait
587
  // enough time after changing it
588
  // Registered select for the read-data check MUXes. The source of the
  // select is chosen by the value of calib_done[2:0]; any value other than
  // the three listed below drives the select to X.
  always @(posedge clkdiv) begin
    (* full_case, parallel_case *) case (calib_done[2:0])
      3'b111:  rdd_mux_sel <= next_count_gate;
      3'b011:  rdd_mux_sel <= count_rden;
      3'b001:  rdd_mux_sel <= next_count_dqs;
      default: rdd_mux_sel <= {DQS_BITS_FIX{1'bx}};
    endcase
  end
596
 
597
  // Register the check bits of the DQS group addressed by rdd_mux_sel.
  // "q1"/"q2" are the two capture samples; the "_bit1" versions carry the
  // second DQ bit of the same group.
  always @(posedge clkdiv) begin
    // rising-edge capture data
    rdd_rise_q1      <= rd_data_rise_chk_q1[rdd_mux_sel];
    rdd_rise_q1_bit1 <= rd_data_rise_chk_q1_bit1[rdd_mux_sel];
    rdd_rise_q2      <= rd_data_rise_chk_q2[rdd_mux_sel];
    rdd_rise_q2_bit1 <= rd_data_rise_chk_q2_bit1[rdd_mux_sel];
    // falling-edge capture data
    rdd_fall_q1      <= rd_data_fall_chk_q1[rdd_mux_sel];
    rdd_fall_q1_bit1 <= rd_data_fall_chk_q1_bit1[rdd_mux_sel];
    rdd_fall_q2      <= rd_data_fall_chk_q2[rdd_mux_sel];
    rdd_fall_q2_bit1 <= rd_data_fall_chk_q2_bit1[rdd_mux_sel];
  end
607
 
608
  //***************************************************************************
609
  // Demultiplexer to control (reset, increment, decrement) IDELAY tap values
610
  //   For DQ:
611
  //     STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per-bit
612
  //       deskew, increment all bits in the current DQS set
613
  //     STG2: inc/dec all DQ's in the current DQS set.
614
  // NOTE: Nice to add some error checking logic here (or elsewhere in the
615
  //       code) to check if logic attempts to overflow tap value
616
  //***************************************************************************
617
 
618
  // don't use DLYRST to reset value of IDELAY after reset. Need to change this
619
  // if we want to allow user to recalibrate after initial reset
620
  // DQ and DQS IDELAY reset strobes: held at 1 while rstdiv is asserted,
  // driven to 0 on every other clock.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      dlyrst_dqs <= 1'b1;
      dlyrst_dq  <= 1'b1;
    end else begin
      dlyrst_dqs <= 1'b0;
      dlyrst_dq  <= 1'b0;
    end
  end
628
 
629
  // Drive the per-bit IDELAY clock-enable (CE) and increment (INC) strobes
  // for DQ and DQS. Every strobe defaults to 0 each cycle; the assignments
  // further down override that default (the last nonblocking assignment to
  // a bit wins). Sources, in priority order:
  //   1. stage 1 request (cal1_dlyce_dq)   - DQ only
  //   2. stage 2 request (cal2_dlyce_dqs)  - DQS plus all DQ in its group
  //   3. debug port (only when DEBUG_EN != 0)
  // When SIM_ONLY != 0 a calibration request is applied to every bit
  // instead of only the currently selected one.
  always @(posedge clkdiv) begin
630
    if (rstdiv) begin
631
      dlyce_dq   <= 'b0;
632
      dlyinc_dq  <= 'b0;
633
      dlyce_dqs  <= 'b0;
634
      dlyinc_dqs <= 'b0;
635
    end else begin
636
      // default: no IDELAY is stepped this cycle
      dlyce_dq   <= 'b0;
637
      dlyinc_dq  <= 'b0;
638
      dlyce_dqs  <= 'b0;
639
      dlyinc_dqs <= 'b0;
640
 
641
      // stage 1 cal: change only specified DQ
642
      if (cal1_dlyce_dq) begin
643
        if (SIM_ONLY == 0) begin
644
          dlyce_dq[count_dq] <= 1'b1;
645
          dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
646
        end else begin
647
          // if simulation, then calibrate only first DQ, apply results
648
          // to all DQs (i.e. assume delay on all DQs is the same)
649
          for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
650
            dlyce_dq[i] <= 1'b1;
651
            dlyinc_dq[i] <= cal1_dlyinc_dq;
652
          end
653
        end
654
      end else if (cal2_dlyce_dqs) begin
655
        // stage 2 cal: change DQS and all corresponding DQ's
656
        if (SIM_ONLY == 0) begin
657
          dlyce_dqs[count_dqs] <= 1'b1;
658
          dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
659
          // DQ bits of the selected group sit at DQ_PER_DQS*count_dqs + i
          for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
660
            dlyce_dq[(DQ_PER_DQS*count_dqs)+i] <= 1'b1;
661
            dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
662
          end
663
        end else begin
664
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
665
            // if simulation, then calibrate only first DQS
666
            dlyce_dqs[i] <= 1'b1;
667
            dlyinc_dqs[i] <= cal2_dlyinc_dqs;
668
            for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
669
              dlyce_dq[(DQ_PER_DQS*i)+j] <= 1'b1;
670
              dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
671
            end
672
          end
673
        end
674
      end else if (DEBUG_EN != 0) begin
675
        // DEBUG: allow user to vary IDELAY tap settings
676
        // For DQ IDELAY taps
677
        // "all" branch: every DQ gets the same CE/INC; note that the
        // DQ-specific up/down inputs also contribute to CE/INC here
        if (dbg_idel_up_all || dbg_idel_down_all ||
678
            dbg_sel_all_idel_dq) begin
679
          for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
680
            dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
681
                           dbg_idel_up_dq  | dbg_idel_down_dq;
682
            dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
683
          end
684
        end else begin
685
          // single-bit branch: only the DQ picked by dbg_sel_idel_dq moves
          dlyce_dq <= 'b0;
686
          dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
687
                                       dbg_idel_down_dq;
688
          dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
689
        end
690
        // For DQS IDELAY taps
691
        if (dbg_idel_up_all || dbg_idel_down_all ||
692
            dbg_sel_all_idel_dqs) begin
693
          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
694
            dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
695
                            dbg_idel_up_dqs | dbg_idel_down_dqs;
696
            dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
697
          end
698
        end else begin
699
          // single-bit branch: only the DQS picked by dbg_sel_idel_dqs moves
          dlyce_dqs <= 'b0;
700
          dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
701
                                         dbg_idel_down_dqs;
702
          dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
703
        end
704
      end
705
    end
706
  end
707
 
708
  // GATE synchronization is handled directly by Stage 4 calibration FSM
709
  // Drive the per-DQS-group gate IDELAY reset (RST), clock-enable (CE) and
  // increment (INC) strobes. RST is all-ones during rstdiv; otherwise all
  // three default to 0 each cycle and are overridden below (the last
  // nonblocking assignment to a bit wins). The reset request
  // (cal4_dlyrst_gate) is evaluated independently of the CE request; the
  // debug port is only consulted when cal4_dlyce_gate is low and
  // DEBUG_EN != 0. When SIM_ONLY != 0 a request is applied to every gate.
  always @(posedge clkdiv)
710
    if (rstdiv) begin
711
      dlyrst_gate <= {DQS_WIDTH{1'b1}};
712
      dlyce_gate  <= {DQS_WIDTH{1'b0}};
713
      dlyinc_gate <= {DQS_WIDTH{1'b0}};
714
    end else begin
715
      // default: no gate IDELAY is reset or stepped this cycle
      dlyrst_gate <= {DQS_WIDTH{1'b0}};
716
      dlyce_gate  <= {DQS_WIDTH{1'b0}};
717
      dlyinc_gate <= {DQS_WIDTH{1'b0}};
718
 
719
      // stage 4 request to reset the gate IDELAY selected by count_gate
      if (cal4_dlyrst_gate) begin
720
        if (SIM_ONLY == 0)
721
          dlyrst_gate[count_gate] <= 1'b1;
722
        else
723
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
724
            dlyrst_gate[i] <= 1'b1;
725
          end
726
      end
727
 
728
      // stage 4 request to step the gate IDELAY selected by count_gate
      if (cal4_dlyce_gate) begin
729
        if (SIM_ONLY == 0) begin
730
          dlyce_gate[count_gate]  <= 1'b1;
731
          dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
732
        end else begin
733
          // if simulation, then calibrate only first gate
734
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
735
            dlyce_gate[i]  <= 1'b1;
736
            dlyinc_gate[i] <= cal4_dlyinc_gate;
737
          end
738
        end
739
      end else if (DEBUG_EN != 0) begin
740
        // DEBUG: allow user to vary IDELAY tap settings
741
        // "all" branch: every gate gets the same CE/INC; note that the
        // gate-specific up/down inputs also contribute to CE/INC here
        if (dbg_idel_up_all || dbg_idel_down_all ||
742
            dbg_sel_all_idel_gate) begin
743
          for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
744
            dlyce_gate[x] <= dbg_idel_up_all | dbg_idel_down_all |
745
                             dbg_idel_up_gate | dbg_idel_down_gate;
746
            dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
747
          end
748
        end else begin
749
          // single-gate branch: only the gate picked by dbg_sel_idel_gate moves
          dlyce_gate <= {DQS_WIDTH{1'b0}};
750
          dlyce_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate |
751
                                           dbg_idel_down_gate;
752
          dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
753
        end
754
      end
755
    end
756
 
757
  //***************************************************************************
758
  // signal to tell calibration state machines to wait and give IDELAY time to
759
  // settle after its value is changed (both time for IDELAY chain to settle,
760
  // and for settled output to propagate through ISERDES). For general use: use
761
  // for any calibration state machines that modify any IDELAY.
762
  // Should give at least enough time for IDELAY output to settle (technically
763
  // for V5, this should be "glitchless" when IDELAY taps are changed, so don't
764
  // need any time here), and also time for new data to propagate through both
765
  // ISERDES and the "RDD" MUX + associated pipelining
766
  // For now, give very "generous" delay - doesn't really matter since only
767
  // needed during calibration
768
  //***************************************************************************
769
 
770
  // Detect a change of the stage 2 capture select: keep last cycle's value
  // and flag any bit that differs from it.
  always @(posedge clkdiv) begin
    cal2_rd_data_sel_r <= cal2_rd_data_sel;
  end

  assign cal2_rd_data_sel_edge = |(cal2_rd_data_sel_r ^ cal2_rd_data_sel);

  // Single "something changed" request: any stage asking for an IDELAY
  // step or reset, or a change of the stage 2 capture select (no IDELAY
  // moves in that case, but the same settling counter stalls the cal logic).
  assign dlyce_or = cal4_dlyrst_gate |
                    cal4_dlyce_gate |
                    cal2_rd_data_sel_edge |
                    cal2_dlyce_dqs |
                    cal1_dlyce_dq;

  // SYN_NOTE: Can later recode to avoid combinational path
  assign idel_set_wait = (idel_set_cnt != IDEL_SET_VAL) || dlyce_or;

  // Settling counter: cleared by reset or by any request, then counts up
  // and holds once it reaches IDEL_SET_VAL.
  always @(posedge clkdiv) begin
    if (rstdiv || dlyce_or)
      idel_set_cnt <= 4'b0000;
    else if (idel_set_cnt != IDEL_SET_VAL)
      idel_set_cnt <= idel_set_cnt + 1;
  end
795
 
796
  // generate request to PHY_INIT logic to issue auto-refresh
797
  // used by certain states to force prech/auto-refresh part way through
798
  // calibration to avoid a tRAS violation (which will happen if that
799
  // stage of calibration lasts long enough). This signal must meet the
800
  // following requirements: (1) only transition from 0->1 when the refresh
801
  // request is needed, (2) stay at 1 and only transition 1->0 when
802
  // CALIB_REF_DONE is asserted
803
  // Registered OR of the refresh requests raised by the stage 1, stage 2
  // and stage 4 calibration logic; cleared while rstdiv is asserted.
  always @(posedge clkdiv) begin
    if (rstdiv)
      calib_ref_req <= 1'b0;
    else
      calib_ref_req <= cal4_ref_req | cal2_ref_req | cal1_ref_req;
  end
808
 
809
  // stage 1 calibration requests auto-refresh every 4 bits
810
  generate
    if (DQ_BITS >= 2) begin: gen_cal1_refresh_dq_gt4
      // refresh is due whenever the next DQ index is a multiple of four
      assign cal1_refresh = (next_count_dq[1:0] == 2'b00);
    end else begin: gen_cal1_refresh_dq_lte4
      // DQ index is narrower than two bits: never request a refresh
      assign cal1_refresh = 1'b0;
    end
  endgenerate
817
 
818
  //***************************************************************************
819
  // First stage calibration: DQ-DQS
820
  // Definitions:
821
  //  edge: detected when varying IDELAY, and current capture data != prev
822
  //    capture data
823
  //  valid bit window: detected when current capture data == prev capture
824
  //    data for more than half the bit time
825
  //  starting conditions for DQS-DQ phase:
826
  //    case 1: when DQS starts somewhere in rising edge bit window, or
827
  //      on the right edge of the rising bit window.
828
  //    case 2: when DQS starts somewhere in falling edge bit window, or
829
  //      on the right edge of the falling bit window.
830
  // Algorithm Description:
831
  //  1. Increment DQ IDELAY until we find an edge.
832
  //  2. While we're finding the first edge, note whether a valid bit window
833
  //     has been detected before we found an edge. If so, then figure out if
834
  //     this is the rising or falling bit window. If rising, then our starting
835
  //     DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
836
  //     a valid bit window, then we must have started on the edge of a window.
837
  //     Need to wait until later on to decide which case we are.
838
  //       - Store FIRST_EDGE IDELAY value
839
  //  3. Now look for second edge.
840
  //  4. While we're finding the second edge, note whether valid bit window
841
  //     is detected. If so, then use it, along with results from (2), to figure
842
  //     out what the starting case is. If in rising bit window, then we're in
843
  //     case 2. If falling, then case 1.
844
  //       - Store SECOND_EDGE IDELAY value
845
  //     NOTES:
846
  //       a. Finding two edges allows us to calculate the bit time (although
847
  //          not the "same" bit time polarity - need to investigate this
848
  //          more).
849
  //       b. If we run out of taps looking for the second edge, then the bit
850
  //       time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
851
  //       case 1).
852
  //  5. Calculate absolute amount to delay DQ as:
853
  //       If second edge found, and case 1:
854
  //         - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
855
  //       If second edge found, and case 2:
856
  //         - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
857
  //       If second edge not found, then need to make an approximation on
858
  //       how much to shift by (should be okay, because we have more timing
859
  //       margin):
860
  //         - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
861
  //     NOTE: Does this account for either case 1 or case 2?????
862
  //     NOTE: It's also possible, even when we find the second edge,
863
  //           to instead just subtract half the bit time from either
864
  //           FIRST or SECOND_EDGE. Finding the actual bit time (which is
865
  //           what (SECOND_EDGE - FIRST_EDGE) is) is slightly more accurate,
866
  //           since it takes into account duty cycle distortion.
867
  //  6. Repeat for each DQ in current DQS set.
868
  //***************************************************************************
869
 
870
  //*****************************************************************
871
  // for first stage calibration - used for checking if DQS is aligned to the
872
  // particular DQ, such that we're in the data valid window. Basically, this
873
  // is one giant MUX.
874
  //  = [falling data, rising data]
875
  //  = [0, 1] = rising DQS aligned in proper (rising edge) bit window
876
  //  = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
877
  //  = [0, 0], or [1,1] = in uncertain region between windows
878
  //*****************************************************************
879
 
880
  // SYN_NOTE: May have to split this up into multiple levels - MUX can get
881
  //  very wide - as wide as the data bus width
882
  // Sample the DQ selected by next_count_dq:
  // bit 1 = falling-edge data, bit 0 = rising-edge data.
  always @(posedge clkdiv) begin
    cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq],
                        rd_data_rise_1x_r[next_count_dq]};
  end
885
 
886
  //*****************************************************************
887
  // determine when an edge has occurred - when either the current value
888
  // is different from the previous latched value or when the DATA_CHK
889
  // outputs are the same (rare, but indicates that we're at an edge)
890
  // This is only valid when the IDELAY output and propagation of the
891
  // data through the capture flops has had a chance to settle out.
892
  //*****************************************************************
893
 
894
  // write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that
895
  // if X's are captured on the bus during functional simulation, that
896
  // the logic will register this as an edge detected. Do this to allow
897
  // use of this HDL with Denali memory models (Denali models drive DQ
898
  // to X's on both edges of the data valid window to simulate jitter)
899
  // This is only done for functional simulation purposes. **Should not**
900
  // make the final synthesized logic more complicated, but it does make
901
  // the HDL harder to understand b/c we have to "phrase" the logic
902
  // slightly differently than when not worrying about X's
903
  always @(*) begin
    // Default to "edge detected". This is also the outcome when the
    // condition below evaluates to X (e.g. X's captured on the bus in
    // functional simulation), so unknown data is reported as an edge.
    cal1_detect_edge = 1'b1;
    // Clear the flag only when the capture is known to be clean:
    //  - a previous sample is stored and the current sample matches it, or
    //  - no previous sample is stored yet and {fall, rise} is 2'b01 or
    //    2'b10 (i.e. neither X's nor 2'b00 / 2'b11)
    if ((cal1_data_chk_last_valid &&
         (cal1_data_chk_last == cal1_data_chk_r)) ||
        (!cal1_data_chk_last_valid &&
         ((cal1_data_chk_r == 2'b10) || (cal1_data_chk_r == 2'b01))))
      cal1_detect_edge = 1'b0;
  end
917
 
918
  always @(*) begin
    // Default to "not stable"; an X condition below leaves it that way.
    cal1_detect_stable = 1'b0;
    // Stable when a previous sample is stored, the current sample equals
    // it, and {fall, rise} is one of the two legal patterns 2'b01 / 2'b10.
    if ((cal1_data_chk_last_valid &&
         (cal1_data_chk_last == cal1_data_chk_r)) &&
        ((cal1_data_chk_r == 2'b10) || (cal1_data_chk_r == 2'b01)))
      cal1_detect_stable = 1'b1;
  end
928
 
929
  //*****************************************************************
930
  // Find valid window: keep track of how long we've been in the same data
931
  // window. If it's been long enough, then declare that we've found a valid
932
  // window. Also returns whether we found a rising or falling window (only
933
  // valid when found_window is asserted)
934
  //*****************************************************************
935
 
936
  always @(posedge clkdiv) begin
    if ((cal1_state == CAL1_INIT) || !cal1_data_chk_last_valid) begin
      // Either a new DQ is being started, or there is no stored
      // CAL1_DATA_CHK sample to compare against (none recorded yet, or it
      // was invalidated after an edge was found and the second edge is now
      // being searched for). Drop FOUND_WINDOW on the following clock so a
      // false window is not reported right after an edge. Because of
      // jitter an edge may be missed at the end of the IDELAY settling
      // time and only show up one clock later (e.g. during
      // CAL1_FIND_FIRST_EDGE).
      cal1_found_rising <= 1'bx;
      cal1_found_window <= 1'b0;
      cal1_window_cnt   <= 4'b0000;
    end else if (!idel_set_wait &&
                 ((cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT) ||
                  (cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT))) begin
      // Settling time is over in one of the two edge-search wait states:
      // count consecutive stable samples. Once MIN_WIN_SIZE of them have
      // been seen we are away from an edge and the starting DQS-DQ phase
      // can be determined conclusively.
      if (cal1_detect_stable) begin
        if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
          cal1_found_window <= 1'b1;
          // {fall, rise} == 2'b01 marks the rising window
          if (cal1_data_chk_r == 2'b01)
            cal1_found_rising <= 1'b1;
          else
            cal1_found_rising <= 1'b0;
        end
        cal1_window_cnt <= cal1_window_cnt + 1;
      end else begin
        // Not inside a data valid window: restart the count and withdraw
        // the window indication. By design this happens at least once
        // after the first edge has been found.
        cal1_found_rising <= 1'bx;
        cal1_found_window <= 1'b0;
        cal1_window_cnt   <= 4'b0000;
      end
    end
  end
978
 
979
  //*****************************************************************
980
  // keep track of edge tap counts found, and whether we've
981
  // incremented to the maximum number of taps allowed
982
  //*****************************************************************
983
 
984
  // Shadow count of the DQ IDELAY tap position for stage 1. Cleared in
  // CAL1_INIT, then follows every CE pulse: +1 on increment, -1 on
  // decrement. The limit flag is raised by an increment issued while the
  // count is 62 and is dropped again by any decrement.
  always @(posedge clkdiv) begin
    if (cal1_state == CAL1_INIT) begin
      cal1_idel_tap_cnt       <= 6'd0;
      cal1_idel_tap_limit_hit <= 1'b0;
    end else if (cal1_dlyce_dq) begin
      if (cal1_dlyinc_dq) begin
        cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'd62);
        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt + 1;
      end else begin
        cal1_idel_tap_limit_hit <= 1'b0;
        cal1_idel_tap_cnt       <= cal1_idel_tap_cnt - 1;
      end
    end
  end
997
 
998
  //*****************************************************************
999
  // Pipeline for better timing - amount to decrement by if second
1000
  // edge not found
1001
  //*****************************************************************
1002
  // if only one edge found (possible for low frequencies), then:
1003
  //  1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
1004
  //  2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
1005
  //     (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
1006
  //     DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
1007
  //     of DQ window.
1008
  //  3. Clamp the above value at 63 to ensure we don't underflow IDELAY
1009
  //     (note: clamping happens in the CAL1 state machine)
1010
  // Registered decrement amount for the "only one edge found" case:
  // (63 - first edge tap count) plus half of BIT_TIME_TAPS.
  always @(posedge clkdiv) begin
    cal1_low_freq_idel_dec
      <= (7'd63 - {1'b0, cal1_first_edge_tap_cnt}) +
         (BIT_TIME_TAPS/2);
  end
1014
 
1015
  //*****************************************************************
1016
  // Keep track of max taps used during stage 1, use this to limit
1017
  // the number of taps that can be used in stage 2
1018
  //*****************************************************************
1019
 
1020
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      cal1_idel_max_tap_we <= 1'b0;
      cal1_idel_max_tap    <= 6'd0;
    end else begin
      // Latch a new maximum in CAL1_DONE when the (pipelined) write enable
      // says the current tap count exceeded the stored maximum.
      if (cal1_idel_max_tap_we && (cal1_state == CAL1_DONE))
        cal1_idel_max_tap <= cal1_idel_tap_cnt;
      // Write enable is registered one cycle ahead of its use. There is
      // plenty of time: the tap count updates, then dead cycles follow
      // while waiting for the IDELAY output to settle.
      cal1_idel_max_tap_we <= (cal1_idel_tap_cnt > cal1_idel_max_tap);
    end
  end
1033
 
1034
  //*****************************************************************
1035
 
1036
  // Stage 1 (per-DQ) calibration state machine.
  // State flow as coded below:
  //   IDLE -> INIT -> INC_IDEL -> FIND_FIRST_EDGE <-> FIRST_EDGE_IDEL_WAIT
  //        -> FOUND_FIRST_EDGE_WAIT -> FIND_SECOND_EDGE <->
  //        SECOND_EDGE_IDEL_WAIT -> CALC_IDEL -> DEC_IDEL -> DONE
  //   DONE returns to INIT for the next DQ, or to IDLE once
  //   calib_done_tmp[0] has been set.
  // cal1_ref_req, cal1_dlyce_dq and cal1_dlyinc_dq are pulse outputs: they
  // default to 0 every cycle and a state overrides them when needed (the
  // last nonblocking assignment wins).
  // NOTE(review): the case statement has no default item, so cal1_state
  // holds its value for any encoding not listed here.
  always @(posedge clkdiv)
1037
    if (rstdiv) begin
1038
      // control outputs get defined reset values; datapath registers are
      // reset to X and are initialized later by the state machine.
      // calib_err[0] is cleared here and not written anywhere else in
      // this block.
      calib_done[0]            <= 1'b0;
1039
      calib_done_tmp[0]        <= 1'bx;
1040
      calib_err[0]             <= 1'b0;
1041
      count_dq                 <= {DQ_BITS{1'b0}};
1042
      next_count_dq            <= {DQ_BITS{1'b0}};
1043
      cal1_bit_time_tap_cnt    <= 6'bxxxxxx;
1044
      cal1_data_chk_last       <= 2'bxx;
1045
      cal1_data_chk_last_valid <= 1'bx;
1046
      cal1_dlyce_dq            <= 1'b0;
1047
      cal1_dlyinc_dq           <= 1'b0;
1048
      cal1_dqs_dq_init_phase   <= 1'bx;
1049
      cal1_first_edge_done     <= 1'bx;
1050
      cal1_found_second_edge   <= 1'bx;
1051
      cal1_first_edge_tap_cnt  <= 6'bxxxxxx;
1052
      cal1_idel_dec_cnt        <= 7'bxxxxxxx;
1053
      cal1_idel_inc_cnt        <= 6'bxxxxxx;
1054
      cal1_ref_req             <= 1'b0;
1055
      cal1_state               <= CAL1_IDLE;
1056
    end else begin
1057
      // default values for all "pulse" outputs
1058
      cal1_ref_req        <= 1'b0;
1059
      cal1_dlyce_dq       <= 1'b0;
1060
      cal1_dlyinc_dq      <= 1'b0;
1061
 
1062
      case (cal1_state)
1063
        // wait for calib_start[0]; DQ counters are held at 0 meanwhile
        CAL1_IDLE: begin
1064
          count_dq      <= {DQ_BITS{1'b0}};
1065
          next_count_dq <= {DQ_BITS{1'b0}};
1066
          if (calib_start[0]) begin
1067
            calib_done[0] <= 1'b0;
1068
            calib_done_tmp[0] <= 1'b0;
1069
            cal1_state    <= CAL1_INIT;
1070
          end
1071
        end
1072
 
1073
        // per-DQ initialization of the edge-search bookkeeping
        CAL1_INIT: begin
1074
          cal1_data_chk_last_valid <= 1'b0;
1075
          cal1_found_second_edge <= 1'b0;
1076
          cal1_dqs_dq_init_phase <= 1'b0;
1077
          cal1_idel_inc_cnt      <= 6'b000000;
1078
          cal1_state <= CAL1_INC_IDEL;
1079
        end
1080
 
1081
        // increment DQ IDELAY so that either: (1) DQS starts somewhere in
1082
        // first rising DQ window, or (2) DQS starts in first falling DQ
1083
        // window. The amount to shift is frequency dependent (and is either
1084
        // precalculated by MIG or possibly adjusted by the user)
1085
        CAL1_INC_IDEL:
1086
          if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin
1087
            cal1_state <= CAL1_FIND_FIRST_EDGE;
1088
          end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
1089
            cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
1090
            cal1_dlyce_dq <= 1'b1;
1091
            cal1_dlyinc_dq <= 1'b1;
1092
          end
1093
 
1094
        // look for first edge
1095
        CAL1_FIND_FIRST_EDGE: begin
1096
          // Determine DQS-DQ phase if we can detect enough of a valid window
1097
          if (cal1_found_window)
1098
            cal1_dqs_dq_init_phase <= ~cal1_found_rising;
1099
          // find first edge - if found then record position
1100
          if (cal1_detect_edge) begin
1101
            cal1_state <= CAL1_FOUND_FIRST_EDGE_WAIT;
1102
            cal1_first_edge_done   <= 1'b0;
1103
            cal1_first_edge_tap_cnt <= cal1_idel_tap_cnt;
1104
            cal1_data_chk_last_valid <= 1'b0;
1105
          end else begin
1106
            // otherwise, store the current value of DATA_CHK, increment
1107
            // DQ IDELAY, and compare again
1108
            cal1_state <= CAL1_FIRST_EDGE_IDEL_WAIT;
1109
            cal1_data_chk_last <= cal1_data_chk_r;
1110
            // avoid comparing against DATA_CHK_LAST for previous iteration
1111
            cal1_data_chk_last_valid <= 1'b1;
1112
            cal1_dlyce_dq <= 1'b1;
1113
            cal1_dlyinc_dq <= 1'b1;
1114
          end
1115
        end
1116
 
1117
        // wait for DQ IDELAY to settle
1118
        CAL1_FIRST_EDGE_IDEL_WAIT:
1119
          if (!idel_set_wait)
1120
            cal1_state <= CAL1_FIND_FIRST_EDGE;
1121
 
1122
        // delay state between finding first edge and looking for second
1123
        // edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before
1124
        // starting to look for second edge
1125
        CAL1_FOUND_FIRST_EDGE_WAIT:
1126
          cal1_state <= CAL1_FIND_SECOND_EDGE;
1127
 
1128
        // Try and find second edge
1129
        CAL1_FIND_SECOND_EDGE: begin
1130
          // When looking for 2nd edge, first make sure data stabilized (by
1131
          // detecting valid data window) - needed to avoid false edges
1132
          if (cal1_found_window) begin
1133
            cal1_first_edge_done <= 1'b1;
1134
            cal1_dqs_dq_init_phase <= cal1_found_rising;
1135
          end
1136
          // exit if run out of taps to increment
1137
          if (cal1_idel_tap_limit_hit)
1138
            cal1_state <= CAL1_CALC_IDEL;
1139
          else begin
1140
            // found second edge, record the current edge count
1141
            if (cal1_first_edge_done && cal1_detect_edge) begin
1142
              cal1_state <= CAL1_CALC_IDEL;
1143
              cal1_found_second_edge <= 1'b1;
1144
              cal1_bit_time_tap_cnt <= cal1_idel_tap_cnt -
1145
                                       cal1_first_edge_tap_cnt + 1;
1146
            end else begin
1147
              cal1_state <= CAL1_SECOND_EDGE_IDEL_WAIT;
1148
              cal1_data_chk_last <= cal1_data_chk_r;
1149
              cal1_data_chk_last_valid <= 1'b1;
1150
              cal1_dlyce_dq <= 1'b1;
1151
              cal1_dlyinc_dq <= 1'b1;
1152
            end
1153
          end
1154
        end
1155
 
1156
        // wait for DQ IDELAY to settle, then store ISERDES output
1157
        CAL1_SECOND_EDGE_IDEL_WAIT:
1158
          if (!idel_set_wait)
1159
            cal1_state <= CAL1_FIND_SECOND_EDGE;
1160
 
1161
        // pipeline delay state to calculate amount to decrement DQ IDELAY
1162
        // NOTE: We're calculating the amount to decrement by, not the
1163
        //  absolute setting for DQ IDELAY
1164
        CAL1_CALC_IDEL: begin
1165
          // if two edges found
1166
          if (cal1_found_second_edge)
1167
            // case 1: DQS was in DQ window to start with. First edge found
1168
            // corresponds to left edge of DQ rising window. Backup by 1.5*BT
1169
            // NOTE: In this particular case, it is possible to decrement
1170
            //  "below 0" in the case where DQS delay is less than 0.5*BT,
1171
            //  need to limit decrement to prevent IDELAY tap underflow
1172
            if (!cal1_dqs_dq_init_phase)
1173
              cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
1174
                                   {1'b0, (cal1_bit_time_tap_cnt >> 1)};
1175
            // case 2: DQS was in wrong DQ window (in DQ falling window).
1176
            // First edge found is right edge of DQ rising window. Second
1177
            // edge is left edge of DQ rising window. Backup by 0.5*BT
1178
            else
1179
              cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
1180
          // if only one edge found - assume will always be case 1 - DQS in
1181
          // DQ window. Case 2 only possible if path delay on DQS > 5ns
1182
          else
1183
            cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
1184
          cal1_state <= CAL1_DEC_IDEL;
1185
        end
1186
 
1187
        // decrement DQ IDELAY for final adjustment
1188
        CAL1_DEC_IDEL:
1189
          // once adjustment is complete, we're done with calibration for
1190
          // this DQ, now return to IDLE state and repeat for next DQ
1191
          // Add underflow protection for case of 2 edges found and DQS
1192
          // starting in DQ window (see comments for above state) - note we
1193
          // have to take into account delayed value of CAL1_IDEL_TAP_CNT -
1194
          // gets updated one clock cycle after CAL1_DLYCE/INC_DQ
1195
          if ((cal1_idel_dec_cnt == 7'b0000000) ||
1196
              (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'b000001))) begin
1197
            cal1_state <= CAL1_DONE;
1198
            // stop when all DQ's calibrated, or DQ[0] cal'ed (for sim)
1199
            if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0))
1200
              calib_done_tmp[0] <= 1'b1;
1201
            else
1202
              // need for VHDL simulation to prevent out-of-index error
1203
              next_count_dq <= count_dq + 1;
1204
          end else begin
1205
            // keep decrementing until final tap count reached
1206
            cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
1207
            cal1_dlyce_dq <= 1'b1;
1208
            cal1_dlyinc_dq <= 1'b0;
1209
          end
1210
 
1211
        // delay state to allow count_dq and DATA_CHK to point to the next
1212
        // DQ bit (allows us to potentially begin checking for an edge on
1213
        // next DQ right away).
1214
        CAL1_DONE:
1215
          if (!idel_set_wait) begin
1216
            count_dq <= next_count_dq;
1217
            if (calib_done_tmp[0]) begin
1218
              calib_done[0] <= 1'b1;
1219
              cal1_state <= CAL1_IDLE;
1220
            end else begin
1221
              // request auto-refresh whenever cal1_refresh is set (every
1222
              // 4th DQ bit, next_count_dq[1:0] == 2'b00) to avoid tRAS violation
1223
              if (cal1_refresh) begin
1224
                // keep the request asserted until calib_ref_done is seen
                cal1_ref_req <= 1'b1;
1225
                if (calib_ref_done)
1226
                  cal1_state <= CAL1_INIT;
1227
              end else
1228
                // if no need this time for refresh, proceed to next bit
1229
                cal1_state <= CAL1_INIT;
1230
            end
1231
          end
1232
      endcase
1234
 
1235
  //***************************************************************************
1236
  // Second stage calibration: DQS-FPGA Clock
1237
  // Algorithm Description:
1238
  //  1. Assumes a training pattern that will produce a pattern oscillating at
1239
  //     half the core clock frequency each on rise and fall outputs, and such
1240
  //     that rise and fall outputs are 180 degrees out of phase from each
1241
  //     other. Note that since the calibration logic runs at half the speed
1242
  //     of the interface, expect that data sampled with the slow clock always
1243
  //     to be constant (either always = 1, or = 0, and rise data != fall data)
1244
  //     unless we cross the edge of the data valid window
1245
  //  2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
1246
  //     sync'ed to rising edge of core clock, and falling edge data sync'ed
1247
  //     to falling edge of core clock
1248
  //  3. Start looking for an edge. An edge is defined as either: (1) a
1249
  //     change in capture value or (2) an invalid capture value (e.g. rising
1250
  //     data != falling data for that same clock cycle).
1251
  //  4. If an edge is found, go to step (6). If edge hasn't been found, then
1252
  //     set RD_DATA_SEL = 1, and try again.
1253
  //  5. If no edge is found, then increment IDELAY and return to step (3)
1254
  //  6. If an edge is found, then invert RD_DATA_SEL - this shifts the
1255
  //     capture point 180 degrees from the edge of the window (minus duty
1256
  //     cycle distortion, delay skew between rising/falling edge capture
1257
  //     paths, etc.)
1258
  //  7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
1259
  //     stage 1 calibration), then decrement IDELAY (without reinverting
1260
  //     RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
1261
  //     have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
1262
  //     capture point (not optimal, but best we can do not having found an
1263
  //     edge of the window). This happens only for very low frequencies.
1264
  //  8. Repeat for each DQS group.
1265
  //  NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
1266
  //   algorithm might be to find both edges of the data valid window (using
1267
  //   the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
1268
  //***************************************************************************
1269
 
1270
  // RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
1271
  // UCF file to relax timing. This net is "pseudo-static" (after value is
1272
  // changed, FSM waits number of cycles before using the output).
1273
  // Note that we are adding one clock cycle of delay (to isolate it from
1274
  // the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
1275
  // enough to compensate (by default it does, it waits a few cycles more
1276
  // than minimum # of clock cycles)
1277
  // One flop per DQS group: rd_data_sel[n] is cal2_rd_data_sel[n] delayed
  // by one CLKDIV cycle. CE is tied high and R/S are tied low, so each
  // FDRSE acts as a plain D flip-flop. The synthesis attributes below are
  // kept verbatim.
  genvar rd_i;
  generate
    for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i + 1)
      begin: gen_rd_data_sel
        FDRSE u_ff_rd_data_sel
          (
           .C   (clkdiv),
           .CE  (1'b1),
           .R   (1'b0),
           .S   (1'b0),
           .D   (cal2_rd_data_sel[rd_i]),
           .Q   (rd_data_sel[rd_i])
           ) /* synthesis syn_preserve = 1 */
             /* synthesis syn_replicate = 0 */;
      end
  endgenerate
1292
 
1293
  //*****************************************************************
1294
  // Max number of taps used for stg2 cal dependent on number of taps
1295
  // used for stg1 (give priority to stg1 cal - let it use as many
1296
  // taps as it needs - the remainder of the IDELAY taps can be used
1297
  // by stg2)
1298
  //*****************************************************************
1299
 
1300
  // Registered tap budget for stage 2: 63 minus cal1_idel_max_tap
  always @(posedge clkdiv) begin
    cal2_idel_tap_limit <= 6'd63 - cal1_idel_max_tap;
  end
1302
 
1303
  //*****************************************************************
1304
  // second stage calibration uses readback pattern of "1100" (i.e.
1305
  // 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
1306
  // only look at the first bit of each DQS group
1307
  //*****************************************************************
1308
 
1309
  // cal2_detect_edge is ASSERTED when any of the following holds:
  //  (1) cal2_curr_sel = 0, a "pos" reference sample has been recorded
  //      (cal2_rd_data_last_valid_pos = 1), and the current rise or fall
  //      data differs from that recorded sample
  //  (2) cal2_curr_sel = 1, a "neg" reference sample has been recorded
  //      (cal2_rd_data_last_valid_neg = 1), and the current rise or fall
  //      data differs from that recorded sample
  //  (3) the current rise data and fall data differ from each other
  wire cal2_chg_vs_last_pos =
         ((rdd_rise_q1 != cal2_rd_data_rise_last_pos) ||
          (rdd_fall_q1 != cal2_rd_data_fall_last_pos));
  wire cal2_chg_vs_last_neg =
         ((rdd_rise_q1 != cal2_rd_data_rise_last_neg) ||
          (rdd_fall_q1 != cal2_rd_data_fall_last_neg));
  wire cal2_rise_fall_differ = (rdd_rise_q1 != rdd_fall_q1);

  assign cal2_detect_edge =
    ((cal2_chg_vs_last_pos &&
      cal2_rd_data_last_valid_pos && (!cal2_curr_sel)) ||
     (cal2_chg_vs_last_neg &&
      cal2_rd_data_last_valid_neg && (cal2_curr_sel)) ||
     cal2_rise_fall_differ);
1320
 
1321
  //*****************************************************************
1322
  // keep track of edge tap counts found, and whether we've
1323
  // incremented to the maximum number of taps allowed
1324
  // NOTE: Assume stage 2 cal always increments the tap count (never
1325
  //       decrements) when searching for edge of the data valid window
1326
  //*****************************************************************
1327
 
1328
  // Stage 2 tap bookkeeping. Both registers are cleared while the FSM is
  // in CAL2_INIT. Afterwards every cal2_dlyce_dqs pulse bumps the counter
  // (the direction bit is not examined here), and the limit flag is
  // loaded by comparing the pre-increment count with (limit - 1).
  always @(posedge clkdiv) begin
    if (cal2_state == CAL2_INIT) begin
      cal2_idel_tap_cnt       <= 6'd0;
      cal2_idel_tap_limit_hit <= 1'b0;
    end else if (cal2_dlyce_dqs) begin
      cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
                                  cal2_idel_tap_limit - 1);
      cal2_idel_tap_cnt       <= cal2_idel_tap_cnt + 1;
    end
  end
1337
 
1338
  //*****************************************************************
1339
 
1340
  // Stage 2 calibration state machine (one pass per DQS group).
  //
  // cal2_last_dqs_grp: true when no further DQS group has to be
  // calibrated after the current one - either count_dqs points at the
  // last group, or SIM_ONLY is non-zero (only one group is calibrated).
  wire cal2_last_dqs_grp = ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0));

  always @(posedge clkdiv) begin
    if (rstdiv) begin
      cal2_state                  <= CAL2_IDLE;
      calib_done[1]               <= 1'b0;
      calib_done_tmp[1]           <= 1'bx;
      calib_err[1]                <= 1'b0;
      count_dqs                   <= 'b0;
      next_count_dqs              <= 'b0;
      cal2_rd_data_sel            <= 'b0;
      cal2_ref_req                <= 1'b0;
      cal2_dlyce_dqs              <= 1'b0;
      cal2_dlyinc_dqs             <= 1'b0;
      cal2_idel_dec_cnt           <= 6'bxxxxxx;
      cal2_rd_data_last_valid_pos <= 1'bx;
      cal2_rd_data_last_valid_neg <= 1'bx;
    end else begin
      // Defaults: these three are low unless a state below overrides
      // them in this same cycle (must stay ahead of the case statement)
      cal2_dlyinc_dqs <= 1'b0;
      cal2_dlyce_dqs  <= 1'b0;
      cal2_ref_req    <= 1'b0;

      case (cal2_state)
        // Wait for the start request; group counters are held at 0
        CAL2_IDLE: begin
          next_count_dqs <= 'b0;
          count_dqs      <= 'b0;
          if (calib_start[1]) begin
            cal2_state        <= CAL2_INIT;
            calib_done_tmp[1] <= 1'b0;
            calib_done[1]     <= 1'b0;
            cal2_rd_data_sel  <= {DQS_WIDTH{1'b0}};
          end
        end

        // Entered once for every DQS group: start from capture polarity
        // 0 with no reference samples recorded
        CAL2_INIT: begin
          cal2_state                  <= CAL2_INIT_IDEL_WAIT;
          cal2_curr_sel               <= 1'b0;
          cal2_rd_data_last_valid_pos <= 1'b0;
          cal2_rd_data_last_valid_neg <= 1'b0;
        end

        // Hold until idel_set_wait drops (settling time after
        // RD_DATA_SEL has been reset)
        CAL2_INIT_IDEL_WAIT: begin
          if (!idel_set_wait)
            cal2_state <= CAL2_FIND_EDGE_POS;
        end

        // Edge search with cal2_curr_sel = 0. Whether or not an edge is
        // seen, the capture polarity is flipped to 1: on an edge that is
        // the final setting for this group, otherwise it is the next
        // polarity to be searched.
        CAL2_FIND_EDGE_POS: begin
          cal2_curr_sel <= 1'b1;
          if (cal2_detect_edge) begin
            cal2_state <= CAL2_DONE;
            // in simulation mode every DQS group gets the same select
            if (SIM_ONLY != 0)
              cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
            else
              cal2_rd_data_sel[count_dqs] <= 1'b1;
            // next_count_dqs is only advanced when another group
            // follows, so the group index never runs past DQS_WIDTH-1
            if (cal2_last_dqs_grp)
              calib_done_tmp[1] <= 1'b1;
            else
              next_count_dqs <= count_dqs + 1;
          end else begin
            // remember what was captured with polarity 0, then go and
            // look with polarity 1
            cal2_state                  <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
            cal2_rd_data_sel[count_dqs] <= 1'b1;
            cal2_rd_data_rise_last_pos  <= rdd_rise_q1;
            cal2_rd_data_fall_last_pos  <= rdd_fall_q1;
            cal2_rd_data_last_valid_pos <= 1'b1;
          end
        end

        // Hold until idel_set_wait drops after the polarity switch
        CAL2_FIND_EDGE_IDEL_WAIT_POS: begin
          if (!idel_set_wait)
            cal2_state <= CAL2_FIND_EDGE_NEG;
        end

        // Edge search with cal2_curr_sel = 1
        CAL2_FIND_EDGE_NEG: begin
          if (cal2_detect_edge) begin
            // edge seen: settle on polarity 0 for this group
            cal2_state    <= CAL2_DONE;
            cal2_curr_sel <= 1'b0;
            if (SIM_ONLY != 0)
              cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
            else
              cal2_rd_data_sel[count_dqs] <= 1'b0;
            if (cal2_last_dqs_grp)
              calib_done_tmp[1] <= 1'b1;
            else
              next_count_dqs <= count_dqs + 1;
          end else if (cal2_idel_tap_limit_hit) begin
            // out of taps with no edge: back off by half the tap limit
            // and keep whichever polarity is currently selected
            cal2_state        <= CAL2_DEC_IDEL;
            cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]};
            if (cal2_last_dqs_grp)
              calib_done_tmp[1] <= 1'b1;
            else
              next_count_dqs <= count_dqs + 1;
          end else begin
            // no edge yet: remember what was captured with polarity 1,
            // return to polarity 0 and issue one IDELAY increment
            cal2_state                  <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
            cal2_curr_sel               <= 1'b0;
            cal2_rd_data_sel[count_dqs] <= 1'b0;
            cal2_rd_data_rise_last_neg  <= rdd_rise_q1;
            cal2_rd_data_fall_last_neg  <= rdd_fall_q1;
            cal2_rd_data_last_valid_neg <= 1'b1;
            cal2_dlyinc_dqs             <= 1'b1;
            cal2_dlyce_dqs              <= 1'b1;
          end
        end

        // Hold until idel_set_wait drops after the tap increment
        CAL2_FIND_EDGE_IDEL_WAIT_NEG: begin
          if (!idel_set_wait)
            cal2_state <= CAL2_FIND_EDGE_POS;
        end

        // No edge was found: issue one decrement pulse per cycle until
        // cal2_idel_dec_cnt reaches zero
        CAL2_DEC_IDEL: begin
          if (cal2_idel_dec_cnt == 6'b000000)
            cal2_state <= CAL2_DONE;
          else begin
            cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
            cal2_dlyinc_dqs   <= 1'b0;
            cal2_dlyce_dqs    <= 1'b1;
          end
        end

        // Move count_dqs on to the next group. Either finish (all
        // groups done) or keep requesting a refresh until
        // calib_ref_done is seen, then start the next group.
        CAL2_DONE: begin
          if (!idel_set_wait) begin
            count_dqs <= next_count_dqs;
            if (calib_done_tmp[1]) begin
              cal2_state    <= CAL2_IDLE;
              calib_done[1] <= 1'b1;
            end else begin
              cal2_ref_req <= 1'b1;
              if (calib_ref_done)
                cal2_state <= CAL2_INIT;
            end
          end
        end
      endcase
    end
  end
1501
 
1502
  //***************************************************************************
1503
  // Stage 3 calibration: Read Enable
1504
  // Description:
1505
  // read enable calibration determines the "round-trip" time (in # of CLK0
1506
  // cycles) between when a read command is issued by the controller, and
1507
  // when the corresponding read data is synchronized into the CLK0 domain.
1508
  // this is a long delay chain to delay read enable signal from controller/
1509
  // initialization logic (i.e. this is used for both initialization and
1510
  // during normal controller operation). Stage 3 calibration logic decides
1511
  // which delayed version is appropriate to use (which is affected by the
1512
  // round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic
1513
  // when the captured data output from ISERDES is valid.
1514
  //***************************************************************************
1515
 
1516
  //*****************************************************************
1517
  // Delay chains: Use shift registers
1518
  // Two sets of delay chains are used:
1519
  //  1. One to delay RDEN from PHY_INIT module for calibration
1520
  //     purposes (delay required for RDEN for calibration is different
1521
  //     than during normal operation)
1522
  //  2. One per DQS group to delay RDEN from controller for normal
1523
  //     operation - the value to delay for each DQS group can be different
1524
  //     as is determined during calibration
1525
  //*****************************************************************
1526
 
1527
  //*****************************************************************
1528
  // First delay chain, use only for calibration
1529
  // input = asserted on rising edge of RDEN from PHY_INIT module
1530
  //*****************************************************************
1531
 
1532
  // CLK-domain input registers. calib_rden_edge_r is a one-cycle pulse
  // marking a 0 -> 1 transition of the registered phy_init_rden;
  // ctrl_rden_r is simply ctrl_rden delayed by one cycle.
  always @(posedge clk) begin
    phy_init_rden_r   <= phy_init_rden;
    phy_init_rden_r1  <= phy_init_rden_r;
    calib_rden_edge_r <= ~phy_init_rden_r1 & phy_init_rden_r;
    ctrl_rden_r       <= ctrl_rden;
  end
1538
 
1539
  // Calibration shift register used for both Stage 3 and Stage 4 cal
1540
  // (not strictly necessary for stage 4, but use as an additional check
1541
  // to make sure we're checking for correct data on the right clock cycle)
1542
  // Shift-register address source: the stage 3 address is used until
  // calib_done[2] is set, the stage 4 address afterwards
  always @(posedge clkdiv) begin
    if (!calib_done[2]) begin
      calib_rden_srl_a <= cal3_rden_srl_a;
    end else begin
      calib_rden_srl_a <= cal4_rden_srl_a;
    end
  end
1547
 
1548
  // Flops for targeting of multi-cycle path in UCF: each of the 5 bits of
  // calib_rden_srl_a is re-registered on CLKDIV into calib_rden_srl_a_r
  // (CE tied high, R/S tied low). Synthesis attributes kept verbatim.
  genvar cal_rden_ff_i;
  generate
    for (cal_rden_ff_i = 0; cal_rden_ff_i < 5;
         cal_rden_ff_i = cal_rden_ff_i + 1)
      begin: gen_cal_rden_dly
        FDRSE u_ff_cal_rden_dly
          (
           .C   (clkdiv),
           .CE  (1'b1),
           .R   (1'b0),
           .S   (1'b0),
           .D   (calib_rden_srl_a[cal_rden_ff_i]),
           .Q   (calib_rden_srl_a_r[cal_rden_ff_i])
           ) /* synthesis syn_preserve = 1 */
             /* synthesis syn_replicate = 0 */;
      end
  endgenerate
1565
 
1566
  // Calibration delay chain (CLK domain): calib_rden_edge_r is shifted
  // through an SRLC32E whose output tap is selected by the registered
  // address calib_rden_srl_a_r. The cascade output Q31 is left
  // unconnected.
  SRLC32E u_calib_rden_srl
    (
     .CLK (clk),
     .CE  (1'b1),
     .A   (calib_rden_srl_a_r),
     .D   (calib_rden_edge_r),
     .Q   (calib_rden_srl_out),
     .Q31 ()
     );

  // Register the selected tap (CE tied high, R/S tied low)
  FDRSE u_calib_rden_srl_out_r
    (
     .C   (clk),
     .CE  (1'b1),
     .R   (1'b0),
     .S   (1'b0),
     .D   (calib_rden_srl_out),
     .Q   (calib_rden_srl_out_r)
     ) /* synthesis syn_preserve = 1 */;
1585
 
1586
  // Convert to the CLKDIV domain. Two versions are generated so that
  // the logic can tell exactly which fast (CLK) cycle the read enable
  // was asserted in:
  //   calib_rden_valid      - CLKDIV sample of calib_rden_srl_out_r
  //   calib_rden_valid_stgd - CLKDIV sample of the same signal delayed by
  //                           one further CLK cycle
  // The intent is that only one of the two is asserted for any given
  // shift value.
  always @(posedge clk) begin
    calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
  end

  always @(posedge clkdiv) begin
    calib_rden_valid_stgd <= calib_rden_srl_out_r1;
    calib_rden_valid      <= calib_rden_srl_out_r;
  end
1597
 
1598
  //*****************************************************************
1599
  // Second set of delays chain, use for normal reads
1600
  // input = RDEN from controller
1601
  //*****************************************************************
1602
 
1603
  // Flops for targeting of multi-cycle path in UCF: every bit of
  // rden_dly (5 bits per DQS group) is re-registered on CLKDIV into
  // rden_dly_r (CE tied high, R/S tied low). Synthesis attributes kept
  // verbatim.
  genvar rden_ff_i;
  generate
    for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH;
         rden_ff_i = rden_ff_i + 1)
      begin: gen_rden_dly
        FDRSE u_ff_rden_dly
          (
           .C   (clkdiv),
           .CE  (1'b1),
           .R   (1'b0),
           .S   (1'b0),
           .D   (rden_dly[rden_ff_i]),
           .Q   (rden_dly_r[rden_ff_i])
           ) /* synthesis syn_preserve = 1 */
             /* synthesis syn_replicate = 0 */;
      end
  endgenerate
1620
 
1621
  // Per-DQS-group read enable delay (CLK domain). For group n:
  //   - an SRLC32E shifts ctrl_rden_r; its output tap is selected by the
  //     5-bit slice rden_dly_r[5n+4 : 5n] (bit 5n+4 is the address MSB)
  //   - the selected tap is registered once more to form calib_rden[n]
  // The SRL cascade output Q31 is left unconnected.
  genvar rden_i;
  generate
    for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1)
      begin: gen_rden
        SRLC32E u_rden_srl
          (
           .CLK (clk),
           .CE  (1'b1),
           .A   ({rden_dly_r[(rden_i*5)+4],
                  rden_dly_r[(rden_i*5)+3],
                  rden_dly_r[(rden_i*5)+2],
                  rden_dly_r[(rden_i*5)+1],
                  rden_dly_r[(rden_i*5)]}),
           .D   (ctrl_rden_r),
           .Q   (rden_srl_out[rden_i]),
           .Q31 ()
           );
        FDRSE u_calib_rden_r
          (
           .C   (clk),
           .CE  (1'b1),
           .R   (1'b0),
           .S   (1'b0),
           .D   (rden_srl_out[rden_i]),
           .Q   (calib_rden[rden_i])
           ) /* synthesis syn_preserve = 1 */;
      end
  endgenerate
1649
 
1650
  //*****************************************************************
1651
  // indicates that current received data is the correct pattern. Check both
1652
  // rising and falling data for first DQ in each DQS group. Note that
1653
  // we're checking using a pipelined version of read data, so need to take
1654
  // this inherent delay into account in determining final read valid delay
1655
  // Data is written to the memory in the following order (first -> last):
1656
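  //   0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0x1, 0xE
  // NOTE(review): the last four words listed above do not agree with the
  //   per-bit sequences below (which are what the match logic checks) -
  //   confirm against the training pattern actually written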
1657
  // Looking at the two LSb bits, expect data in sequence (in binary):
1658
  //   bit[0]: 1, 0, 0, 1, 0, 1, 0, 1
1659
  //   bit[1]: 0, 1, 1, 0, 1, 0, 1, 0
1660
  // Check for the presence of the first 7 words, and compensate read valid
1661
  // delay accordingly. Don't check last falling edge data, it may be
1662
  // corrupted by the DQS tri-state glitch at end of read postamble
1663
  // (glitch protection not yet active until stage 4 cal)
1664
  //*****************************************************************
1665
 
1666
  // CLKDIV pipeline of the captured read data used by the stage 3
  // pattern match. The Q1 samples are kept for two stages (_r, _r1), the
  // Q2 samples for one stage (_r). The same is done for the "bit1"
  // signals (second bit of the DQS group, added in MIG 3.3).
  always @(posedge clkdiv) begin
    // first bit of the DQS group
    rdd_rise_q2_r  <= rdd_rise_q2;
    rdd_fall_q2_r  <= rdd_fall_q2;
    rdd_rise_q1_r  <= rdd_rise_q1;
    rdd_fall_q1_r  <= rdd_fall_q1;
    rdd_rise_q1_r1 <= rdd_rise_q1_r;
    rdd_fall_q1_r1 <= rdd_fall_q1_r;
    // second bit of the DQS group
    rdd_rise_q2_bit1_r  <= rdd_rise_q2_bit1;
    rdd_fall_q2_bit1_r  <= rdd_fall_q2_bit1;
    rdd_rise_q1_bit1_r  <= rdd_rise_q1_bit1;
    rdd_fall_q1_bit1_r  <= rdd_fall_q1_bit1;
    rdd_rise_q1_bit1_r1 <= rdd_rise_q1_bit1_r;
    rdd_fall_q1_bit1_r1 <= rdd_fall_q1_bit1_r;
  end
1681
 
1682
  // Stage 3 pattern detectors (registered). Seven consecutive samples
  // are compared, oldest first, against
  //   first bit  : 1 0 0 1 0 1 0
  //   second bit : 0 1 1 0 1 0 1
  // Two alignments of the sample stream are checked:
  //   cal3_data_match      - sequence starts at the Q2 rise sample
  //   cal3_data_match_stgd - sequence starts one Q1/Q2 pair earlier, at
  //                          the Q1 rise sample
  always @(posedge clkdiv) begin
    // "aligned" case, sample order (oldest -> newest):
    //   rise_q2_r, fall_q2_r, rise_q1_r, fall_q1_r,
    //   rise_q2,   fall_q2,   rise_q1
    cal3_data_match <=
      (// first bit of the group
       (rdd_rise_q2_r == 1'b1) &&
       (rdd_fall_q2_r == 1'b0) &&
       (rdd_rise_q1_r == 1'b0) &&
       (rdd_fall_q1_r == 1'b1) &&
       (rdd_rise_q2   == 1'b0) &&
       (rdd_fall_q2   == 1'b1) &&
       (rdd_rise_q1   == 1'b0) &&
       // second bit of the group
       (rdd_rise_q2_bit1_r == 1'b0) &&
       (rdd_fall_q2_bit1_r == 1'b1) &&
       (rdd_rise_q1_bit1_r == 1'b1) &&
       (rdd_fall_q1_bit1_r == 1'b0) &&
       (rdd_rise_q2_bit1   == 1'b1) &&
       (rdd_fall_q2_bit1   == 1'b0) &&
       (rdd_rise_q1_bit1   == 1'b1));

    // "staggered" case, sample order (oldest -> newest):
    //   rise_q1_r1, fall_q1_r1, rise_q2_r, fall_q2_r,
    //   rise_q1_r,  fall_q1_r,  rise_q2
    cal3_data_match_stgd <=
      (// first bit of the group
       (rdd_rise_q1_r1 == 1'b1) &&
       (rdd_fall_q1_r1 == 1'b0) &&
       (rdd_rise_q2_r  == 1'b0) &&
       (rdd_fall_q2_r  == 1'b1) &&
       (rdd_rise_q1_r  == 1'b0) &&
       (rdd_fall_q1_r  == 1'b1) &&
       (rdd_rise_q2    == 1'b0) &&
       // second bit of the group
       (rdd_rise_q1_bit1_r1 == 1'b0) &&
       (rdd_fall_q1_bit1_r1 == 1'b1) &&
       (rdd_rise_q2_bit1_r  == 1'b1) &&
       (rdd_fall_q2_bit1_r  == 1'b0) &&
       (rdd_rise_q1_bit1_r  == 1'b1) &&
       (rdd_fall_q1_bit1_r  == 1'b0) &&
       (rdd_rise_q2_bit1    == 1'b1));
  end
1725
 
1726
  // Read enable delay to store = current SRL address minus a fixed offset
  assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;

  // A check is due whenever either version of the delayed read enable
  // is asserted
  assign cal3_data_valid = (calib_rden_valid_stgd | calib_rden_valid);

  // Match = pattern detector and read enable agree for the same
  // alignment (aligned with aligned, staggered with staggered)
  assign cal3_match_found
    = ((cal3_data_match && calib_rden_valid) ||
       (cal3_data_match_stgd && calib_rden_valid_stgd));
1731
 
1732
  // when calibrating, check to see which clock cycle (after the read is
1733
  // issued) does the expected data pattern arrive. Record this result
1734
  // NOTE: Can add error checking here in case valid data not found on any
1735
  //  of the available pipeline stages
1736
  // Stage 3 (read enable) calibration state machine. For each DQS group
  // the SRL address cal3_rden_srl_a is stepped up from RDEN_BASE_DELAY
  // until cal3_match_found is seen while cal3_data_valid is asserted.
  // The result is stored in rden_dly (address minus the fixed offset,
  // via cal3_rden_dly) and in calib_rden_dly (raw address, used by
  // stage 4). If the address reaches 5'b11111 without a match,
  // calib_err_2[0] is set and the FSM returns to idle.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      cal3_state      <= CAL3_IDLE;
      cal3_rden_srl_a <= 5'bxxxxx;
      count_rden      <= {DQS_WIDTH{1'b0}};
      rden_dly        <= {5*DQS_WIDTH{1'b0}};
      calib_done[2]   <= 1'b0;
      calib_err_2[0]  <= 1'b0;
    end else begin

      case (cal3_state)
        // Wait for the start request; group counter is held at 0
        CAL3_IDLE: begin
          count_rden <= {DQS_WIDTH{1'b0}};
          if (calib_start[2]) begin
            cal3_state    <= CAL3_INIT;
            calib_done[2] <= 1'b0;
          end
        end

        // Load the starting shift value, then let the SRL pipe clear
        CAL3_INIT: begin
          cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
          cal3_rden_srl_a <= RDEN_BASE_DELAY;
        end

        // Nothing happens here until cal3_data_valid is asserted
        CAL3_DETECT: begin
          if (cal3_data_valid) begin
            if (cal3_match_found) begin
              // Pattern arrived on this cycle: store the delay
              if (SIM_ONLY != 0) begin
                // simulation: every DQS group gets the same value
                for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
                  rden_dly[(i*5)+4] <= cal3_rden_dly[4];
                  rden_dly[(i*5)+3] <= cal3_rden_dly[3];
                  rden_dly[(i*5)+2] <= cal3_rden_dly[2];
                  rden_dly[(i*5)+1] <= cal3_rden_dly[1];
                  rden_dly[(i*5)]   <= cal3_rden_dly[0];
                end
              end else begin
                // hardware: only the slice of the current group
                rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
                rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
                rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
                rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
                rden_dly[(count_rden*5)]   <= cal3_rden_dly[0];
              end

              // Raw SRL address of the current group, for stage 4
              calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
              calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
              calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
              calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
              calib_rden_dly[(count_rden*5)]   <= cal3_rden_srl_a[0];
              cal3_state <= CAL3_DONE;
            end else if (cal3_rden_srl_a == 5'b11111) begin
              // Ran out of shift stages without a match: stop and
              // assert the error flag
              cal3_state     <= CAL3_IDLE;
              calib_err_2[0] <= 1'b1;
            end else begin
              // Try the next shift value
              cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
              cal3_rden_srl_a <= cal3_rden_srl_a + 1;
            end
          end
        end

        // Wait for calib_rden_pipe_cnt to reach zero (pipe cleared of
        // the previous shift value / IDELAY tap change)
        CAL3_RDEN_PIPE_CLR_WAIT: begin
          if (calib_rden_pipe_cnt == 5'b00000)
            cal3_state <= CAL3_DETECT;
        end

        // Finish after the last group (or after the first one when
        // SIM_ONLY is non-zero), otherwise move to the next group
        CAL3_DONE: begin
          if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin
            cal3_state    <= CAL3_IDLE;
            calib_done[2] <= 1'b1;
          end else begin
            cal3_state    <= CAL3_INIT;
            count_rden    <= count_rden + 1;
          end
        end
      endcase
    end
  end
1821
 
1822
  //*****************************************************************
1823
  // Last part of stage 3 calibration - compensate for differences
1824
  // in delay between different DQS groups. Assume that in the worst
1825
  // case, DQS groups can only differ by one clock cycle. Data for
1826
  // certain DQS groups must be delayed by one clock cycle.
1827
  // NOTE: May need to increase allowable variation to greater than
1828
  //  one clock cycle in certain customer designs.
1829
  // Algorithm is:
1830
  //   1. Record shift delay value for DQS[0]
1831
  //   2. Compare each DQS[x] delay value to that of DQS[0]:
1832
  //     - If different, then record this fact (RDEN_MUX)
1833
  //     - If greater than DQS[0], set RDEN_INC. Assume greater by
1834
  //       one clock cycle only - this is a key assumption, assume no
1835
  //       more than a one clock cycle variation.
1836
  //     - If less than DQS[0], set RDEN_DEC
1837
  //   3. After calibration is complete, set control for DQS group
1838
  //      delay (CALIB_RDEN_SEL):
1839
  //     - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
1840
  //       delay (and at least one other DQS group has a higher
1841
  //       delay).
1842
  //     - If RDEN_INC = 1, then assume that DQS[0] is the highest
1843
  //       delay (and that all other DQS groups have the same or
1844
  //       lower delay).
1845
  //     - If both RDEN_INC and RDEN_DEC = 1, then flag error
1846
  //       (variation is too high for this algorithm to handle)
1847
  //*****************************************************************
1848
 
1849
  // Per-group read enable skew bookkeeping.
  //  - While stage 3 is running (calib_done[2] = 0): on every match,
  //    record the SRL address of DQS[0] as the reference, and for every
  //    other group note whether its address differs from the reference
  //    (rden_mux) and in which direction (rden_inc / rden_dec).
  //  - Once stage 3 is done (calib_done[2] = 1): drive calib_rden_sel
  //    and the "variation too large" error flag from those records.
  always @(posedge clkdiv) begin
    if (rstdiv) begin
      rden_mux       <= {DQS_WIDTH{1'b0}};
      rden_inc       <= 1'b0;
      rden_dec       <= 1'b0;
      rden_dly_0     <= 5'bxxxxx;
      calib_rden_sel <= {DQS_WIDTH{1'bx}};
      calib_err_2[1] <= 1'b0;
    end else if (!calib_done[2]) begin
      // stage 3 still in progress: act only on the cycle a match is
      // reported
      if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
        if (count_rden == 0) begin
          if (SIM_ONLY != 0)
            // simulation calibrates DQS[0] only: all groups are marked
            // "same as DQS[0]"
            rden_mux <= {DQS_WIDTH{1'b0}};
          else begin
            // DQS[0] is the reference group
            rden_mux[0] <= 1'b0;
            rden_dly_0  <= cal3_rden_srl_a;
          end
        end else if ((SIM_ONLY == 0) &&
                     (rden_dly_0 != cal3_rden_srl_a)) begin
          // this group's delay differs from the reference; groups with
          // an equal delay keep rden_mux = 0 from reset
          rden_mux[count_rden] <= 1'b1;
          if (rden_dly_0 > cal3_rden_srl_a)
            rden_inc <= 1'b1;
          else if (rden_dly_0 < cal3_rden_srl_a)
            rden_dec <= 1'b1;
        end
      end
    end else begin
      // stage 3 calibration finished: set the final per-group select.
      // Both directions seen = more variation than can be handled.
      calib_err_2[1] <= (rden_inc && rden_dec);
      if (rden_inc)
        // DQS[0] has the largest delay
        calib_rden_sel <= ~rden_mux;
      else
        // DQS[0] has the smallest delay, or all delays are equal
        calib_rden_sel <= rden_mux;
    end
  end
1904
 
1905
  // Stage 3 error flag: registered OR of the "no match found" error
  // (calib_err_2[0]) and the "group-to-group variation too large" error
  // (calib_err_2[1])
  always @(posedge clkdiv) begin
    calib_err[2] <= calib_err_2[1] | calib_err_2[0];
  end
1908
 
1909
  //***************************************************************************
1910
  // Stage 4 calibration: DQS gate
1911
  //***************************************************************************
1912
 
1913
  //*****************************************************************
1914
  // indicates that current received data is the correct pattern. Same as
1915
  // for READ VALID calibration, except that the expected data sequence is
1916
  // different since DQS gate is asserted after the 6th word.
1917
  // Data sequence:
1918
  //  Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
1919
  //  After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
1920
  //   5th word, 8th word = 6th word)
1921
  // What if the gate timing is off? Need to make sure we can distinguish
1922
  // between the results of correct vs. incorrect gate timing. We also use
1923
  // the "read_valid" signal from stage 3 calibration to help us determine
1924
  // when to check for a valid sequence for stage 4 calibration (i.e. use
1925
  // CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
1926
  // Note that since the gate signal from the CLK0 domain is synchronized
1927
  // to the falling edge of DQS, that the effect of the gate will only be
1928
  // seen starting with a rising edge data (although it is possible
1929
  // the GATE IDDR output could go metastable and cause an unexpected result
1930
  // on the first rising and falling edges after the gate is enabled).
1931
  // Also note that the actual DQS glitch can come more than 0.5*tCK after
1932
  // the last falling edge of DQS and the constraint for this path can
1933
  // be > 0.5*tCK; however, this means when calibrating, the output of the
1934
  // GATE IDDR may miss the setup time requirement of the rising edge flop
1935
  // and only meet it for the falling edge flop. Therefore the rising
1936
  // edge data immediately following the assertion of the gate can either
1937
  // be a 1 or 0 (can rely on either)
1938
  // As the timing on the gate is varied, we expect to see (sequence of
1939
  // captured read data shown below):
1940
  //       - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
1941
  //                          read burst even starts)
1942
  //       - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
1943
  //       - x y 0 1 1 0 0 1  sometime during the burst; x,y = 0, or 1, but
1944
  //       - x y x 1 1 0 0 1  all bits that show an x are the same value,
1945
  //       - x y x y 1 0 0 1  and y are the same value)
1946
  //       - x y x y x 0 0 1
1947
  //       - x y x y x y 0 1 (gate starts just before start of burst)
1948
  //       - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
1949
  //                          represents possibility that gate may not disable
1950
  //                          clock for 2nd rising word in time)
1951
  //       - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
1952
  //       - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
1953
  //       - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
1954
  //*****************************************************************
1955
 
1956
  // A check point exists whenever either read-valid strobe (normal or
  // "staggered") is asserted
  assign cal4_data_valid = calib_rden_valid_stgd | calib_rden_valid;
  // Data is good when a pattern match coincides with the read-valid strobe
  // of the same flavor (normal with normal, staggered with staggered)
  assign cal4_data_good  = (cal4_data_match_stgd & calib_rden_valid_stgd) |
                           (cal4_data_match      & calib_rden_valid);
1961
 
1962
  // Registered pattern detectors for stage 4 (DQS gate) calibration. Each
  // detector is the AND of seven per-word comparisons; the eighth word (the
  // last rising-edge word) is deliberately left out of the check.
  always @(posedge clkdiv) begin
    // case 1: data comes out of the fabric ISERDES aligned
    cal4_data_match <= &{(rdd_rise_q2_r == 1'b1),
                         (rdd_fall_q2_r == 1'b0),
                         (rdd_rise_q1_r == 1'b0),
                         (rdd_fall_q1_r == 1'b1),
                         (rdd_rise_q2   == 1'b1),
                         (rdd_fall_q2   == 1'b0),
                         // MIG 2.1: the last rising edge data value is not
                         // guaranteed to be a certain value at higher
                         // frequencies, so the following term is not used:
                         //   (rdd_rise_q1 == 0)
                         (rdd_fall_q1   == 1'b0)};
    // case 2: data comes out of the fabric ISERDES staggered
    cal4_data_match_stgd <= &{(rdd_rise_q1_r1 == 1'b1),
                              (rdd_fall_q1_r1 == 1'b0),
                              (rdd_rise_q2_r  == 1'b0),
                              (rdd_fall_q2_r  == 1'b1),
                              (rdd_rise_q1_r  == 1'b1),
                              (rdd_fall_q1_r  == 1'b0),
                              // MIG 2.1: the last rising edge data value is
                              // not guaranteed to be a certain value at
                              // higher frequencies, so the following term is
                              // not used:
                              //   (rdd_rise_q2 == 0)
                              (rdd_fall_q2    == 1'b0)};
  end
1988
 
1989
  //*****************************************************************
1990
  // DQS gate enable generation:
1991
  // This signal gets synchronized to DQS domain, and drives IDDR
1992
  // register that in turn asserts/deasserts CE to all 4 or 8 DQ
1993
  // IDDR's in that DQS group.
1994
  //   1. During normal (post-cal) operation, this is only for 2 clock
1995
  //      cycles following the end of a burst. Check for falling edge
1996
  //      of RDEN. But must also make sure NOT assert for a read-idle-
1997
  //      read (two non-consecutive reads, separated by exactly one
1998
  //      idle cycle) - in this case, don't assert the gate because:
1999
  //      (1) we don't have enough time to deassert the gate before the
2000
  //          first rising edge of DQS for second burst (b/c of fact
2001
  //          that DQS gate is generated in the fabric only off rising
2002
  //          edge of CLK0 - if we somehow had an ODDR in fabric, we
2003
  //          could pull this off), (2) assumption is that the DQS glitch
2004
  //          will not rise enough to cause a glitch because the
2005
  //          post-amble of the first burst is followed immediately by
2006
  //          the pre-amble of the next burst
2007
  //   2. During stage 4 calibration, assert for 3 clock cycles
2008
  //      (assert gate enable one clock cycle early), since we gate out
2009
  //      the last two words (in addition to the crap on the DQ bus after
2010
  //      the DQS read postamble).
2011
  // NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
2012
  //  to when they are asserted w/r to the start of the read burst
2013
  //  (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
2014
  //*****************************************************************
2015
 
2016
  // CLK-domain copy of calib_done, registered for timing on the fast clock
  // path. Only bit [2] of the registered copy is currently consumed.
  always @(posedge clk) begin
    calib_done_r <= calib_done;
  end
2020
 
2021
  // Read-enable sources for DQS gate generation:
  //  - the PHY init read enable is masked until calib_done_r[2] is set
  //  - the controller read enable is passed through unchanged
  always @(*) begin
    calib_init_rden = phy_init_rden & calib_done_r[2];
    calib_ctrl_rden = ctrl_rden;
  end
2025
 
2026
  // high for the CLK cycle in which CTRL_RDEN is low but was high on the
  // previous cycle (falling edge detect)
  assign calib_ctrl_rden_negedge = calib_ctrl_rden_r & ~calib_ctrl_rden;
  // gate pulse is raised one cycle after the falling edge, and only if
  // CTRL_RDEN is still low at that point - this suppresses the pulse for a
  // read-idle-read sequence
  assign calib_ctrl_gate_pulse   = ~calib_ctrl_rden &
                                   calib_ctrl_rden_negedge_r;
  // pipeline registers for the edge detect and for stretching the pulse
  always @(posedge clk) begin
    calib_ctrl_gate_pulse_r   <= calib_ctrl_gate_pulse;
    calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
    calib_ctrl_rden_r         <= calib_ctrl_rden;
  end
2035
 
2036
  // high for the CLK cycle in which the (masked) init read enable is low but
  // was high on the previous cycle
  assign calib_init_gate_pulse = calib_init_rden_r & ~calib_init_rden;
  // edge-detect register plus two delayed copies of the pulse, used to
  // stretch it to three cycles
  always @(posedge clk) begin
    calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r;
    calib_init_gate_pulse_r  <= calib_init_gate_pulse;
    calib_init_rden_r        <= calib_init_rden;
  end
2042
 
2043
  // Input to the per-DQS gate delay SRLs. Driven low while any gate pulse
  // term is high, high otherwise:
  //  (1) during cal: low for 3 cycles (pulse, _r, _r1), starting 1 cycle
  //      after the falling edge of the masked PHY_INIT_RDEN
  //  (2) during normal ops: low for 2 cycles (pulse, _r), starting 2 cycles
  //      after the falling edge of CTRL_RDEN
  assign gate_srl_in = ~calib_ctrl_gate_pulse    &
                       ~calib_ctrl_gate_pulse_r  &
                       ~calib_init_gate_pulse    &
                       ~calib_init_gate_pulse_r  &
                       ~calib_init_gate_pulse_r1;
2051
 
2052
  //*****************************************************************
2053
  // generate DQS enable signal for each DQS group
2054
  // There are differences between DQS gate signal for calibration vs. during
2055
  // normal operation:
2056
  //  * calibration gates the second to last clock cycle of the burst,
2057
  //    rather than after the last word (e.g. for a 8-word, 4-cycle burst,
2058
  //    cycle 4 is gated for calibration; during normal operation, cycle
2059
  //    5 (i.e. cycle after the last word) is gated)
2060
  // enable for DQS is deasserted for two clock cycles, except when
2061
  // we have the preamble for the next read immediately following
2062
  // the postamble of the current read - assume DQS does not glitch
2063
  // during this time, that it stays low. Also if we did have to gate
2064
  // the DQS for this case, then we don't have enough time to deassert
2065
  // the gate in time for the first rising edge of DQS for the second
2066
  // read
2067
  //*****************************************************************
2068
 
2069
  // Flops for targeting of multi-cycle path in UCF.
  // One FDRSE per GATE_DLY bit (5 bits per DQS group, 5*DQS_WIDTH total):
  // each flop registers gate_dly[n] into gate_dly_r[n] on CLKDIV. The flops
  // are always enabled (CE tied high) with set/reset tied off.
  // NOTE(review): the generate label (gen_gate_dly), the instance name
  // (u_ff_gate_dly) and the synthesis pragma comments are presumably relied
  // on by the UCF / synthesis flow - do not rename or remove without checking
  // the constraints.
2070
  genvar gate_ff_i;
2071
  generate
2072
    for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH;
2073
         gate_ff_i = gate_ff_i+1) begin: gen_gate_dly
2074
      FDRSE u_ff_gate_dly
2075
        (
2076
         .Q   (gate_dly_r[gate_ff_i]),
2077
         .C   (clkdiv),
2078
         .CE  (1'b1),
2079
         .D   (gate_dly[gate_ff_i]),
2080
         .R   (1'b0),
2081
         .S   (1'b0)
2082
         ) /* synthesis syn_preserve = 1 */
2083
           /* synthesis syn_replicate = 0 */;
2084
    end
2085
  endgenerate
2086
 
2087
  // Per-DQS-group gate enable path (one iteration per DQS group):
  //   gate_srl_in -> SRLC32E (CLK) -> optional pipeline flop -> en_dqs flop
  // The SRLC32E address is the 5-bit slice of gate_dly_r belonging to this
  // group, so each group gets its own programmable gate delay.
  genvar gate_i;
2087
  generate
2088
    for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate
2089
      // shift register delay line; the cascade output (Q31) is unused
      SRLC32E u_gate_srl
2090
        (
2091
         .Q   (gate_srl_out[gate_i]),
2092
         .Q31 (),
2093
         .A   ({gate_dly_r[(gate_i*5)+4],
2094
                gate_dly_r[(gate_i*5)+3],
2095
                gate_dly_r[(gate_i*5)+2],
2096
                gate_dly_r[(gate_i*5)+1],
2097
                gate_dly_r[(gate_i*5)]}),
2098
         .CE  (1'b1),
2099
         .CLK (clk),
2100
         .D   (gate_srl_in)
2101
         );
2102
 
2103
      // For GATE_BASE_DELAY > 0, have one extra cycle to register outputs
2104
      // from controller before generating DQS gate pulse. In PAR, the
2105
      // location of the controller logic can be far from the DQS gate
2106
      // logic (DQS gate logic located near the DQS I/O's), contributing
2107
      // to large net delays. Registering the controller outputs for
2108
      // CL >= 4 (above 200MHz) adds a stage of pipelining to reduce net
2109
      // delays
2110
      if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
2111
        // add flop between SRL32 and EN_DQS flop (which is located near the
2112
        // DDR2 IOB's)
2113
        FDRSE u_gate_srl_ff
2114
          (
2115
         .Q   (gate_srl_out_r[gate_i]),
2116
         .C   (clk),
2117
         .CE  (1'b1),
2118
         .D   (gate_srl_out[gate_i]),
2119
         .R   (1'b0),
2120
         .S   (1'b0)
2121
           ) /* synthesis syn_preserve = 1 */;
2122
      end else begin: gen_gate_base_dly_le3
2123
        // no extra pipeline stage: SRL output feeds the EN_DQS flop directly
        assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i];
2124
      end
2125
 
2126
      // final CLK-domain flop driving the DQS enable for this group
      FDRSE u_en_dqs_ff
2127
        (
2128
         .Q   (en_dqs[gate_i]),
2129
         .C   (clk),
2130
         .CE  (1'b1),
2131
         .D   (gate_srl_out_r[gate_i]),
2132
         .R   (1'b0),
2133
         .S   (1'b0)
2134
         ) /* synthesis syn_preserve = 1 */
2135
           /* synthesis syn_replicate = 0 */;
2136
    end
2137
  endgenerate
2139
 
2140
  //*****************************************************************
2141
  // Find valid window: keep track of how long we've been in the same data
2142
  // window. If it's been long enough, then declare that we've found a stable
2143
  // valid window - in particular, that we're past any region of instability
2144
  // associated with the edge of the window. Use only when finding left edge
2145
  //*****************************************************************
2146
 
2147
  // Stable-window tracker (used only while seeking the left edge).
  // cal4_window_cnt counts consecutive good check points; once it reaches
  // MIN_WIN_SIZE-1 the sticky flag cal4_stable_window is set. Both are
  // cleared in CAL4_INIT, before the search for a window starts.
  always @(posedge clkdiv) begin
    if (cal4_state == CAL4_INIT) begin
      cal4_stable_window <= 1'b0;
      cal4_window_cnt    <= 4'b0000;
    end else if (cal4_seek_left && (cal4_state == CAL4_FIND_EDGE)) begin
      // sticky: once set, stays set until the next CAL4_INIT
      if (cal4_window_cnt == MIN_WIN_SIZE-1)
        cal4_stable_window <= 1'b1;

      // the counter only changes on a valid check point: a good sample
      // extends the run, a bad sample restarts it
      if (cal4_data_valid) begin
        if (cal4_data_good)
          cal4_window_cnt <= cal4_window_cnt + 1;
        else
          cal4_window_cnt <= 4'b0000;
      end
    end
  end
2165
 
2166
  //*****************************************************************
2167
  // keep track of edge tap counts found, and whether we've
2168
  // incremented to the maximum number of taps allowed
2169
  //*****************************************************************
2170
 
2171
  // Shadow counter for the gate IDELAY tap setting, plus two flags:
  //   cal4_idel_bit_tap - the increment just issued brings the count to
  //                       CAL4_IDEL_BIT_VAL-1
  //   cal4_idel_max_tap - the increment just issued brings the count to 63
  // Cleared in CAL4_INIT and whenever the gate IDELAY is reset. Any
  // decrement clears both flags.
  always @(posedge clkdiv) begin
    if (cal4_dlyrst_gate || (cal4_state == CAL4_INIT)) begin
      cal4_idel_tap_cnt <= 6'b000000;
      cal4_idel_bit_tap <= 1'b0;
      cal4_idel_max_tap <= 1'b0;
    end else if (cal4_dlyce_gate) begin
      if (cal4_dlyinc_gate) begin
        // tap increment
        cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'd62);
        cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2);
        cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1;
      end else begin
        // tap decrement
        cal4_idel_max_tap <= 1'b0;
        cal4_idel_bit_tap <= 1'b0;
        cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1;
      end
    end
  end
2187
 
2188
  // Down-counter shared by the stage 3 and stage 4 "RDEN pipe clear" wait
  // states: held at CALIB_RDEN_PIPE_LEN-1 while neither state machine is in
  // its wait state, and decremented every CLKDIV cycle otherwise.
  always @(posedge clkdiv) begin
    if ((cal3_state != CAL3_RDEN_PIPE_CLR_WAIT) &&
        (cal4_state != CAL4_RDEN_PIPE_CLR_WAIT))
      calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1;
    else
      calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1;
  end
2194
 
2195
  //*****************************************************************
2196
  // Stage 4 cal state machine
2197
  //*****************************************************************
2198
 
2199
  // Stage 4 (DQS gate) calibration FSM, clocked on CLKDIV, synchronous reset
  // on rstdiv. State summary (as implemented below):
  //   CAL4_IDLE               - clear group counters; on calib_start[3] clear
  //                             gate_dly and calib_done[3], go to INIT
  //   CAL4_INIT               - load cal4_gate_srl_a with GATE_BASE_INIT and
  //                             cal4_rden_srl_a from calib_rden_dly for the
  //                             current group, go to RDEN_PIPE_CLR_WAIT
  //   CAL4_RDEN_PIPE_CLR_WAIT - copy cal4_gate_srl_a into gate_dly; when
  //                             calib_rden_pipe_cnt is 0 go to FIND_WINDOW
  //   CAL4_FIND_WINDOW        - at the first valid check point latch
  //                             cal4_seek_left = cal4_data_good, go to
  //                             FIND_EDGE
  //   CAL4_FIND_EDGE          - step the gate IDELAY (via IDEL_WAIT) until an
  //                             edge is found, taps run out, or the gate SRL
  //                             delay must be bumped
  //   CAL4_IDEL_WAIT          - copy cal4_gate_srl_a into gate_dly; wait for
  //                             idel_set_wait to drop, return to FIND_EDGE
  //   CAL4_ADJ_IDEL           - issue one IDELAY inc/dec pulse per cycle
  //                             until cal4_idel_adj_cnt reaches 0 (or the
  //                             underflow guard trips), go to DONE
  //   CAL4_DONE               - wait for idel_set_wait to drop; either finish
  //                             (calib_done[3]) or request a refresh and
  //                             restart at INIT for the next DQS group
  // cal4_ref_req and the cal4_dly*_gate strobes default to 0 every cycle and
  // are overridden by the active state, so each is a single-cycle pulse
  // unless re-asserted.
  // NOTE(review): the case statement has no default arm, and
  // cal4_idel_adj_inc is not assigned in the reset branch (it is written in
  // CAL4_FIND_EDGE before CAL4_ADJ_IDEL reads it).
  always @(posedge clkdiv)
2200
    if (rstdiv) begin
2201
      calib_done[3]      <= 1'b0;
2202
      calib_done_tmp[3]  <= 1'b0;
2203
      calib_err[3]       <= 1'b0;
2204
      count_gate         <= 'b0;
2205
      gate_dly           <= 'b0;
2206
      next_count_gate    <= 'b0;
2207
      cal4_idel_adj_cnt  <= 6'bxxxxxx;
2208
      cal4_dlyce_gate    <= 1'b0;
2209
      cal4_dlyinc_gate   <= 1'b0;
2210
      cal4_dlyrst_gate   <= 1'b0;    // reset handled elsewhere in code
2211
      cal4_gate_srl_a    <= 5'bxxxxx;
2212
      cal4_rden_srl_a    <= 5'bxxxxx;
2213
      cal4_ref_req       <= 1'b0;
2214
      cal4_seek_left     <= 1'bx;
2215
      cal4_state         <= CAL4_IDLE;
2216
    end else begin
2217
      // per-cycle defaults; states below override these when needed
      cal4_ref_req     <= 1'b0;
2218
      cal4_dlyce_gate  <= 1'b0;
2219
      cal4_dlyinc_gate <= 1'b0;
2220
      cal4_dlyrst_gate <= 1'b0;
2221
 
2222
      case (cal4_state)
2223
        CAL4_IDLE: begin
2224
          count_gate      <= 'b0;
2225
          next_count_gate <= 'b0;
2226
          if (calib_start[3]) begin
2227
            gate_dly      <= 'b0;
2228
            calib_done[3] <= 1'b0;
2229
            cal4_state    <= CAL4_INIT;
2230
          end
2231
        end
2232
 
2233
        CAL4_INIT: begin
2234
          // load: (1) initial value of gate delay SRL, (2) appropriate
2235
          // value of RDEN SRL (so that we get correct "data valid" timing)
2236
          cal4_gate_srl_a <= GATE_BASE_INIT;
2237
          cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
2238
                              calib_rden_dly[(count_gate*5)+3],
2239
                              calib_rden_dly[(count_gate*5)+2],
2240
                              calib_rden_dly[(count_gate*5)+1],
2241
                              calib_rden_dly[(count_gate*5)]};
2242
          // let SRL pipe clear after loading initial shift value
2243
          cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2244
        end
2245
 
2246
        // sort of an initial state - start checking to see whether we're
2247
        // already in the window or not
2248
        CAL4_FIND_WINDOW:
2249
          // decide right away if we start in the proper window - this
2250
          // determines if we are then looking for the left (trailing) or
2251
          // right (leading) edge of the data valid window
2252
          if (cal4_data_valid) begin
2253
            // if we find a match - then we're already in window, now look
2254
            // for left edge. Otherwise, look for right edge of window
2255
            cal4_seek_left  <= cal4_data_good;
2256
            cal4_state      <= CAL4_FIND_EDGE;
2257
          end
2258
 
2259
        CAL4_FIND_EDGE:
2260
          // don't do anything until the exact clock cycle when to check that
2261
          // readback data is valid or not
2262
          if (cal4_data_valid) begin
2263
            // we're currently in the window, look for left edge of window
2264
            if (cal4_seek_left) begin
2265
              // make sure we've passed the right edge before trying to detect
2266
              // the left edge (i.e. avoid any edge "instability") - else, we
2267
              // may detect a "false" edge too soon. By design, if we start in
2268
              // the data valid window, always expect at least
2269
              // MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid
2270
              // window before we hit the left edge (this is because when stage
2271
              // 4 calibration first begins (i.e., gate_dly = 00, and IDELAY =
2272
              // 00), we're guaranteed to NOT be in the window, and we always
2273
              // start searching for MIN(BIT_TIME_TAPS,32) for the right edge
2274
              // of window. If we don't find it, increment gate_dly, and if we
2275
              // now start in the window, we have at least approximately
2276
              // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps.
2277
              // It's approximately because jitter, noise, etc. can bring this
2278
              // value down slightly. Because of this (although VERY UNLIKELY),
2279
              // we have to protect against decrementing IDELAY below 0
2280
              // during adjustment phase).
2281
              if (cal4_stable_window && !cal4_data_good) begin
2282
                // found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
2283
                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2284
                cal4_idel_adj_inc <= 1'b0;
2285
                cal4_state        <= CAL4_ADJ_IDEL;
2286
              end else begin
2287
                // Otherwise, keep looking for left edge:
2288
                if (cal4_idel_max_tap) begin
2289
                  // ran out of taps looking for left edge (max=63) - happens
2290
                  // for low frequency case, decrement by 32
2291
                  cal4_idel_adj_cnt <= 6'b100000;
2292
                  cal4_idel_adj_inc <= 1'b0;
2293
                  cal4_state        <= CAL4_ADJ_IDEL;
2294
                end else begin
2295
                  cal4_dlyce_gate  <= 1'b1;
2296
                  cal4_dlyinc_gate <= 1'b1;
2297
                  cal4_state       <= CAL4_IDEL_WAIT;
2298
                end
2299
              end
2300
            end else begin
2301
              // looking for right edge of window:
2302
              // look for the first match - this means we've found the right
2303
              // (leading) edge of the data valid window, increment by
2304
              // MIN(BIT_TIME_TAPS,32)
2305
              if (cal4_data_good) begin
2306
                cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
2307
                cal4_idel_adj_inc <= 1'b1;
2308
                cal4_state        <= CAL4_ADJ_IDEL;
2309
              end else begin
2310
                // Otherwise, keep looking:
2311
                // only look for MIN(BIT_TIME_TAPS,32) taps for right edge,
2312
                // if we haven't found it, then inc gate delay, try again
2313
                if (cal4_idel_bit_tap) begin
2314
                  // if we're already maxed out on gate delay, then error out
2315
                  // (simulation only - calib_err isn't currently connected)
2316
                  if (cal4_gate_srl_a == 5'b11111) begin
2317
                    calib_err[3] <= 1'b1;
2318
                    cal4_state   <= CAL4_IDLE;
2319
                  end else begin
2320
                    // otherwise, increment gate delay count, and start
2321
                    // over again
2322
                    cal4_gate_srl_a <= cal4_gate_srl_a + 1;
2323
                    cal4_dlyrst_gate <= 1'b1;
2324
                    cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
2325
                  end
2326
                end else begin
2327
                  // keep looking for right edge
2328
                  cal4_dlyce_gate  <= 1'b1;
2329
                  cal4_dlyinc_gate <= 1'b1;
2330
                  cal4_state       <= CAL4_IDEL_WAIT;
2331
                end
2332
              end
2333
            end
2334
          end
2335
 
2336
        // wait for GATE IDELAY to settle, after reset or increment
2337
        CAL4_IDEL_WAIT: begin
2338
          // For simulation, load SRL addresses for all DQS with same value
2339
          if (SIM_ONLY != 0) begin
2340
            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
2341
              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2342
              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2343
              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2344
              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2345
              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
2346
            end
2347
          end else begin
2348
            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2349
            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2350
            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2351
            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2352
            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
2353
          end
2354
          // check to see if we've found edge of window
2355
          if (!idel_set_wait)
2356
            cal4_state <= CAL4_FIND_EDGE;
2357
        end
2358
 
2359
        // give additional time for RDEN_R pipe to clear from effects of
2360
        // previous pipeline (and IDELAY reset)
2361
        CAL4_RDEN_PIPE_CLR_WAIT: begin
2362
          // MIG 2.2: Bug fix - make sure to update GATE_DLY count, since
2363
          // possible for FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW
2364
          // transition (i.e. need to make sure the gate count updated in
2365
          // FIND_EDGE gets reflected in GATE_DLY by the time we reach
2366
          // state FIND_WINDOW) - previously GATE_DLY only being updated
2367
          // during state CAL4_IDEL_WAIT
2368
          if (SIM_ONLY != 0) begin
2369
            for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
2370
              gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
2371
              gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
2372
              gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
2373
              gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
2374
              gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
2375
            end
2376
          end else begin
2377
            gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
2378
            gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
2379
            gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
2380
            gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
2381
            gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
2382
          end
2383
          // look for new window
2384
          if (calib_rden_pipe_cnt == 5'b00000)
2385
            cal4_state <= CAL4_FIND_WINDOW;
2386
        end
2387
 
2388
        // increment/decrement DQS/DQ IDELAY for final adjustment
2389
        CAL4_ADJ_IDEL:
2390
          // add underflow protection for corner case when left edge found
2391
          // using fewer than MIN(BIT_TIME_TAPS,32) taps
2392
          if ((cal4_idel_adj_cnt == 6'b000000) ||
2393
              (cal4_dlyce_gate && !cal4_dlyinc_gate &&
2394
               (cal4_idel_tap_cnt == 6'b000001))) begin
2395
            cal4_state <= CAL4_DONE;
2396
            // stop when all gates calibrated, or gate[0] cal'ed (for sim)
2397
            if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
2398
              calib_done_tmp[3] <= 1'b1;
2399
            else
2400
              // need for VHDL simulation to prevent out-of-index error
2401
              next_count_gate <= count_gate + 1;
2402
          end else begin
2403
            cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
2404
            cal4_dlyce_gate  <= 1'b1;
2405
            // whether inc or dec depends on whether left or right edge found
2406
            cal4_dlyinc_gate <= cal4_idel_adj_inc;
2407
          end
2408
 
2409
        // wait for IDELAY output to settle after decrement. Check current
2410
        // COUNT_GATE value and decide if we're done
2411
        CAL4_DONE:
2412
          if (!idel_set_wait) begin
2413
            count_gate <= next_count_gate;
2414
            if (calib_done_tmp[3]) begin
2415
              calib_done[3] <= 1'b1;
2416
              cal4_state <= CAL4_IDLE;
2417
            end else begin
2418
              // request auto-refresh after every DQS group calibrated to
2419
              // avoid tRAS violation
2420
              cal4_ref_req <= 1'b1;
2421
              if (calib_ref_done)
2422
                cal4_state <= CAL4_INIT;
2423
            end
2424
          end
2425
      endcase
2426
    end
2427
 
2428
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.