1 |
412 |
julius |
//*****************************************************************************
|
2 |
|
|
// DISCLAIMER OF LIABILITY
|
3 |
|
|
//
|
4 |
|
|
// This file contains proprietary and confidential information of
|
5 |
|
|
// Xilinx, Inc. ("Xilinx"), that is distributed under a license
|
6 |
|
|
// from Xilinx, and may be used, copied and/or disclosed only
|
7 |
|
|
// pursuant to the terms of a valid license agreement with Xilinx.
|
8 |
|
|
//
|
9 |
|
|
// XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION
|
10 |
|
|
// ("MATERIALS") "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
|
11 |
|
|
// EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING WITHOUT
|
12 |
|
|
// LIMITATION, ANY WARRANTY WITH RESPECT TO NONINFRINGEMENT,
|
13 |
|
|
// MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE. Xilinx
|
14 |
|
|
// does not warrant that functions included in the Materials will
|
15 |
|
|
// meet the requirements of Licensee, or that the operation of the
|
16 |
|
|
// Materials will be uninterrupted or error-free, or that defects
|
17 |
|
|
// in the Materials will be corrected. Furthermore, Xilinx does
|
18 |
|
|
// not warrant or make any representations regarding use, or the
|
19 |
|
|
// results of the use, of the Materials in terms of correctness,
|
20 |
|
|
// accuracy, reliability or otherwise.
|
21 |
|
|
//
|
22 |
|
|
// Xilinx products are not designed or intended to be fail-safe,
|
23 |
|
|
// or for use in any application requiring fail-safe performance,
|
24 |
|
|
// such as life-support or safety devices or systems, Class III
|
25 |
|
|
// medical devices, nuclear facilities, applications related to
|
26 |
|
|
// the deployment of airbags, or any other applications that could
|
27 |
|
|
// lead to death, personal injury or severe property or
|
28 |
|
|
// environmental damage (individually and collectively, "critical
|
29 |
|
|
// applications"). Customer assumes the sole risk and liability
|
30 |
|
|
// of any use of Xilinx products in critical applications,
|
31 |
|
|
// subject only to applicable laws and regulations governing
|
32 |
|
|
// limitations on product liability.
|
33 |
|
|
//
|
34 |
|
|
// Copyright 2006, 2007, 2008 Xilinx, Inc.
|
35 |
|
|
// All rights reserved.
|
36 |
|
|
//
|
37 |
|
|
// This disclaimer and copyright notice must be retained as part
|
38 |
|
|
// of this file at all times.
|
39 |
|
|
//*****************************************************************************
|
40 |
|
|
// ____ ____
|
41 |
|
|
// / /\/ /
|
42 |
|
|
// /___/ \ / Vendor: Xilinx
|
43 |
|
|
// \ \ \/ Version: 3.0
|
44 |
|
|
// \ \ Application: MIG
|
45 |
|
|
// / / Filename: ddr2_phy_calib.v
|
46 |
|
|
// /___/ /\ Date Last Modified: $Date: 2008/12/23 14:26:00 $
|
47 |
|
|
// \ \ / \ Date Created: Thu Aug 10 2006
|
48 |
|
|
// \___\/\___\
|
49 |
|
|
//
|
50 |
|
|
//Device: Virtex-5
|
51 |
|
|
//Design Name: DDR2
|
52 |
|
|
//Purpose:
|
53 |
|
|
// This module handles calibration after memory initialization.
|
54 |
|
|
//Reference:
|
55 |
|
|
//Revision History:
|
56 |
|
|
//*****************************************************************************
|
57 |
|
|
|
58 |
|
|
`timescale 1ns/1ps
|
59 |
|
|
|
60 |
|
|
module ddr2_phy_calib #
|
61 |
|
|
(
|
62 |
|
|
// Following parameters are for 72-bit RDIMM design (for ML561 Reference
|
63 |
|
|
// board design). Actual values may be different. Actual parameters values
|
64 |
|
|
// are passed from design top module ddr2_mig module. Please refer to
|
65 |
|
|
// the ddr2_mig module for actual values.
|
66 |
|
|
parameter DQ_WIDTH = 72,
|
67 |
|
|
parameter DQ_BITS = 7,
|
68 |
|
|
parameter DQ_PER_DQS = 8,
|
69 |
|
|
parameter DQS_BITS = 4,
|
70 |
|
|
parameter DQS_WIDTH = 9,
|
71 |
|
|
parameter ADDITIVE_LAT = 0,
|
72 |
|
|
parameter CAS_LAT = 5,
|
73 |
|
|
parameter REG_ENABLE = 1,
|
74 |
|
|
parameter CLK_PERIOD = 3000,
|
75 |
|
|
parameter SIM_ONLY = 0,
|
76 |
|
|
parameter DEBUG_EN = 0
|
77 |
|
|
)
|
78 |
|
|
(
|
79 |
|
|
input clk,
|
80 |
|
|
input clkdiv,
|
81 |
|
|
input rstdiv,
|
82 |
|
|
input [3:0] calib_start,
|
83 |
|
|
input ctrl_rden,
|
84 |
|
|
input phy_init_rden,
|
85 |
|
|
input [DQ_WIDTH-1:0] rd_data_rise,
|
86 |
|
|
input [DQ_WIDTH-1:0] rd_data_fall,
|
87 |
|
|
input calib_ref_done,
|
88 |
|
|
output reg [3:0] calib_done,
|
89 |
|
|
output reg calib_ref_req,
|
90 |
|
|
output [DQS_WIDTH-1:0] calib_rden,
|
91 |
|
|
output reg [DQS_WIDTH-1:0] calib_rden_sel,
|
92 |
|
|
output reg dlyrst_dq,
|
93 |
|
|
output reg [DQ_WIDTH-1:0] dlyce_dq,
|
94 |
|
|
output reg [DQ_WIDTH-1:0] dlyinc_dq,
|
95 |
|
|
output reg dlyrst_dqs,
|
96 |
|
|
output reg [DQS_WIDTH-1:0] dlyce_dqs,
|
97 |
|
|
output reg [DQS_WIDTH-1:0] dlyinc_dqs,
|
98 |
|
|
output reg [DQS_WIDTH-1:0] dlyrst_gate,
|
99 |
|
|
output reg [DQS_WIDTH-1:0] dlyce_gate,
|
100 |
|
|
output reg [DQS_WIDTH-1:0] dlyinc_gate,
|
101 |
|
|
output [DQS_WIDTH-1:0] en_dqs,
|
102 |
|
|
output [DQS_WIDTH-1:0] rd_data_sel,
|
103 |
|
|
// Debug signals (optional use)
|
104 |
|
|
input dbg_idel_up_all,
|
105 |
|
|
input dbg_idel_down_all,
|
106 |
|
|
input dbg_idel_up_dq,
|
107 |
|
|
input dbg_idel_down_dq,
|
108 |
|
|
input dbg_idel_up_dqs,
|
109 |
|
|
input dbg_idel_down_dqs,
|
110 |
|
|
input dbg_idel_up_gate,
|
111 |
|
|
input dbg_idel_down_gate,
|
112 |
|
|
input [DQ_BITS-1:0] dbg_sel_idel_dq,
|
113 |
|
|
input dbg_sel_all_idel_dq,
|
114 |
|
|
input [DQS_BITS:0] dbg_sel_idel_dqs,
|
115 |
|
|
input dbg_sel_all_idel_dqs,
|
116 |
|
|
input [DQS_BITS:0] dbg_sel_idel_gate,
|
117 |
|
|
input dbg_sel_all_idel_gate,
|
118 |
|
|
output [3:0] dbg_calib_done,
|
119 |
|
|
output [3:0] dbg_calib_err,
|
120 |
|
|
output [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt,
|
121 |
|
|
output [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt,
|
122 |
|
|
output [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt,
|
123 |
|
|
output [DQS_WIDTH-1:0] dbg_calib_rd_data_sel,
|
124 |
|
|
output [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly,
|
125 |
|
|
output [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly
|
126 |
|
|
);
|
127 |
|
|
|
128 |
|
|
// minimum time (in IDELAY taps) for which capture data must be stable for
|
129 |
|
|
// algorithm to consider
|
130 |
|
|
localparam MIN_WIN_SIZE = 5;
|
131 |
|
|
// IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
|
132 |
|
|
// we only have to wait enough for input with new IDELAY value to
|
133 |
|
|
// propagate through pipeline stages.
|
134 |
|
|
localparam IDEL_SET_VAL = 3'b111;
|
135 |
|
|
// # of clock cycles to delay read enable to determine if read data pattern
|
136 |
|
|
// is correct for stage 3/4 (RDEN, DQS gate) calibration
|
137 |
|
|
localparam CALIB_RDEN_PIPE_LEN = 31;
|
138 |
|
|
// translate CAS latency into number of clock cycles for read valid delay
|
139 |
|
|
// determination. Really only needed for CL = 2.5 (set to 2)
|
140 |
|
|
localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
|
141 |
|
|
// an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
|
142 |
|
|
// is min possible value delay through SRL32 can be
|
143 |
|
|
localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
|
144 |
|
|
// an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
|
145 |
|
|
// gate. This is min possible value the SRL32 delay can be:
|
146 |
|
|
// - Delay from end of deassertion of CTRL_RDEN to last falling edge of
|
147 |
|
|
// read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
|
148 |
|
|
// - Minimum time for DQS gate circuit to be generated:
|
149 |
|
|
// * 1 cyc to register CTRL_RDEN from controller
|
150 |
|
|
// * 1 cyc after RDEN_CTRL falling edge
|
151 |
|
|
// * 1 cyc min through SRL32
|
152 |
|
|
// * 1 cyc through SRL32 output flop
|
153 |
|
|
// * 0 (<1) cyc of synchronization to DQS domain via IDELAY
|
154 |
|
|
// * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
|
155 |
|
|
// Total = 5 cyc < 6.5 cycles
|
156 |
|
|
// The total should be less than 5.5 cycles to account for prop delays
|
157 |
|
|
// adding one cycle to the synchronization time via the IDELAY.
|
158 |
|
|
// NOTE: Value differs because of optional pipeline register added
|
159 |
|
|
// for case of RDEN_BASE_DELAY > 3 to improve timing
|
160 |
|
|
localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
|
161 |
|
|
localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
|
162 |
|
|
// used for RDEN calibration: difference between shift value used during
|
163 |
|
|
// calibration, and shift value for actual RDEN SRL. Only applies when
|
164 |
|
|
// RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
|
165 |
|
|
// of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
|
166 |
|
|
localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
|
167 |
|
|
// fix minimum value of DQS_BITS to be 1 to handle the case where there's only
|
168 |
|
|
// one DQS group. We could also enforce that user always inputs minimum
|
169 |
|
|
// value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
|
170 |
|
|
// Assume we don't have to do this for DQ, DQ_WIDTH always > 1
|
171 |
|
|
localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
|
172 |
|
|
// how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
|
173 |
|
|
// current calibration, but leave for debug
|
174 |
|
|
localparam DQ_IDEL_INIT = 6'b000000;
|
175 |
|
|
// # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
|
176 |
|
|
localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
|
177 |
|
|
CLK_PERIOD/150 : 63;
|
178 |
|
|
|
179 |
|
|
// used in various places during stage 4 cal: (1) determines maximum taps
|
180 |
|
|
// to increment when finding right edge, (2) amount to decrement after
|
181 |
|
|
// finding left edge, (3) amount to increment after finding right edge
|
182 |
|
|
localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
|
183 |
|
|
6'b100000 : BIT_TIME_TAPS;
|
184 |
|
|
|
185 |
|
|
localparam CAL1_IDLE = 4'h0;
|
186 |
|
|
localparam CAL1_INIT = 4'h1;
|
187 |
|
|
localparam CAL1_INC_IDEL = 4'h2;
|
188 |
|
|
localparam CAL1_FIND_FIRST_EDGE = 4'h3;
|
189 |
|
|
localparam CAL1_FIRST_EDGE_IDEL_WAIT = 4'h4;
|
190 |
|
|
localparam CAL1_FOUND_FIRST_EDGE_WAIT = 4'h5;
|
191 |
|
|
localparam CAL1_FIND_SECOND_EDGE = 4'h6;
|
192 |
|
|
localparam CAL1_SECOND_EDGE_IDEL_WAIT = 4'h7;
|
193 |
|
|
localparam CAL1_CALC_IDEL = 4'h8;
|
194 |
|
|
localparam CAL1_DEC_IDEL = 4'h9;
|
195 |
|
|
localparam CAL1_DONE = 4'hA;
|
196 |
|
|
|
197 |
|
|
localparam CAL2_IDLE = 4'h0;
|
198 |
|
|
localparam CAL2_INIT = 4'h1;
|
199 |
|
|
localparam CAL2_INIT_IDEL_WAIT = 4'h2;
|
200 |
|
|
localparam CAL2_FIND_EDGE_POS = 4'h3;
|
201 |
|
|
localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
|
202 |
|
|
localparam CAL2_FIND_EDGE_NEG = 4'h5;
|
203 |
|
|
localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
|
204 |
|
|
localparam CAL2_DEC_IDEL = 4'h7;
|
205 |
|
|
localparam CAL2_DONE = 4'h8;
|
206 |
|
|
|
207 |
|
|
localparam CAL3_IDLE = 3'h0;
|
208 |
|
|
localparam CAL3_INIT = 3'h1;
|
209 |
|
|
localparam CAL3_DETECT = 3'h2;
|
210 |
|
|
localparam CAL3_RDEN_PIPE_CLR_WAIT = 3'h3;
|
211 |
|
|
localparam CAL3_DONE = 3'h4;
|
212 |
|
|
|
213 |
|
|
localparam CAL4_IDLE = 3'h0;
|
214 |
|
|
localparam CAL4_INIT = 3'h1;
|
215 |
|
|
localparam CAL4_FIND_WINDOW = 3'h2;
|
216 |
|
|
localparam CAL4_FIND_EDGE = 3'h3;
|
217 |
|
|
localparam CAL4_IDEL_WAIT = 3'h4;
|
218 |
|
|
localparam CAL4_RDEN_PIPE_CLR_WAIT = 3'h5;
|
219 |
|
|
localparam CAL4_ADJ_IDEL = 3'h6;
|
220 |
|
|
localparam CAL4_DONE = 3'h7;
|
221 |
|
|
|
222 |
|
|
integer i, j;
|
223 |
|
|
|
224 |
|
|
reg [5:0] cal1_bit_time_tap_cnt;
|
225 |
|
|
reg [1:0] cal1_data_chk_last;
|
226 |
|
|
reg cal1_data_chk_last_valid;
|
227 |
|
|
reg [1:0] cal1_data_chk_r;
|
228 |
|
|
reg cal1_dlyce_dq;
|
229 |
|
|
reg cal1_dlyinc_dq;
|
230 |
|
|
reg cal1_dqs_dq_init_phase;
|
231 |
|
|
reg cal1_detect_edge;
|
232 |
|
|
reg cal1_detect_stable;
|
233 |
|
|
reg cal1_found_second_edge;
|
234 |
|
|
reg cal1_found_rising;
|
235 |
|
|
reg cal1_found_window;
|
236 |
|
|
reg cal1_first_edge_done;
|
237 |
|
|
reg [5:0] cal1_first_edge_tap_cnt;
|
238 |
|
|
reg [6:0] cal1_idel_dec_cnt;
|
239 |
|
|
reg [5:0] cal1_idel_inc_cnt;
|
240 |
|
|
reg [5:0] cal1_idel_max_tap;
|
241 |
|
|
reg cal1_idel_max_tap_we;
|
242 |
|
|
reg [5:0] cal1_idel_tap_cnt;
|
243 |
|
|
reg cal1_idel_tap_limit_hit;
|
244 |
|
|
reg [6:0] cal1_low_freq_idel_dec;
|
245 |
|
|
reg cal1_ref_req;
|
246 |
|
|
wire cal1_refresh;
|
247 |
|
|
reg [3:0] cal1_state;
|
248 |
|
|
reg [3:0] cal1_window_cnt;
|
249 |
|
|
reg cal2_curr_sel;
|
250 |
|
|
wire cal2_detect_edge;
|
251 |
|
|
reg cal2_dlyce_dqs;
|
252 |
|
|
reg cal2_dlyinc_dqs;
|
253 |
|
|
reg [5:0] cal2_idel_dec_cnt;
|
254 |
|
|
reg [5:0] cal2_idel_tap_cnt;
|
255 |
|
|
reg [5:0] cal2_idel_tap_limit;
|
256 |
|
|
reg cal2_idel_tap_limit_hit;
|
257 |
|
|
reg cal2_rd_data_fall_last_neg;
|
258 |
|
|
reg cal2_rd_data_fall_last_pos;
|
259 |
|
|
reg cal2_rd_data_last_valid_neg;
|
260 |
|
|
reg cal2_rd_data_last_valid_pos;
|
261 |
|
|
reg cal2_rd_data_rise_last_neg;
|
262 |
|
|
reg cal2_rd_data_rise_last_pos;
|
263 |
|
|
reg [DQS_WIDTH-1:0] cal2_rd_data_sel;
|
264 |
|
|
wire cal2_rd_data_sel_edge;
|
265 |
|
|
reg [DQS_WIDTH-1:0] cal2_rd_data_sel_r;
|
266 |
|
|
reg cal2_ref_req;
|
267 |
|
|
reg [3:0] cal2_state;
|
268 |
|
|
reg cal3_data_match;
|
269 |
|
|
reg cal3_data_match_stgd;
|
270 |
|
|
wire cal3_data_valid;
|
271 |
|
|
wire cal3_match_found;
|
272 |
|
|
wire [4:0] cal3_rden_dly;
|
273 |
|
|
reg [4:0] cal3_rden_srl_a;
|
274 |
|
|
reg [2:0] cal3_state;
|
275 |
|
|
wire cal4_data_good;
|
276 |
|
|
reg cal4_data_match;
|
277 |
|
|
reg cal4_data_match_stgd;
|
278 |
|
|
wire cal4_data_valid;
|
279 |
|
|
reg cal4_dlyce_gate;
|
280 |
|
|
reg cal4_dlyinc_gate;
|
281 |
|
|
reg cal4_dlyrst_gate;
|
282 |
|
|
reg [4:0] cal4_gate_srl_a;
|
283 |
|
|
reg [5:0] cal4_idel_adj_cnt;
|
284 |
|
|
reg cal4_idel_adj_inc;
|
285 |
|
|
reg cal4_idel_bit_tap;
|
286 |
|
|
reg [5:0] cal4_idel_tap_cnt;
|
287 |
|
|
reg cal4_idel_max_tap;
|
288 |
|
|
reg [4:0] cal4_rden_srl_a;
|
289 |
|
|
reg cal4_ref_req;
|
290 |
|
|
reg cal4_seek_left;
|
291 |
|
|
reg cal4_stable_window;
|
292 |
|
|
reg [2:0] cal4_state;
|
293 |
|
|
reg [3:0] cal4_window_cnt;
|
294 |
|
|
reg [3:0] calib_done_tmp; // only for stg1/2/4
|
295 |
|
|
reg calib_ctrl_gate_pulse_r;
|
296 |
|
|
reg calib_ctrl_rden;
|
297 |
|
|
reg calib_ctrl_rden_r;
|
298 |
|
|
wire calib_ctrl_rden_negedge;
|
299 |
|
|
reg calib_ctrl_rden_negedge_r;
|
300 |
|
|
reg [3:0] calib_done_r;
|
301 |
|
|
reg [3:0] calib_err;
|
302 |
|
|
reg [1:0] calib_err_2;
|
303 |
|
|
wire calib_init_gate_pulse;
|
304 |
|
|
reg calib_init_gate_pulse_r;
|
305 |
|
|
reg calib_init_gate_pulse_r1;
|
306 |
|
|
reg calib_init_rden;
|
307 |
|
|
reg calib_init_rden_r;
|
308 |
|
|
reg [4:0] calib_rden_srl_a;
|
309 |
|
|
wire [4:0] calib_rden_srl_a_r;
|
310 |
|
|
reg [(5*DQS_WIDTH)-1:0] calib_rden_dly;
|
311 |
|
|
reg calib_rden_edge_r;
|
312 |
|
|
reg [4:0] calib_rden_pipe_cnt;
|
313 |
|
|
wire calib_rden_srl_out;
|
314 |
|
|
wire calib_rden_srl_out_r;
|
315 |
|
|
reg calib_rden_srl_out_r1;
|
316 |
|
|
reg calib_rden_valid;
|
317 |
|
|
reg calib_rden_valid_stgd;
|
318 |
|
|
reg [DQ_BITS-1:0] count_dq;
|
319 |
|
|
reg [DQS_BITS_FIX-1:0] count_dqs;
|
320 |
|
|
reg [DQS_BITS_FIX-1:0] count_gate;
|
321 |
|
|
reg [DQS_BITS_FIX-1:0] count_rden;
|
322 |
|
|
reg ctrl_rden_r;
|
323 |
|
|
wire dlyce_or;
|
324 |
|
|
reg [(5*DQS_WIDTH)-1:0] gate_dly;
|
325 |
|
|
wire [(5*DQS_WIDTH)-1:0] gate_dly_r;
|
326 |
|
|
wire gate_srl_in;
|
327 |
|
|
wire [DQS_WIDTH-1:0] gate_srl_out;
|
328 |
|
|
wire [DQS_WIDTH-1:0] gate_srl_out_r;
|
329 |
|
|
reg [2:0] idel_set_cnt;
|
330 |
|
|
wire idel_set_wait;
|
331 |
|
|
reg [DQ_BITS-1:0] next_count_dq;
|
332 |
|
|
reg [DQS_BITS_FIX-1:0] next_count_dqs;
|
333 |
|
|
reg [DQS_BITS_FIX-1:0] next_count_gate;
|
334 |
|
|
reg phy_init_rden_r;
|
335 |
|
|
reg phy_init_rden_r1;
|
336 |
|
|
reg [DQ_WIDTH-1:0] rd_data_fall_1x_r;
|
337 |
|
|
reg [DQS_WIDTH-1:0] rd_data_fall_1x_r1;
|
338 |
|
|
reg [DQS_WIDTH-1:0] rd_data_fall_2x_r;
|
339 |
|
|
wire [DQS_WIDTH-1:0] rd_data_fall_chk_q1;
|
340 |
|
|
wire [DQS_WIDTH-1:0] rd_data_fall_chk_q2;
|
341 |
|
|
reg [DQ_WIDTH-1:0] rd_data_rise_1x_r;
|
342 |
|
|
reg [DQS_WIDTH-1:0] rd_data_rise_1x_r1;
|
343 |
|
|
reg [DQS_WIDTH-1:0] rd_data_rise_2x_r;
|
344 |
|
|
wire [DQS_WIDTH-1:0] rd_data_rise_chk_q1;
|
345 |
|
|
wire [DQS_WIDTH-1:0] rd_data_rise_chk_q2;
|
346 |
|
|
reg rdd_fall_q1;
|
347 |
|
|
reg rdd_fall_q1_r;
|
348 |
|
|
reg rdd_fall_q1_r1;
|
349 |
|
|
reg rdd_fall_q2;
|
350 |
|
|
reg rdd_fall_q2_r;
|
351 |
|
|
reg rdd_rise_q1;
|
352 |
|
|
reg rdd_rise_q1_r;
|
353 |
|
|
reg rdd_rise_q1_r1;
|
354 |
|
|
reg rdd_rise_q2;
|
355 |
|
|
reg rdd_rise_q2_r;
|
356 |
|
|
reg [DQS_BITS_FIX-1:0] rdd_mux_sel;
|
357 |
|
|
reg rden_dec;
|
358 |
|
|
reg [(5*DQS_WIDTH)-1:0] rden_dly;
|
359 |
|
|
wire [(5*DQS_WIDTH)-1:0] rden_dly_r;
|
360 |
|
|
reg [4:0] rden_dly_0;
|
361 |
|
|
reg rden_inc;
|
362 |
|
|
reg [DQS_WIDTH-1:0] rden_mux;
|
363 |
|
|
wire [DQS_WIDTH-1:0] rden_srl_out;
|
364 |
|
|
|
365 |
|
|
// Debug
|
366 |
|
|
integer x;
|
367 |
|
|
reg [5:0] dbg_dq_tap_cnt [DQ_WIDTH-1:0];
|
368 |
|
|
reg [5:0] dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
|
369 |
|
|
reg [5:0] dbg_gate_tap_cnt [DQS_WIDTH-1:0];
|
370 |
|
|
|
371 |
|
|
//***************************************************************************
|
372 |
|
|
// Debug output ("dbg_phy_calib_*")
|
373 |
|
|
// NOTES:
|
374 |
|
|
// 1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
|
375 |
|
|
// although they are also static after calibration is complete. This
|
376 |
|
|
// means the user can either connect them to a Chipscope ILA, or to
|
377 |
|
|
// either a sync/async VIO input block. Using an async VIO has the
|
378 |
|
|
// advantage of not requiring these paths to meet cycle-to-cycle timing.
|
379 |
|
|
// 2. The widths of most of these debug buses are dependent on the # of
|
380 |
|
|
// DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits))
|
381 |
|
|
// SIGNAL DESCRIPTION:
|
382 |
|
|
// 1. calib_done: 4 bits - each one asserted as each phase of calibration
|
383 |
|
|
// is completed.
|
384 |
|
|
// 2. calib_err: 4 bits - each one asserted when a calibration error
|
385 |
|
|
// encountered for that stage. Some of these bits may not
|
386 |
|
|
// be used (not all cal stages report an error).
|
387 |
|
|
// 3. dq_tap_cnt: final IDELAY tap counts for all DQ IDELAYs
|
388 |
|
|
// 4. dqs_tap_cnt: final IDELAY tap counts for all DQS IDELAYs
|
389 |
|
|
// 5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
|
390 |
|
|
// synchronization IDELAYs
|
391 |
|
|
// 6. rd_data_sel: final read capture MUX (either "positive" or "negative"
|
392 |
|
|
// edge capture) settings for all DQS groups
|
393 |
|
|
// 7. rden_dly: related to # of cycles after issuing a read until when
|
394 |
|
|
// read data is valid - for all DQS groups
|
395 |
|
|
// 8. gate_dly: related to # of cycles after issuing a read until when
|
396 |
|
|
// clock enable for all DQ's is deasserted to prevent
|
397 |
|
|
// effect of DQS postamble glitch - for all DQS groups
|
398 |
|
|
//***************************************************************************
|
399 |
|
|
|
400 |
|
|
//*****************************************************************
|
401 |
|
|
// Record IDELAY tap values by "snooping" IDELAY control signals
|
402 |
|
|
//*****************************************************************
|
403 |
|
|
|
404 |
|
|
// Shadow copy of every DQ IDELAY tap setting. One 6-bit counter per DQ is
// rebuilt by watching the reset / CE / INC strobes this module drives
// (dlyrst_dq, dlyce_dq, dlyinc_dq) and is exported on the flat
// dbg_calib_dq_tap_cnt bus, 6 bits per DQ with DQ 0 in the low bits.
// The counter is plain 6-bit arithmetic: it wraps modulo 64, no saturation.
genvar dbg_dq_tc_i;
generate
  for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
       dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
    always @(posedge clkdiv) begin
      if (rstdiv || dlyrst_dq) begin
        // module reset or IDELAY reset: tap value returns to zero
        dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'd0;
      end else if (dlyce_dq[dbg_dq_tc_i]) begin
        // CE pulse: INC selects the direction of the one-tap step
        if (dlyinc_dq[dbg_dq_tc_i])
          dbg_dq_tap_cnt[dbg_dq_tc_i] <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 6'd1;
        else
          dbg_dq_tap_cnt[dbg_dq_tc_i] <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 6'd1;
      end
    end

    // pack this DQ's counter into its 6-bit slice of the debug bus
    assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i) +: 6]
      = dbg_dq_tap_cnt[dbg_dq_tc_i];
  end
endgenerate
|
424 |
|
|
|
425 |
|
|
// Shadow copy of every DQS IDELAY tap setting. One 6-bit counter per DQS is
// rebuilt from the dlyrst_dqs / dlyce_dqs / dlyinc_dqs strobes and exported
// on dbg_calib_dqs_tap_cnt, 6 bits per DQS with DQS 0 in the low bits.
// The counter wraps modulo 64 (no saturation).
genvar dbg_dqs_tc_i;
generate
  for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
       dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
    always @(posedge clkdiv) begin
      if (rstdiv || dlyrst_dqs) begin
        // module reset or IDELAY reset: tap value returns to zero
        dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'd0;
      end else if (dlyce_dqs[dbg_dqs_tc_i]) begin
        // CE pulse: INC selects the direction of the one-tap step
        if (dlyinc_dqs[dbg_dqs_tc_i])
          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 6'd1;
        else
          dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 6'd1;
      end
    end

    // pack this DQS's counter into its 6-bit slice of the debug bus
    assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i) +: 6]
      = dbg_dqs_tap_cnt[dbg_dqs_tc_i];
  end
endgenerate
|
445 |
|
|
|
446 |
|
|
// Shadow copy of every DQS-gate IDELAY tap setting. Unlike the DQ/DQS
// counters, the IDELAY reset here is per group (dlyrst_gate is a vector),
// so each counter is cleared by its own reset bit. Result is exported on
// dbg_calib_gate_tap_cnt, 6 bits per DQS group with group 0 in the low bits.
// The counter wraps modulo 64 (no saturation).
genvar dbg_gate_tc_i;
generate
  for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
       dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
    always @(posedge clkdiv) begin
      if (rstdiv || dlyrst_gate[dbg_gate_tc_i]) begin
        // module reset or this group's IDELAY reset: tap value returns to zero
        dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'd0;
      end else if (dlyce_gate[dbg_gate_tc_i]) begin
        // CE pulse: INC selects the direction of the one-tap step
        if (dlyinc_gate[dbg_gate_tc_i])
          dbg_gate_tap_cnt[dbg_gate_tc_i]
            <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 6'd1;
        else
          dbg_gate_tap_cnt[dbg_gate_tc_i]
            <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 6'd1;
      end
    end

    // pack this group's counter into its 6-bit slice of the debug bus
    assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i) +: 6]
      = dbg_gate_tap_cnt[dbg_gate_tc_i];
  end
endgenerate
|
466 |
|
|
|
467 |
|
|
// Remaining debug outputs are straight copies of internal calibration state.
// Per-stage status flags:
assign dbg_calib_err         = calib_err;
assign dbg_calib_done        = calib_done;
// Per-DQS-group calibration results:
assign dbg_calib_gate_dly    = gate_dly;
assign dbg_calib_rden_dly    = rden_dly;
assign dbg_calib_rd_data_sel = cal2_rd_data_sel;
|
472 |
|
|
|
473 |
|
|
//***************************************************************************
|
474 |
|
|
// Read data pipelining, and read data "ISERDES" data width expansion
|
475 |
|
|
//***************************************************************************
|
476 |
|
|
|
477 |
|
|
// Slow-clock capture of the full read-data bus (all DQ bits, rise and fall).
// Registering on CLKDIV eases timing; the extra pipeline stage is harmless
// provided the calibration logic waits one more cycle after changing a DQ
// IDELAY. Because sampling is on the slow clock, every other fast-clock
// cycle of capture data is skipped. That is acceptable for stage 1 and
// stage 2 calibration, but not for stages 3 and 4, which use the separate
// width-expander circuit that follows.
always @(posedge clkdiv)
  {rd_data_rise_1x_r, rd_data_fall_1x_r} <= {rd_data_rise, rd_data_fall};
|
488 |
|
|
|
489 |
|
|
// ISERDES-style data width expander, one per DQS group, fed by the first DQ
// of each group (bit index rdd_i*DQ_PER_DQS). Stage 3 and stage 4
// calibration need to compare data over consecutive fast-clock cycles;
// stage 2 can reuse the same circuit because it only needs one bit per DQS
// group.
//
// Four values per group come out of this circuit (rise/fall x Q1/Q2),
// covering the last two fast-clock cycles. Their time ordering is one of:
//   (1) Q2 = data @ n,     Q1 = data @ n + 1
//   (2) Q2 = data @ n - 1, Q1 = data @ n   ([Q1,Q2] is "staggered")
// Working out which applies is left to the calibration stage consuming the
// data, if it cares at all (stage 2 does not).
genvar rdd_i;
generate
  for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
    // First stage stays in the fast clock domain: sample the group's first
    // DQ on every CLK edge so the slow domain can pick it up next.
    always @(posedge clk)
      {rd_data_rise_2x_r[rdd_i], rd_data_fall_2x_r[rdd_i]}
        <= {rd_data_rise[rdd_i*DQ_PER_DQS], rd_data_fall[rdd_i*DQ_PER_DQS]};

    // Q1 taps: the same DQ bit as captured directly on CLKDIV by the
    // rd_data_*_1x_r registers.
    assign rd_data_rise_chk_q1[rdd_i] = rd_data_rise_1x_r[rdd_i*DQ_PER_DQS];
    assign rd_data_fall_chk_q1[rdd_i] = rd_data_fall_1x_r[rdd_i*DQ_PER_DQS];
  end
endgenerate

// Second stage: move the fast-domain samples of all groups into the slow
// clock domain. Together with the rd_data_*_1x_r flops these registers form
// the second stage of the expander.
always @(posedge clkdiv) begin
  rd_data_fall_1x_r1 <= rd_data_fall_2x_r;
  rd_data_rise_1x_r1 <= rd_data_rise_2x_r;
end

// Q2 taps: the re-registered fast-domain samples.
assign rd_data_fall_chk_q2 = rd_data_fall_1x_r1;
assign rd_data_rise_chk_q2 = rd_data_rise_1x_r1;
|
527 |
|
|
|
528 |
|
|
//*****************************************************************
|
529 |
|
|
// Outputs of these simplified ISERDES circuits then feed MUXes based on
|
530 |
|
|
// which DQ the current calibration algorithm needs to look at
|
531 |
|
|
//*****************************************************************
|
532 |
|
|
|
533 |
|
|
// Read-data MUX select: chooses which DQS group's expander outputs are
// examined, depending on how many calibration stages have completed
// (calib_done[2:0]):
//   3'b001 -> next_count_dqs
//   3'b011 -> count_rden
//   3'b111 -> next_count_gate
// The select is registered, which adds a pipeline stage; the calibration
// logic using the MUX output is expected to wait long enough after changing
// the count for this not to matter.
//
// For every other calib_done code the register explicitly holds its value.
// The previous form relied on (* full_case, parallel_case *) with no
// default: simulation held the register for unlisted codes while synthesis
// was free to treat them as don't-care, so the two could disagree. The
// explicit default makes synthesis implement exactly what simulation shows.
// The listed codes are mutually exclusive constants, so parallel_case was
// redundant and is dropped as well.
always @(posedge clkdiv) begin
  case (calib_done[2:0])
    3'b001:  rdd_mux_sel <= next_count_dqs;
    3'b011:  rdd_mux_sel <= count_rden;
    3'b111:  rdd_mux_sel <= next_count_gate;
    default: rdd_mux_sel <= rdd_mux_sel;
  endcase
end
|
543 |
|
|
|
544 |
|
|
// Read-data MUX: pick the rise/fall, Q1/Q2 samples of the DQS group
// addressed by rdd_mux_sel and register them on CLKDIV for the calibration
// state machines.
always @(posedge clkdiv)
  {rdd_rise_q1, rdd_rise_q2, rdd_fall_q1, rdd_fall_q2}
    <= {rd_data_rise_chk_q1[rdd_mux_sel], rd_data_rise_chk_q2[rdd_mux_sel],
        rd_data_fall_chk_q1[rdd_mux_sel], rd_data_fall_chk_q2[rdd_mux_sel]};
|
550 |
|
|
|
551 |
|
|
//***************************************************************************
|
552 |
|
|
// Demultiplexor to control (reset, increment, decrement) IDELAY tap values
|
553 |
|
|
// For DQ:
|
554 |
|
|
// STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per
|
555 |
|
|
// deskew, increment all bits in the current DQS set
|
556 |
|
|
// STG2: inc/dec all DQ's in the current DQS set.
|
557 |
|
|
// NOTE: Nice to add some error checking logic here (or elsewhere in the
|
558 |
|
|
// code) to check if logic attempts to overflow tap value
|
559 |
|
|
//***************************************************************************
|
560 |
|
|
|
561 |
|
|
// DQ and DQS IDELAY reset. The reset outputs are asserted only while rstdiv
// is high (registered, so one CLKDIV cycle later) and are never pulsed
// afterwards. Letting the user recalibrate after the initial reset would
// require changing this logic.
always @(posedge clkdiv) begin
  // default: resets released
  dlyrst_dq  <= 1'b0;
  dlyrst_dqs <= 1'b0;
  if (rstdiv) begin
    dlyrst_dq  <= 1'b1;
    dlyrst_dqs <= 1'b1;
  end
end
|
571 |
|
|
|
572 |
|
|
// CE/INC strobe demultiplexer for the DQ and DQS IDELAYs.
//
// Every strobe defaults low each CLKDIV cycle, so a request yields a pulse
// only for the cycle(s) in which it is presented. Requests are served in
// strict priority order:
//   1. rstdiv          - nothing is driven
//   2. cal1_dlyce_dq   - stage 1: step the DQ selected by count_dq
//   3. cal2_dlyce_dqs  - stage 2: step the DQS selected by count_dqs together
//                        with all DQ_PER_DQS DQ's of that group
//   4. DEBUG_EN != 0   - user-driven stepping via the dbg_idel_* inputs
// When SIM_ONLY != 0 the calibration requests are broadcast to every
// IDELAY, i.e. the result found for the first DQ/DQS is applied to all of
// them (assumes the delay on all of them is the same).
always @(posedge clkdiv) begin
  dlyce_dq   <= {DQ_WIDTH{1'b0}};
  dlyinc_dq  <= {DQ_WIDTH{1'b0}};
  dlyce_dqs  <= {DQS_WIDTH{1'b0}};
  dlyinc_dqs <= {DQS_WIDTH{1'b0}};

  if (rstdiv) begin
    // synchronous reset: the defaults above keep every strobe low
  end else if (cal1_dlyce_dq) begin
    // ---- stage 1 calibration ------------------------------------------
    if (SIM_ONLY != 0) begin
      // simulation shortcut: step every DQ at once
      dlyce_dq  <= {DQ_WIDTH{1'b1}};
      dlyinc_dq <= {DQ_WIDTH{cal1_dlyinc_dq}};
    end else begin
      // hardware: step only the DQ currently being calibrated
      dlyce_dq[count_dq]  <= 1'b1;
      dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
    end
  end else if (cal2_dlyce_dqs) begin
    // ---- stage 2 calibration ------------------------------------------
    if (SIM_ONLY != 0) begin
      // simulation shortcut: step every DQS and every DQ in each group
      dlyce_dqs  <= {DQS_WIDTH{1'b1}};
      dlyinc_dqs <= {DQS_WIDTH{cal2_dlyinc_dqs}};
      for (i = 0; i < DQS_WIDTH; i = i + 1)
        for (j = 0; j < DQ_PER_DQS; j = j + 1) begin
          dlyce_dq[(DQ_PER_DQS*i)+j]  <= 1'b1;
          dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
        end
    end else begin
      // hardware: step the selected DQS and the DQ's that belong to it
      dlyce_dqs[count_dqs]  <= 1'b1;
      dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
      for (i = 0; i < DQ_PER_DQS; i = i + 1) begin
        dlyce_dq[(DQ_PER_DQS*count_dqs)+i]  <= 1'b1;
        dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
      end
    end
  end else if (DEBUG_EN != 0) begin
    // ---- debug: user varies the IDELAY tap settings --------------------
    // DQ IDELAYs: either all of them, or the one picked by dbg_sel_idel_dq
    if (dbg_idel_up_all || dbg_idel_down_all ||
        dbg_sel_all_idel_dq) begin
      dlyce_dq  <= {DQ_WIDTH{dbg_idel_up_all | dbg_idel_down_all |
                             dbg_idel_up_dq  | dbg_idel_down_dq}};
      dlyinc_dq <= {DQ_WIDTH{dbg_idel_up_all | dbg_idel_up_dq}};
    end else begin
      dlyce_dq[dbg_sel_idel_dq]  <= dbg_idel_up_dq | dbg_idel_down_dq;
      dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
    end

    // DQS IDELAYs: either all of them, or the one picked by dbg_sel_idel_dqs
    if (dbg_idel_up_all || dbg_idel_down_all ||
        dbg_sel_all_idel_dqs) begin
      dlyce_dqs  <= {DQS_WIDTH{dbg_idel_up_all | dbg_idel_down_all |
                               dbg_idel_up_dqs | dbg_idel_down_dqs}};
      dlyinc_dqs <= {DQS_WIDTH{dbg_idel_up_all | dbg_idel_up_dqs}};
    end else begin
      dlyce_dqs[dbg_sel_idel_dqs]  <= dbg_idel_up_dqs | dbg_idel_down_dqs;
      dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
    end
  end
end
|
650 |
|
|
|
651 |
|
|
// Reset/CE/INC strobes for the DQS-gate synchronization IDELAYs. These are
// driven directly from the stage 4 calibration FSM requests
// (cal4_dlyrst_gate, cal4_dlyce_gate, cal4_dlyinc_gate), addressed by
// count_gate.
//
// - rstdiv resets every gate IDELAY and keeps CE/INC low.
// - Otherwise all three strobes default low each cycle, so a request gives
//   a pulse only while it is presented.
// - A reset request and a CE request are handled independently of each other.
// - The debug inputs can step the taps only when stage 4 is not requesting
//   a CE and DEBUG_EN != 0.
// - With SIM_ONLY != 0 every request is broadcast to all DQS groups (only
//   the first gate is actually calibrated in simulation).
always @(posedge clkdiv) begin
  dlyce_gate  <= {DQS_WIDTH{1'b0}};
  dlyinc_gate <= {DQS_WIDTH{1'b0}};

  if (rstdiv) begin
    dlyrst_gate <= {DQS_WIDTH{1'b1}};
  end else begin
    dlyrst_gate <= {DQS_WIDTH{1'b0}};

    // ---- stage 4: IDELAY reset request ---------------------------------
    if (cal4_dlyrst_gate) begin
      if (SIM_ONLY != 0)
        dlyrst_gate <= {DQS_WIDTH{1'b1}};
      else
        dlyrst_gate[count_gate] <= 1'b1;
    end

    // ---- stage 4: tap step request, else debug stepping ----------------
    if (cal4_dlyce_gate) begin
      if (SIM_ONLY != 0) begin
        dlyce_gate  <= {DQS_WIDTH{1'b1}};
        dlyinc_gate <= {DQS_WIDTH{cal4_dlyinc_gate}};
      end else begin
        dlyce_gate[count_gate]  <= 1'b1;
        dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
      end
    end else if (DEBUG_EN != 0) begin
      // all gate IDELAYs, or only the one picked by dbg_sel_idel_gate
      if (dbg_idel_up_all || dbg_idel_down_all ||
          dbg_sel_all_idel_gate) begin
        dlyce_gate  <= {DQS_WIDTH{dbg_idel_up_all  | dbg_idel_down_all |
                                  dbg_idel_up_gate | dbg_idel_down_gate}};
        dlyinc_gate <= {DQS_WIDTH{dbg_idel_up_all | dbg_idel_up_gate}};
      end else begin
        dlyce_gate[dbg_sel_idel_gate]  <= dbg_idel_up_gate |
                                          dbg_idel_down_gate;
        dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
      end
    end
  end
end
|
699 |
|
|
|
700 |
|
|
//***************************************************************************
|
701 |
|
|
// signal to tell calibration state machines to wait and give IDELAY time to
|
702 |
|
|
// settle after its value is changed (both time for IDELAY chain to settle,
|
703 |
|
|
// and for settled output to propagate through ISERDES). For general use: use
|
704 |
|
|
// for any calibration state machines that modify any IDELAY.
|
705 |
|
|
// Should give at least enough time for IDELAY output to settle (technically
|
706 |
|
|
// for V5, this should be "glitchless" when IDELAY taps are changed, so don't
|
707 |
|
|
// need any time here), and also time for new data to propagate through both
|
708 |
|
|
// ISERDES and the "RDD" MUX + associated pipelining
|
709 |
|
|
// For now, give very "generous" delay - doesn't really matter since only
|
710 |
|
|
// needed during calibration
|
711 |
|
|
//***************************************************************************
|
712 |
|
|
|
713 |
|
|
// Detect a change of the stage 2 capture polarity select: keep a one-cycle
// delayed copy of CAL2_RD_DATA_SEL and flag any bit that differs from it.
always @(posedge clkdiv) begin
  cal2_rd_data_sel_r <= cal2_rd_data_sel;
end

assign cal2_rd_data_sel_edge = (cal2_rd_data_sel != cal2_rd_data_sel_r);
|
718 |
|
|
|
719 |
|
|
// Single "something changed" strobe used to restart the IDELAY settling
// counter. It is the OR of every calibration stage's IDELAY CE/reset
// request, plus a change of the stage 2 capture polarity (no IDELAY is
// touched in that case, but the same counter is reused to stall the
// calibration logic).
assign dlyce_or = cal4_dlyrst_gate |
                  cal4_dlyce_gate |
                  cal2_rd_data_sel_edge |
                  cal2_dlyce_dqs |
                  cal1_dlyce_dq;
|
727 |
|
|
|
728 |
|
|
// IDEL_SET_WAIT tells the calibration FSMs to stall. It is high in the
// cycle a change is requested (DLYCE_OR) and for as long as the settling
// counter has not yet reached IDEL_SET_VAL.
// SYN_NOTE: Can later recode to avoid combinational path
assign idel_set_wait = (idel_set_cnt != IDEL_SET_VAL) || dlyce_or;

// Settling counter: cleared by reset or by any new change request, then
// counts up and saturates at IDEL_SET_VAL.
always @(posedge clkdiv) begin
  if (rstdiv || dlyce_or)
    idel_set_cnt <= 4'b0000;
  else if (idel_set_cnt != IDEL_SET_VAL)
    idel_set_cnt <= idel_set_cnt + 1;
end
|
738 |
|
|
|
739 |
|
|
// Auto-refresh request to the PHY_INIT logic. Certain calibration states
// use it to force a precharge/auto-refresh part way through calibration,
// avoiding the tRAS violation that would otherwise occur if that stage of
// calibration lasts long enough.
// Required behaviour of this signal:
//   (1) only transition 0->1 when a refresh is actually needed
//   (2) stay at 1, and only transition 1->0 once CALIB_REF_DONE is asserted
// This register is simply the OR of the per-stage requests, delayed by one
// clock.
always @(posedge clkdiv) begin
  if (rstdiv) begin
    calib_ref_req <= 1'b0;
  end else begin
    calib_ref_req <= cal4_ref_req | cal2_ref_req | cal1_ref_req;
  end
end
|
751 |
|
|
|
752 |
|
|
// Stage 1 refresh point: CAL1_REFRESH is asserted whenever the two LSBs of
// NEXT_COUNT_DQ are both zero, i.e. on every fourth DQ bit index. When
// DQ_BITS < 2 the signal is tied low (presumably because NEXT_COUNT_DQ
// then has no [1:0] slice - confirm against its declaration).
generate
  if (DQ_BITS >= 2) begin: gen_cal1_refresh_dq_gt4
    assign cal1_refresh = ~|next_count_dq[1:0];
  end else begin: gen_cal1_refresh_dq_lte4
    assign cal1_refresh = 1'b0;
  end
endgenerate
|
760 |
|
|
|
761 |
|
|
//***************************************************************************
|
762 |
|
|
// First stage calibration: DQ-DQS
|
763 |
|
|
// Definitions:
|
764 |
|
|
// edge: detected when varying IDELAY, and current capture data != prev
|
765 |
|
|
// capture data
|
766 |
|
|
// valid bit window: detected when current capture data == prev capture
|
767 |
|
|
// data for more than half the bit time
|
768 |
|
|
// starting conditions for DQS-DQ phase:
|
769 |
|
|
// case 1: when DQS starts somewhere in rising edge bit window, or
|
770 |
|
|
// on the right edge of the rising bit window.
|
771 |
|
|
// case 2: when DQS starts somewhere in falling edge bit window, or
|
772 |
|
|
// on the right edge of the falling bit window.
|
773 |
|
|
// Algorithm Description:
|
774 |
|
|
// 1. Increment DQ IDELAY until we find an edge.
|
775 |
|
|
// 2. While we're finding the first edge, note whether a valid bit window
|
776 |
|
|
// has been detected before we found an edge. If so, then figure out if
|
777 |
|
|
// this is the rising or falling bit window. If rising, then our starting
|
778 |
|
|
// DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
|
779 |
|
|
// a valid bit window, then we must have started on the edge of a window.
|
780 |
|
|
// Need to wait until later on to decide which case we are.
|
781 |
|
|
// - Store FIRST_EDGE IDELAY value
|
782 |
|
|
// 3. Now look for second edge.
|
783 |
|
|
// 4. While we're finding the second edge, note whether valid bit window
|
784 |
|
|
// is detected. If so, then use it, along with results from (2), to figure
|
785 |
|
|
// out what the starting case is. If in rising bit window, then we're in
|
786 |
|
|
// case 2. If falling, then case 1.
|
787 |
|
|
// - Store SECOND_EDGE IDELAY value
|
788 |
|
|
// NOTES:
|
789 |
|
|
// a. Finding two edges allows us to calculate the bit time (although
|
790 |
|
|
// not the "same" bit time polarity - need to investigate this
|
791 |
|
|
// more).
|
792 |
|
|
// b. If we run out of taps looking for the second edge, then the bit
|
793 |
|
|
// time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
|
794 |
|
|
// case 1).
|
795 |
|
|
// 5. Calculate absolute amount to delay DQ as:
|
796 |
|
|
// If second edge found, and case 1:
|
797 |
|
|
// - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
|
798 |
|
|
// If second edge found, and case 2:
|
799 |
|
|
// - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
|
800 |
|
|
// If second edge not found, then need to make an approximation on
|
801 |
|
|
// how much to shift by (should be okay, because we have more timing
|
802 |
|
|
// margin):
|
803 |
|
|
// - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
|
804 |
|
|
// NOTE: Does this account for either case 1 or case 2?????
|
805 |
|
|
// NOTE: It's also possible even when we find the second edge, that
|
806 |
|
|
// we could instead just use half the bit time to subtract from either
|
807 |
|
|
// FIRST or SECOND_EDGE. Finding the actual bit time (which is
|
808 |
|
|
// what (SECOND_EDGE - FIRST_EDGE) is, is slightly more accurate,
|
809 |
|
|
// since it takes into account duty cycle distortion.
|
810 |
|
|
// 6. Repeat for each DQ in current DQS set.
|
811 |
|
|
//***************************************************************************
|
812 |
|
|
|
813 |
|
|
//*****************************************************************
|
814 |
|
|
// for first stage calibration - used for checking if DQS is aligned to the
|
815 |
|
|
// particular DQ, such that we're in the data valid window. Basically, this
|
816 |
|
|
// is one giant MUX.
|
817 |
|
|
// = [falling data, rising data]
|
818 |
|
|
// = [0, 1] = rising DQS aligned in proper (rising edge) bit window
|
819 |
|
|
// = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
|
820 |
|
|
// = [0, 0], or [1,1] = in uncertain region between windows
|
821 |
|
|
//*****************************************************************
|
822 |
|
|
|
823 |
|
|
// Registered {fall, rise} capture pair for the DQ bit selected by
// NEXT_COUNT_DQ.
// SYN_NOTE: May have to split this up into multiple levels - MUX can get
//  very wide - as wide as the data bus width
always @(posedge clkdiv) begin
  cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq],
                      rd_data_rise_1x_r[next_count_dq]};
end
|
828 |
|
|
|
829 |
|
|
//*****************************************************************
|
830 |
|
|
// determine when an edge has occurred - when either the current value
|
831 |
|
|
// is different from the previous latched value or when the DATA_CHK
|
832 |
|
|
// outputs are the same (rare, but indicates that we're at an edge)
|
833 |
|
|
// This is only valid when the IDELAY output and propagation of the
|
834 |
|
|
// data through the capture flops has had a chance to settle out.
|
835 |
|
|
//*****************************************************************
|
836 |
|
|
|
837 |
|
|
// CAL1_DETECT_EDGE (and CAL1_DETECT_STABLE) are phrased so that X's
// captured on the bus during functional simulation register as a detected
// edge. This allows the HDL to be used with Denali memory models, which
// drive DQ to X on both edges of the data valid window to simulate jitter.
// It is done purely for functional simulation and **should not** make the
// synthesized logic more complicated, but it does mean the logic is written
// "inside out": the default is "edge", and only a positively identified
// "no edge" condition clears it.
always @(*) begin
  // default (also taken when the condition below evaluates to X): edge
  cal1_detect_edge = 1'b1;
  // no edge if either:
  //  (1) a previous capture has been stored, and the current capture is
  //      identical to it, or
  //  (2) no previous capture has been stored yet, but the current capture
  //      is a legal {fall, rise} pair, [0,1] or [1,0] (i.e. it is not X's,
  //      [0,0] or [1,1], which indicate we're in the middle of an edge,
  //      since normally rise != fall data for stg1)
  if ((cal1_data_chk_last_valid &&
       (cal1_data_chk_r == cal1_data_chk_last)) ||
      (!cal1_data_chk_last_valid &&
       ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10))))
    cal1_detect_edge = 1'b0;
end
|
860 |
|
|
|
861 |
|
|
// CAL1_DETECT_STABLE (combinational): asserted when a previous capture has
// been stored, the current capture equals it (data is stable across
// consecutive IDELAY taps), and the capture is a legal {fall, rise} pair,
// i.e. [0,1] or [1,0]. Any X in the comparison falls through to the else
// branch and reports "not stable".
// Blocking assignments are used because this is a combinational always
// block (same style as the CAL1_DETECT_EDGE block).
always @(*) begin
  if ((cal1_data_chk_last_valid &&
       (cal1_data_chk_r == cal1_data_chk_last)) &&
      ((cal1_data_chk_r == 2'b01) || (cal1_data_chk_r == 2'b10)))
    cal1_detect_stable = 1'b1;
  else
    cal1_detect_stable = 1'b0;
end
|
871 |
|
|
|
872 |
|
|
//*****************************************************************
|
873 |
|
|
// Find valid window: keep track of how long we've been in the same data
|
874 |
|
|
// window. If it's been long enough, then declare that we've found a valid
|
875 |
|
|
// window. Also returns whether we found a rising or falling window (only
|
876 |
|
|
// valid when found_window is asserted)
|
877 |
|
|
//*****************************************************************
|
878 |
|
|
|
879 |
|
|
// Valid-window tracker for stage 1. Counts consecutive "stable" samples
// taken at the end of each IDELAY settling period; once MIN_WIN_SIZE of
// them have been seen, CAL1_FOUND_WINDOW is set and CAL1_FOUND_RISING
// records which window it was (1 when the capture pair is 2'b01).
always @(posedge clkdiv) begin
  if ((cal1_state == CAL1_INIT) || !cal1_data_chk_last_valid) begin
    // Restart the search either at the beginning of a DQ bit, or whenever
    // no previous value of CAL1_DATA_CHK is stored (never stored yet, or
    // invalidated because an edge was just detected and we are now looking
    // for the second edge). Clearing here makes sure FOUND_WINDOW is
    // deasserted on the following clock edge, which avoids reporting a
    // false window immediately after an edge. Note that because of jitter
    // it is possible to not find an edge at the end of the IDELAY
    // increment settling time, but to find one on the next clock cycle
    // (e.g. during CAL1_FIND_FIRST_EDGE).
    cal1_window_cnt   <= 4'b0000;
    cal1_found_window <= 1'b0;
    cal1_found_rising <= 1'bx;
  end else if (((cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT) ||
                (cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT)) &&
               !idel_set_wait) begin
    // Sample once per tap, in the cycle the settling wait expires while
    // searching for the first or second edge.
    if (cal1_detect_stable) begin
      // one more stable tap
      cal1_window_cnt <= cal1_window_cnt + 1;
      if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
        // enough stable taps in a row: we are away from an edge and can
        // conclusively determine the starting DQS-DQ phase
        cal1_found_window <= 1'b1;
        if (cal1_data_chk_r == 2'b01)
          cal1_found_rising <= 1'b1;
        else
          cal1_found_rising <= 1'b0;
      end
    end else begin
      // not in a data valid window: restart the count and report that we
      // are not currently in a window. By design this should happen at
      // least once after finding the first edge.
      cal1_window_cnt   <= 4'b0000;
      cal1_found_window <= 1'b0;
      cal1_found_rising <= 1'bx;
    end
  end
end
|
921 |
|
|
|
922 |
|
|
//*****************************************************************
|
923 |
|
|
// keep track of edge tap counts found, and whether we've
|
924 |
|
|
// incremented to the maximum number of taps allowed
|
925 |
|
|
//*****************************************************************
|
926 |
|
|
|
927 |
|
|
// Shadow copy of the DQ IDELAY tap position for the bit being calibrated.
// Cleared in CAL1_INIT, then follows every CE pulse issued by the stage 1
// FSM (up when INC is high, down otherwise). CAL1_IDEL_TAP_LIMIT_HIT is
// set by the increment that takes the count from 62 to 63, and is cleared
// again by any decrement.
always @(posedge clkdiv) begin
  if (cal1_state == CAL1_INIT) begin
    cal1_idel_tap_cnt       <= 6'd0;
    cal1_idel_tap_limit_hit <= 1'b0;
  end else if (cal1_dlyce_dq) begin
    if (cal1_dlyinc_dq) begin
      cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'd62);
      cal1_idel_tap_cnt       <= cal1_idel_tap_cnt + 1;
    end else begin
      cal1_idel_tap_limit_hit <= 1'b0;
      cal1_idel_tap_cnt       <= cal1_idel_tap_cnt - 1;
    end
  end
end
|
940 |
|
|
|
941 |
|
|
//*****************************************************************
|
942 |
|
|
// Pipeline for better timing - amount to decrement by if second
|
943 |
|
|
// edge not found
|
944 |
|
|
//*****************************************************************
|
945 |
|
|
// if only one edge found (possible for low frequencies), then:
|
946 |
|
|
// 1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
|
947 |
|
|
// 2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
|
948 |
|
|
// (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
|
949 |
|
|
// DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
|
950 |
|
|
// of DQ window).
|
951 |
|
|
// 3. Clamp the above value at 63 to ensure we don't underflow IDELAY
|
952 |
|
|
// (note: clamping happens in the CAL1 state machine)
|
953 |
|
|
// Registered decrement amount for the "only one edge found" case:
// (63 - first edge tap count) + half of BIT_TIME_TAPS.
always @(posedge clkdiv) begin
  cal1_low_freq_idel_dec
    <= (7'd63 - {1'b0, cal1_first_edge_tap_cnt}) +
       (BIT_TIME_TAPS/2);
end
|
957 |
|
|
|
958 |
|
|
//*****************************************************************
|
959 |
|
|
// Keep track of max taps used during stage 1, use this to limit
|
960 |
|
|
// the number of taps that can be used in stage 2
|
961 |
|
|
//*****************************************************************
|
962 |
|
|
|
963 |
|
|
// Running maximum of the stage 1 tap count. The comparison is registered
// one cycle ahead (CAL1_IDEL_MAX_TAP_WE) and the maximum itself is only
// updated while the stage 1 FSM sits in CAL1_DONE.
always @(posedge clkdiv) begin
  if (rstdiv) begin
    cal1_idel_max_tap_we <= 1'b0;
    cal1_idel_max_tap    <= 6'd0;
  end else begin
    // pipelined write enable - there is plenty of time: the tap count is
    // updated, then there are dead cycles waiting for the IDELAY output
    // to settle
    cal1_idel_max_tap_we <= (cal1_idel_tap_cnt > cal1_idel_max_tap);
    // record maximum # of taps used for stg 1 cal
    if (cal1_idel_max_tap_we && (cal1_state == CAL1_DONE))
      cal1_idel_max_tap <= cal1_idel_tap_cnt;
  end
end
|
976 |
|
|
|
977 |
|
|
//*****************************************************************
|
978 |
|
|
|
979 |
|
|
// Stage 1 (DQ-DQS) calibration state machine. For each DQ bit it:
//   INIT -> INC_IDEL            preload the DQ IDELAY by DQ_IDEL_INIT taps
//   FIND_FIRST_EDGE / ..WAIT    step the IDELAY until the first edge
//   FIND_SECOND_EDGE / ..WAIT   keep stepping until a second edge, or until
//                               the tap limit is hit
//   CALC_IDEL -> DEC_IDEL       compute and apply the final back-off
//   DONE                        advance to the next bit, or finish
// CAL1_REF_REQ, CAL1_DLYCE_DQ and CAL1_DLYINC_DQ default to 0 every cycle
// and are only high in cycles where a state explicitly sets them.
// There is no default case arm: a CAL1_STATE value not listed below leaves
// every register other than those three defaults unchanged.
always @(posedge clkdiv) begin
  if (rstdiv) begin
    // sequencing and status
    cal1_state               <= CAL1_IDLE;
    calib_done[0]            <= 1'b0;
    calib_err[0]             <= 1'b0;
    count_dq                 <= {DQ_BITS{1'b0}};
    next_count_dq            <= {DQ_BITS{1'b0}};
    // pulse outputs
    cal1_ref_req             <= 1'b0;
    cal1_dlyce_dq            <= 1'b0;
    cal1_dlyinc_dq           <= 1'b0;
    // working registers are reset to X (don't-care)
    calib_done_tmp[0]        <= 1'bx;
    cal1_data_chk_last       <= 2'bx;
    cal1_data_chk_last_valid <= 1'bx;
    cal1_dqs_dq_init_phase   <= 1'bx;
    cal1_first_edge_done     <= 1'bx;
    cal1_found_second_edge   <= 1'bx;
    cal1_first_edge_tap_cnt  <= 6'bx;
    cal1_bit_time_tap_cnt    <= 6'bx;
    cal1_idel_inc_cnt        <= 6'bx;
    cal1_idel_dec_cnt        <= 7'bx;
  end else begin
    // default values for all "pulse" outputs
    cal1_dlyinc_dq <= 1'b0;
    cal1_dlyce_dq  <= 1'b0;
    cal1_ref_req   <= 1'b0;

    case (cal1_state)
      // wait for the start request; bit counters are held at 0 meanwhile
      CAL1_IDLE: begin
        next_count_dq <= {DQ_BITS{1'b0}};
        count_dq      <= {DQ_BITS{1'b0}};
        if (calib_start[0]) begin
          cal1_state        <= CAL1_INIT;
          calib_done_tmp[0] <= 1'b0;
          calib_done[0]     <= 1'b0;
        end
      end

      // per-bit initialization (entered once for every DQ bit)
      CAL1_INIT: begin
        cal1_idel_inc_cnt        <= 6'd0;
        cal1_dqs_dq_init_phase   <= 1'b0;
        cal1_found_second_edge   <= 1'b0;
        cal1_data_chk_last_valid <= 1'b0;
        cal1_state               <= CAL1_INC_IDEL;
      end

      // increment DQ IDELAY so that either: (1) DQS starts somewhere in
      // first rising DQ window, or (2) DQS starts in first falling DQ
      // window. The amount to shift is frequency dependent (and is either
      // precalculated by MIG or possibly adjusted by the user)
      CAL1_INC_IDEL: begin
        if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin
          // all preload taps issued and IDELAY has settled
          cal1_state <= CAL1_FIND_FIRST_EDGE;
        end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
          // issue one more preload tap
          cal1_dlyinc_dq    <= 1'b1;
          cal1_dlyce_dq     <= 1'b1;
          cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
        end
      end

      // look for first edge
      CAL1_FIND_FIRST_EDGE: begin
        // Determine DQS-DQ phase if we can detect enough of a valid window
        if (cal1_found_window) begin
          cal1_dqs_dq_init_phase <= ~cal1_found_rising;
        end
        if (cal1_detect_edge) begin
          // first edge found - record its tap position and invalidate the
          // stored capture value
          cal1_data_chk_last_valid <= 1'b0;
          cal1_first_edge_tap_cnt  <= cal1_idel_tap_cnt;
          cal1_first_edge_done     <= 1'b0;
          cal1_state               <= CAL1_FOUND_FIRST_EDGE_WAIT;
        end else begin
          // no edge yet: store the current value of DATA_CHK (and mark it
          // valid so the next compare uses it), step the DQ IDELAY by one
          // tap, and compare again after it settles
          cal1_dlyinc_dq           <= 1'b1;
          cal1_dlyce_dq            <= 1'b1;
          cal1_data_chk_last_valid <= 1'b1;
          cal1_data_chk_last       <= cal1_data_chk_r;
          cal1_state               <= CAL1_FIRST_EDGE_IDEL_WAIT;
        end
      end

      // wait for DQ IDELAY to settle
      CAL1_FIRST_EDGE_IDEL_WAIT: begin
        if (!idel_set_wait)
          cal1_state <= CAL1_FIND_FIRST_EDGE;
      end

      // delay state between finding first edge and looking for second
      // edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before
      // starting to look for second edge
      CAL1_FOUND_FIRST_EDGE_WAIT: begin
        cal1_state <= CAL1_FIND_SECOND_EDGE;
      end

      // Try and find second edge
      CAL1_FIND_SECOND_EDGE: begin
        // When looking for 2nd edge, first make sure data stabilized (by
        // detecting valid data window) - needed to avoid false edges
        if (cal1_found_window) begin
          cal1_dqs_dq_init_phase <= cal1_found_rising;
          cal1_first_edge_done   <= 1'b1;
        end
        if (cal1_idel_tap_limit_hit) begin
          // exit if run out of taps to increment
          cal1_state <= CAL1_CALC_IDEL;
        end else if (cal1_first_edge_done && cal1_detect_edge) begin
          // found second edge: record the tap distance between the edges
          cal1_bit_time_tap_cnt  <= cal1_idel_tap_cnt -
                                    cal1_first_edge_tap_cnt + 1;
          cal1_found_second_edge <= 1'b1;
          cal1_state             <= CAL1_CALC_IDEL;
        end else begin
          // keep searching: store capture value, step IDELAY by one tap
          cal1_dlyinc_dq           <= 1'b1;
          cal1_dlyce_dq            <= 1'b1;
          cal1_data_chk_last_valid <= 1'b1;
          cal1_data_chk_last       <= cal1_data_chk_r;
          cal1_state               <= CAL1_SECOND_EDGE_IDEL_WAIT;
        end
      end

      // wait for DQ IDELAY to settle, then store ISERDES output
      CAL1_SECOND_EDGE_IDEL_WAIT: begin
        if (!idel_set_wait)
          cal1_state <= CAL1_FIND_SECOND_EDGE;
      end

      // pipeline delay state to calculate amount to decrement DQ IDELAY
      // NOTE: We're calculating the amount to decrement by, not the
      //  absolute setting for DQ IDELAY
      CAL1_CALC_IDEL: begin
        if (cal1_found_second_edge) begin
          // two edges found
          if (!cal1_dqs_dq_init_phase) begin
            // case 1: DQS was in DQ window to start with. First edge found
            // corresponds to left edge of DQ rising window. Backup by
            // 1.5*BT.
            // NOTE: In this particular case, it is possible to decrement
            //  "below 0" in the case where DQS delay is less than 0.5*BT,
            //  need to limit decrement to prevent IDELAY tap underflow
            cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
                                 {1'b0, (cal1_bit_time_tap_cnt >> 1)};
          end else begin
            // case 2: DQS was in wrong DQ window (in DQ falling window).
            // First edge found is right edge of DQ rising window. Second
            // edge is left edge of DQ rising window. Backup by 0.5*BT
            cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
          end
        end else begin
          // only one edge found - assume will always be case 1 - DQS in
          // DQ window. Case 2 only possible if path delay on DQS > 5ns
          cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
        end
        cal1_state <= CAL1_DEC_IDEL;
      end

      // decrement DQ IDELAY for final adjustment
      CAL1_DEC_IDEL: begin
        // Stop once the requested number of decrements has been issued.
        // Underflow protection (for the case of 2 edges found and DQS
        // starting in DQ window, see CAL1_CALC_IDEL): also stop when a
        // decrement is in flight and CAL1_IDEL_TAP_CNT reads 1 - the tap
        // count gets updated one clock cycle after CAL1_DLYCE/INC_DQ, so
        // this is the delayed value.
        if ((cal1_idel_dec_cnt == 7'd0) ||
            (cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'd1))) begin
          cal1_state <= CAL1_DONE;
          // stop when all DQ's calibrated, or DQ[0] cal'ed (for sim)
          if ((count_dq == DQ_WIDTH-1) || (SIM_ONLY != 0))
            calib_done_tmp[0] <= 1'b1;
          else
            // only advance when not finished - needed for VHDL simulation
            // to prevent out-of-index error
            next_count_dq <= count_dq + 1;
        end else begin
          // keep decrementing until final tap count reached
          cal1_dlyinc_dq    <= 1'b0;
          cal1_dlyce_dq     <= 1'b1;
          cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
        end
      end

      // delay state to allow count_dq and DATA_CHK to point to the next
      // DQ bit (allows us to potentially begin checking for an edge on
      // next DQ right away).
      CAL1_DONE: begin
        if (!idel_set_wait) begin
          count_dq <= next_count_dq;
          if (calib_done_tmp[0]) begin
            // last bit finished: report done and return to idle
            cal1_state    <= CAL1_IDLE;
            calib_done[0] <= 1'b1;
          end else if (cal1_refresh) begin
            // CAL1_REFRESH asks for an auto-refresh before the next bit
            // (to avoid a tRAS violation): keep requesting, and only move
            // on once the refresh has been reported done
            cal1_ref_req <= 1'b1;
            if (calib_ref_done)
              cal1_state <= CAL1_INIT;
          end else begin
            // if no need this time for refresh, proceed to next bit
            cal1_state <= CAL1_INIT;
          end
        end
      end
    endcase
  end
end
|
1177 |
|
|
|
1178 |
|
|
//***************************************************************************
|
1179 |
|
|
// Second stage calibration: DQS-FPGA Clock
|
1180 |
|
|
// Algorithm Description:
|
1181 |
|
|
// 1. Assumes a training pattern that will produce a pattern oscillating at
|
1182 |
|
|
// half the core clock frequency each on rise and fall outputs, and such
|
1183 |
|
|
// that rise and fall outputs are 180 degrees out of phase from each
|
1184 |
|
|
// other. Note that since the calibration logic runs at half the speed
|
1185 |
|
|
// of the interface, expect that data sampled with the slow clock always
|
1186 |
|
|
// to be constant (either always = 1, or = 0, and rise data != fall data)
|
1187 |
|
|
// unless we cross the edge of the data valid window
|
1188 |
|
|
// 2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
|
1189 |
|
|
// sync'ed to rising edge of core clock, and falling edge data sync'ed
|
1190 |
|
|
// to falling edge of core clock
|
1191 |
|
|
// 3. Start looking for an edge. An edge is defined as either: (1) a
|
1192 |
|
|
// change in capture value or (2) an invalid capture value (e.g. rising
|
1193 |
|
|
// data != falling data for that same clock cycle).
|
1194 |
|
|
// 4. If an edge is found, go to step (6). If edge hasn't been found, then
|
1195 |
|
|
// set RD_DATA_SEL = 1, and try again.
|
1196 |
|
|
// 5. If no edge is found, then increment IDELAY and return to step (3)
|
1197 |
|
|
// 6. If an edge is found, then invert RD_DATA_SEL - this shifts the
|
1198 |
|
|
// capture point 180 degrees from the edge of the window (minus duty
|
1199 |
|
|
// cycle distortion, delay skew between rising/falling edge capture
|
1200 |
|
|
// paths, etc.)
|
1201 |
|
|
// 7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
|
1202 |
|
|
// stage 1 calibration), then decrement IDELAY (without reinverting
|
1203 |
|
|
// RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
|
1204 |
|
|
// have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
|
1205 |
|
|
// capture point (not optimal, but best we can do not having found an
|
1206 |
|
|
// edge of the window). This happens only for very low frequencies.
|
1207 |
|
|
// 8. Repeat for each DQS group.
|
1208 |
|
|
// NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
|
1209 |
|
|
// algorithm might be to find both edges of the data valid window (using
|
1210 |
|
|
// the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
|
1211 |
|
|
//***************************************************************************
|
1212 |
|
|
|
1213 |
|
|
// RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
|
1214 |
|
|
// UCF file to relax timing. This net is "pseudo-static" (after value is
|
1215 |
|
|
// changed, FSM waits number of cycles before using the output).
|
1216 |
|
|
// Note that we are adding one clock cycle of delay (to isolate it from
|
1217 |
|
|
// the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
|
1218 |
|
|
// enough to compensate (by default it does, it waits a few cycles more
|
1219 |
|
|
// than minimum # of clock cycles)
|
1220 |
|
|
// One FDRSE flop per DQS group: registers CAL2_RD_DATA_SEL[n] on CLKDIV to
// produce RD_DATA_SEL[n]. Clock enable is tied high, and the reset and set
// inputs are tied low. The synthesis directives on the instance ask the
// tool to preserve the flop and not to replicate it.
genvar rd_i;
generate
  for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rd_data_sel
    FDRSE u_ff_rd_data_sel
      (
       .C   (clkdiv),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (cal2_rd_data_sel[rd_i]),
       .Q   (rd_data_sel[rd_i])
       ) /* synthesis syn_preserve = 1 */
         /* synthesis syn_replicate = 0 */;
  end
endgenerate
|
1235 |
|
|
|
1236 |
|
|
//*****************************************************************
|
1237 |
|
|
// Max number of taps used for stg2 cal dependent on number of taps
|
1238 |
|
|
// used for stg1 (give priority to stg1 cal - let it use as many
|
1239 |
|
|
// taps as it needs - the remainder of the IDELAY taps can be used
|
1240 |
|
|
// by stg2)
|
1241 |
|
|
//*****************************************************************
|
1242 |
|
|
|
1243 |
|
|
// Taps left over for stage 2: 63 minus the maximum tap count recorded
// during stage 1 (registered).
always @(posedge clkdiv) begin
  cal2_idel_tap_limit <= 6'd63 - cal1_idel_max_tap;
end
|
1245 |
|
|
|
1246 |
|
|
//*****************************************************************
|
1247 |
|
|
// second stage calibration uses readback pattern of "1100" (i.e.
|
1248 |
|
|
// 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
|
1249 |
|
|
// only look at the first bit of each DQS group
|
1250 |
|
|
//*****************************************************************
|
1251 |
|
|
|
1252 |
|
|
// CAL2_DETECT_EDGE is asserted when either:
//  - the rise and fall capture data disagree with each other, or
//  - the capture data differs from the value last stored for the currently
//    selected polarity (CAL2_CURR_SEL = 0 compares against the *_LAST_POS
//    registers, CAL2_CURR_SEL = 1 against the *_LAST_NEG registers), and
//    that stored value is flagged as valid.
assign cal2_detect_edge =
  ((rdd_rise_q1 != rdd_fall_q1) ||
   ((!cal2_curr_sel) && cal2_rd_data_last_valid_pos &&
    ((rdd_rise_q1 != cal2_rd_data_rise_last_pos) ||
     (rdd_fall_q1 != cal2_rd_data_fall_last_pos))) ||
   ((cal2_curr_sel) && cal2_rd_data_last_valid_neg &&
    ((rdd_rise_q1 != cal2_rd_data_rise_last_neg) ||
     (rdd_fall_q1 != cal2_rd_data_fall_last_neg))));
|
1263 |
|
|
|
1264 |
|
|
//*****************************************************************
|
1265 |
|
|
// keep track of edge tap counts found, and whether we've
|
1266 |
|
|
// incremented to the maximum number of taps allowed
|
1267 |
|
|
// NOTE: Assume stage 2 cal always increments the tap count (never
|
1268 |
|
|
// decrements) when searching for edge of the data valid window
|
1269 |
|
|
//*****************************************************************
|
1270 |
|
|
|
1271 |
|
|
// Stage 2 tap counter. Cleared in CAL2_INIT, then incremented on every
// DQS IDELAY CE pulse issued by stage 2 (the count never goes down here).
// CAL2_IDEL_TAP_LIMIT_HIT is set by the increment that brings the count up
// to CAL2_IDEL_TAP_LIMIT.
// NOTE(review): if CAL2_IDEL_TAP_LIMIT can ever be 0, the "limit - 1"
// compare value wraps and would not match the count - confirm whether
// that case is reachable.
always @(posedge clkdiv) begin
  if (cal2_state == CAL2_INIT) begin
    cal2_idel_tap_cnt       <= 6'd0;
    cal2_idel_tap_limit_hit <= 1'b0;
  end else if (cal2_dlyce_dqs) begin
    cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
                                cal2_idel_tap_limit - 1);
    cal2_idel_tap_cnt       <= cal2_idel_tap_cnt + 1;
  end
end
|
1280 |
|
|
|
1281 |
|
|
//*****************************************************************
|
1282 |
|
|
|
1283 |
|
|
// Stage 2 calibration state machine (clkdiv domain).
//
// For each DQS group (indexed by count_dqs) the machine alternates
// between the "positive-edge" and "negative-edge" stage 2 capture
// selections, incrementing the DQS IDELAY between attempts, until
// cal2_detect_edge reports an edge or the tap limit is hit. The chosen
// capture polarity for the group is left in cal2_rd_data_sel[count_dqs].
//
// Outputs driven here:
//   cal2_dlyce_dqs / cal2_dlyinc_dqs - single-cycle IDELAY CE / INC pulses
//   cal2_ref_req                     - refresh request between DQS groups
//   calib_done[1]                    - set once the last group is done
//
// Any state encoding not listed below returns the machine to CAL2_IDLE
// (default arm) instead of leaving it stuck until the next reset.
always @(posedge clkdiv)
  if (rstdiv) begin
    calib_done[1]               <= 1'b0;
    calib_done_tmp[1]           <= 1'bx;
    calib_err[1]                <= 1'b0;
    count_dqs                   <= 'b0;
    next_count_dqs              <= 'b0;
    cal2_dlyce_dqs              <= 1'b0;
    cal2_dlyinc_dqs             <= 1'b0;
    cal2_idel_dec_cnt           <= 6'bxxxxxx;
    cal2_rd_data_last_valid_neg <= 1'bx;
    cal2_rd_data_last_valid_pos <= 1'bx;
    cal2_rd_data_sel            <= 'b0;
    cal2_ref_req                <= 1'b0;
    cal2_state                  <= CAL2_IDLE;
  end else begin
    // defaults: these are pulses, asserted for one cycle only when a
    // state below overrides them
    cal2_ref_req    <= 1'b0;
    cal2_dlyce_dqs  <= 1'b0;
    cal2_dlyinc_dqs <= 1'b0;

    case (cal2_state)
      // Wait for the stage 2 start request
      CAL2_IDLE: begin
        count_dqs      <= 'b0;
        next_count_dqs <= 'b0;
        if (calib_start[1]) begin
          cal2_rd_data_sel  <= {DQS_WIDTH{1'b0}};
          calib_done[1]     <= 1'b0;
          calib_done_tmp[1] <= 1'b0;
          cal2_state        <= CAL2_INIT;
        end
      end

      // Pass through this state every time we calibrate a new DQS group
      CAL2_INIT: begin
        cal2_curr_sel               <= 1'b0;
        cal2_rd_data_last_valid_neg <= 1'b0;
        cal2_rd_data_last_valid_pos <= 1'b0;
        cal2_state                  <= CAL2_INIT_IDEL_WAIT;
      end

      // Stall state only used if calibration is run more than once. Can
      // take this state out if the design never runs calibration more
      // than once. We need this state to give time for MUX'ed data to
      // settle after resetting RD_DATA_SEL
      CAL2_INIT_IDEL_WAIT:
        if (!idel_set_wait)
          cal2_state <= CAL2_FIND_EDGE_POS;

      // Look for an edge - first check "positive-edge" stage 2 capture
      CAL2_FIND_EDGE_POS: begin
        // if found an edge, then switch to the opposite edge stage 2
        // capture and we're done - no need to decrement the tap count,
        // since switching to the opposite edge will shift the capture
        // point by 180 degrees
        if (cal2_detect_edge) begin
          cal2_curr_sel <= 1'b1;
          cal2_state    <= CAL2_DONE;
          // set all DQS groups to be the same for simulation
          if (SIM_ONLY != 0)
            cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
          else
            cal2_rd_data_sel[count_dqs] <= 1'b1;
          if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
            calib_done_tmp[1] <= 1'b1;
          else
            // MIG 2.1: Fix for simulation out-of-bounds error when
            // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
            next_count_dqs <= count_dqs + 1;
        end else begin
          // otherwise, invert polarity of stage 2 capture and look for
          // an edge with opposite capture clock polarity. Remember what
          // was seen with the positive-edge capture for later comparison.
          cal2_curr_sel               <= 1'b1;
          cal2_rd_data_sel[count_dqs] <= 1'b1;
          cal2_state                  <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
          cal2_rd_data_rise_last_pos  <= rdd_rise_q1;
          cal2_rd_data_fall_last_pos  <= rdd_fall_q1;
          cal2_rd_data_last_valid_pos <= 1'b1;
        end
      end

      // Give time to switch from positive-edge to negative-edge second
      // stage capture (need time for data to filter through pipe stages)
      CAL2_FIND_EDGE_IDEL_WAIT_POS:
        if (!idel_set_wait)
          cal2_state <= CAL2_FIND_EDGE_NEG;

      // Look for an edge - check "negative-edge" stage 2 capture
      CAL2_FIND_EDGE_NEG:
        if (cal2_detect_edge) begin
          cal2_curr_sel <= 1'b0;
          cal2_state    <= CAL2_DONE;
          // set all DQS groups to be the same for simulation
          if (SIM_ONLY != 0)
            cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
          else
            cal2_rd_data_sel[count_dqs] <= 1'b0;
          if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
            calib_done_tmp[1] <= 1'b1;
          else
            // MIG 2.1: Fix for simulation out-of-bounds error when
            // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
            next_count_dqs <= count_dqs + 1;
        end else if (cal2_idel_tap_limit_hit) begin
          // otherwise, if we've run out of taps, then immediately
          // back off by half of the tap limit - that's our best estimate
          // for the optimal calibration point. It doesn't matter which
          // polarity we're using for capture (we don't know which one is
          // best to use)
          cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]};
          cal2_state        <= CAL2_DEC_IDEL;
          if ((count_dqs == DQS_WIDTH-1) || (SIM_ONLY != 0))
            calib_done_tmp[1] <= 1'b1;
          else
            // MIG 2.1: Fix for simulation out-of-bounds error when
            // SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
            next_count_dqs <= count_dqs + 1;
        end else begin
          // otherwise, increment IDELAY, and start looking for edge again
          cal2_curr_sel               <= 1'b0;
          cal2_rd_data_sel[count_dqs] <= 1'b0;
          cal2_state                  <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
          cal2_rd_data_rise_last_neg  <= rdd_rise_q1;
          cal2_rd_data_fall_last_neg  <= rdd_fall_q1;
          cal2_rd_data_last_valid_neg <= 1'b1;
          cal2_dlyce_dqs              <= 1'b1;
          cal2_dlyinc_dqs             <= 1'b1;
        end

      // Wait after the IDELAY increment before sampling again
      CAL2_FIND_EDGE_IDEL_WAIT_NEG:
        if (!idel_set_wait)
          cal2_state <= CAL2_FIND_EDGE_POS;

      // if no edge found, then decrement by half # of taps used: one
      // decrement pulse (CE = 1, INC = 0) per cycle until the count is 0
      CAL2_DEC_IDEL: begin
        if (cal2_idel_dec_cnt == 6'b000000)
          cal2_state <= CAL2_DONE;
        else begin
          cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
          cal2_dlyce_dqs    <= 1'b1;
          cal2_dlyinc_dqs   <= 1'b0;
        end
      end

      // delay state to allow count_dqs and ISERDES data to point to next
      // DQ bit (DQS group) before going to INIT
      CAL2_DONE:
        if (!idel_set_wait) begin
          count_dqs <= next_count_dqs;
          if (calib_done_tmp[1]) begin
            calib_done[1] <= 1'b1;
            cal2_state    <= CAL2_IDLE;
          end else begin
            // request auto-refresh after every DQS group calibrated to
            // avoid tRAS violation
            cal2_ref_req <= 1'b1;
            if (calib_ref_done)
              cal2_state <= CAL2_INIT;
          end
        end

      // Illegal/unused state encoding: recover to idle rather than
      // holding the bad state indefinitely
      default:
        cal2_state <= CAL2_IDLE;
    endcase
  end
|
1444 |
|
|
|
1445 |
|
|
//***************************************************************************
|
1446 |
|
|
// Stage 3 calibration: Read Enable
|
1447 |
|
|
// Description:
|
1448 |
|
|
// read enable calibration determines the "round-trip" time (in # of CLK0
|
1449 |
|
|
// cycles) between when a read command is issued by the controller, and
|
1450 |
|
|
// when the corresponding read data is synchronized into the CLK0 domain
|
1451 |
|
|
// this is a long delay chain to delay read enable signal from controller/
|
1452 |
|
|
// initialization logic (i.e. this is used for both initialization and
|
1453 |
|
|
// during normal controller operation). Stage 3 calibration logic decides
|
1454 |
|
|
// which delayed version is appropriate to use (which is affected by the
|
1455 |
|
|
// round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic
|
1456 |
|
|
// when the captured data output from ISERDES is valid.
|
1457 |
|
|
//***************************************************************************
|
1458 |
|
|
|
1459 |
|
|
//*****************************************************************
|
1460 |
|
|
// Delay chains: Use shift registers
|
1461 |
|
|
// Two sets of delay chains are used:
|
1462 |
|
|
// 1. One to delay RDEN from PHY_INIT module for calibration
|
1463 |
|
|
// purposes (delay required for RDEN for calibration is different
|
1464 |
|
|
// than during normal operation)
|
1465 |
|
|
// 2. One per DQS group to delay RDEN from controller for normal
|
1466 |
|
|
// operation - the value to delay for each DQS group can be different
|
1467 |
|
|
// as is determined during calibration
|
1468 |
|
|
//*****************************************************************
|
1469 |
|
|
|
1470 |
|
|
//*****************************************************************
|
1471 |
|
|
// First delay chain, use only for calibration
|
1472 |
|
|
// input = asserted on rising edge of RDEN from PHY_INIT module
|
1473 |
|
|
//*****************************************************************
|
1474 |
|
|
|
1475 |
|
|
// Read-enable input registers (clk domain):
//   * ctrl_rden_r        - controller read enable, delayed one clk
//   * phy_init_rden_r/r1 - PHY_INIT read enable, delayed one / two clks
//   * calib_rden_edge_r  - registered one-cycle pulse marking a rising
//                          edge of phy_init_rden_r (high now, low on the
//                          previous clk)
always @(posedge clk) begin
  calib_rden_edge_r <= ~phy_init_rden_r1 & phy_init_rden_r;
  phy_init_rden_r1  <= phy_init_rden_r;
  phy_init_rden_r   <= phy_init_rden;
  ctrl_rden_r       <= ctrl_rden;
end
|
1481 |
|
|
|
1482 |
|
|
// Tap address for the calibration read-enable shift register, shared by
// stage 3 and stage 4 calibration (stage 4 does not strictly need it, but
// uses it as an extra check that data is examined on the right clock
// cycle). Until calib_done[2] is set the stage 3 address is used;
// afterwards the stage 4 address is used.
always @(posedge clkdiv) begin
  if (calib_done[2] == 1'b0) begin
    calib_rden_srl_a <= cal3_rden_srl_a;
  end else begin
    calib_rden_srl_a <= cal4_rden_srl_a;
  end
end
|
1490 |
|
|
|
1491 |
|
|
// Explicit flip-flop primitives, one per bit of the 5-bit calibration SRL
// address, so that the flops can be targeted by a multi-cycle path
// constraint in the UCF. Each flop copies calib_rden_srl_a[n] to
// calib_rden_srl_a_r[n] on clkdiv with clock enable tied high and
// set/reset tied low. The synthesis attributes must stay attached to the
// instance.
genvar cal_rden_ff_i;
generate
  for (cal_rden_ff_i = 0;
       cal_rden_ff_i < 5;
       cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly
    FDRSE u_ff_cal_rden_dly
      (
       .C   (clkdiv),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (calib_rden_srl_a[cal_rden_ff_i]),
       .Q   (calib_rden_srl_a_r[cal_rden_ff_i])
       ) /* synthesis syn_preserve = 1 */
         /* synthesis syn_replicate = 0 */;
  end
endgenerate
|
1508 |
|
|
|
1509 |
|
|
// Calibration read-enable delay line: a 32-deep shift register clocked by
// clk that shifts in the read-enable rising-edge pulse
// (calib_rden_edge_r). The tap presented on Q is chosen by the registered
// 5-bit address calib_rden_srl_a_r; the cascade output Q31 is unused.
SRLC32E u_calib_rden_srl
  (
   .CLK  (clk),
   .CE   (1'b1),
   .A    (calib_rden_srl_a_r),
   .D    (calib_rden_edge_r),
   .Q    (calib_rden_srl_out),
   .Q31  ()
   );
|
1518 |
|
|
|
1519 |
|
|
// Output register for the calibration SRL: re-registers
// calib_rden_srl_out on clk (clock enable tied high, set/reset tied low).
// Instantiated as a primitive with syn_preserve so synthesis keeps it.
FDRSE u_calib_rden_srl_out_r
  (
   .C   (clk),
   .CE  (1'b1),
   .R   (1'b0),
   .S   (1'b0),
   .D   (calib_rden_srl_out),
   .Q   (calib_rden_srl_out_r)
   ) /* synthesis syn_preserve = 1 */;
|
1528 |
|
|
|
1529 |
|
|
// Convert the delayed read enable to the CLKDIV domain. Two versions are
// generated because we need to be able to tell exactly which fast (clk)
// clock cycle the read enable was asserted in:
//   * calib_rden_valid      samples calib_rden_srl_out_r
//   * calib_rden_valid_stgd samples the same signal delayed by one more
//                           clk cycle (calib_rden_srl_out_r1)
// Only one of CALIB_RDEN_VALID or CALIB_RDEN_VALID_STGD is expected to be
// asserted for any given shift value.
always @(posedge clk) begin
  calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
end

always @(posedge clkdiv) begin
  calib_rden_valid_stgd <= calib_rden_srl_out_r1;
  calib_rden_valid      <= calib_rden_srl_out_r;
end
|
1540 |
|
|
|
1541 |
|
|
//*****************************************************************
|
1542 |
|
|
// Second set of delays chain, use for normal reads
|
1543 |
|
|
// input = RDEN from controller
|
1544 |
|
|
//*****************************************************************
|
1545 |
|
|
|
1546 |
|
|
// Explicit flip-flop primitives for the per-DQS-group read-enable delay
// values (5 bits per group, 5*DQS_WIDTH flops in total), so that the flops
// can be targeted by a multi-cycle path constraint in the UCF. Each flop
// copies rden_dly[n] to rden_dly_r[n] on clkdiv with clock enable tied
// high and set/reset tied low. The synthesis attributes must stay
// attached to the instance.
genvar rden_ff_i;
generate
  for (rden_ff_i = 0;
       rden_ff_i < 5*DQS_WIDTH;
       rden_ff_i = rden_ff_i+1) begin: gen_rden_dly
    FDRSE u_ff_rden_dly
      (
       .C   (clkdiv),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (rden_dly[rden_ff_i]),
       .Q   (rden_dly_r[rden_ff_i])
       ) /* synthesis syn_preserve = 1 */
         /* synthesis syn_replicate = 0 */;
  end
endgenerate
|
1563 |
|
|
|
1564 |
|
|
// Per-DQS-group read-enable delay chain used for normal reads. For each
// DQS group i:
//   * u_rden_srl     - 32-deep shift register clocked by clk that shifts
//                      in the registered controller read enable
//                      (ctrl_rden_r). The tap driven onto Q is selected by
//                      the group's own 5-bit address, taken from
//                      rden_dly_r bits (5*i)+4 down to (5*i); the cascade
//                      output Q31 is unused.
//   * u_calib_rden_r - flop that re-registers the selected tap on clk to
//                      produce calib_rden[i].
genvar rden_i;
generate
  for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1) begin: gen_rden
    SRLC32E u_rden_srl
      (
       .CLK  (clk),
       .CE   (1'b1),
       .A    ({rden_dly_r[(rden_i*5)+4],
               rden_dly_r[(rden_i*5)+3],
               rden_dly_r[(rden_i*5)+2],
               rden_dly_r[(rden_i*5)+1],
               rden_dly_r[(rden_i*5)]}),
       .D    (ctrl_rden_r),
       .Q    (rden_srl_out[rden_i]),
       .Q31  ()
       );
    FDRSE u_calib_rden_r
      (
       .C   (clk),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (rden_srl_out[rden_i]),
       .Q   (calib_rden[rden_i])
       ) /* synthesis syn_preserve = 1 */;
  end
endgenerate
|
1592 |
|
|
|
1593 |
|
|
//*****************************************************************
|
1594 |
|
|
// indicates that current received data is the correct pattern. Check both
|
1595 |
|
|
// rising and falling data for first DQ in each DQS group. Note that
|
1596 |
|
|
// we're checking using a pipelined version of read data, so need to take
|
1597 |
|
|
// this inherent delay into account in determining final read valid delay
|
1598 |
|
|
// Data is written to the memory in the following order (first -> last):
|
1599 |
|
|
// 0x1, 0xE, 0xE, 0x1, 0x1, 0xE, 0xE, 0x1
|
1600 |
|
|
// Looking just at LSb, expect data in sequence (in binary):
|
1601 |
|
|
// 1, 0, 0, 1, 1, 0, 0, 1
|
1602 |
|
|
// Check for the presence of the first 7 words, and compensate read valid
|
1603 |
|
|
// delay accordingly. Don't check last falling edge data, it may be
|
1604 |
|
|
// corrupted by the DQS tri-state glitch at end of read postamble
|
1605 |
|
|
// (glitch protection not yet active until stage 4 cal)
|
1606 |
|
|
//*****************************************************************
|
1607 |
|
|
|
1608 |
|
|
// Read-data history pipeline (clkdiv domain) used by the stage 3 and
// stage 4 pattern checks:
//   *_r  = value from one clkdiv cycle ago (Q1 and Q2, rise and fall)
//   *_r1 = value from two clkdiv cycles ago (Q1 only)
always @(posedge clkdiv) begin
  // second pipeline stage (Q1 only)
  rdd_fall_q1_r1 <= rdd_fall_q1_r;
  rdd_rise_q1_r1 <= rdd_rise_q1_r;
  // first pipeline stage
  rdd_fall_q2_r  <= rdd_fall_q2;
  rdd_rise_q2_r  <= rdd_rise_q2;
  rdd_fall_q1_r  <= rdd_fall_q1;
  rdd_rise_q1_r  <= rdd_rise_q1;
end
|
1616 |
|
|
|
1617 |
|
|
// Stage 3 pattern detectors (registered, clkdiv domain). Both look for
// the seven-sample sequence 1, 0, 0, 1, 1, 0, 0 across the current and
// pipelined read data; they differ only in which registers hold which
// sample. The terms below are listed newest sample first.
always @(posedge clkdiv) begin
  // For the following sequence from memory:
  //   rise[0], fall[0], rise[1], fall[1]
  // if data is aligned out of fabric ISERDES:
  //   RDD_RISE_Q2 = rise[0]
  //   RDD_FALL_Q2 = fall[0]
  //   RDD_RISE_Q1 = rise[1]
  //   RDD_FALL_Q1 = fall[1]
  // Current cycle supplies samples 5..7, the *_r registers samples 1..4.
  cal3_data_match <= ((rdd_rise_q1   == 0) &&
                      (rdd_fall_q2   == 0) &&
                      (rdd_rise_q2   == 1) &&
                      (rdd_fall_q1_r == 1) &&
                      (rdd_rise_q1_r == 0) &&
                      (rdd_fall_q2_r == 0) &&
                      (rdd_rise_q2_r == 1));

  // if data is staggered out of fabric ISERDES:
  //   RDD_RISE_Q1_R = rise[0]
  //   RDD_FALL_Q1_R = fall[0]
  //   RDD_RISE_Q2   = rise[1]
  //   RDD_FALL_Q2   = fall[1]
  // Current cycle supplies sample 7, the *_r registers samples 3..6 and
  // the *_r1 registers samples 1..2.
  cal3_data_match_stgd <= ((rdd_rise_q2    == 0) &&
                           (rdd_fall_q1_r  == 0) &&
                           (rdd_rise_q1_r  == 1) &&
                           (rdd_fall_q2_r  == 1) &&
                           (rdd_rise_q2_r  == 0) &&
                           (rdd_fall_q1_r1 == 0) &&
                           (rdd_rise_q1_r1 == 1));
end
|
1646 |
|
|
|
1647 |
|
|
// Stage 3 combinational helpers:
//   cal3_rden_dly    - current SRL address minus CAL3_RDEN_SRL_DLY_DELTA;
//                      this is the value stored into rden_dly on a match
//   cal3_data_valid  - either version of the delayed read enable is high
//   cal3_match_found - a pattern detector fired together with its own
//                      (aligned or staggered) read-enable valid
assign cal3_rden_dly    = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;
assign cal3_data_valid  = (calib_rden_valid_stgd | calib_rden_valid);
assign cal3_match_found =
  ((cal3_data_match_stgd && calib_rden_valid_stgd) ||
   (cal3_data_match      && calib_rden_valid));
|
1652 |
|
|
|
1653 |
|
|
// when calibrating, check to see which clock cycle (after the read is
|
1654 |
|
|
// issued) does the expected data pattern arrive. Record this result
|
1655 |
|
|
// NOTE: Can add error checking here in case valid data not found on any
|
1656 |
|
|
// of the available pipeline stages
|
1657 |
|
|
// Stage 3 (read enable) calibration state machine (clkdiv domain).
//
// For each DQS group (indexed by count_rden) the calibration SRL address
// cal3_rden_srl_a is started at RDEN_BASE_DELAY and incremented until the
// expected read pattern is seen in the cycle flagged by the delayed read
// enable. On a match:
//   rden_dly[5*n +: 5]       gets cal3_rden_dly (address minus delta),
//                            used for normal reads
//   calib_rden_dly[5*n +: 5] gets the raw SRL address, used by stage 4
// (written bit by bit below). If the address reaches 5'b11111 without a
// match, calib_err_2[0] is set and the machine returns to idle.
//
// Any state encoding not listed below returns the machine to CAL3_IDLE
// (default arm) instead of leaving it stuck until the next reset.
always @(posedge clkdiv) begin
  if (rstdiv) begin
    cal3_rden_srl_a <= 5'bxxxxx;
    cal3_state      <= CAL3_IDLE;
    calib_done[2]   <= 1'b0;
    calib_err_2[0]  <= 1'b0;
    count_rden      <= {DQS_WIDTH{1'b0}};
    rden_dly        <= {5*DQS_WIDTH{1'b0}};
  end else begin

    case (cal3_state)
      // Wait for the stage 3 start request
      CAL3_IDLE: begin
        count_rden <= {DQS_WIDTH{1'b0}};
        if (calib_start[2]) begin
          calib_done[2] <= 1'b0;
          cal3_state    <= CAL3_INIT;
        end
      end

      // Start of search for one DQS group
      CAL3_INIT: begin
        cal3_rden_srl_a <= RDEN_BASE_DELAY;
        // let SRL pipe clear after loading initial shift value
        cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
      end

      CAL3_DETECT:
        if (cal3_data_valid) begin
          // if match found at the correct clock cycle
          if (cal3_match_found) begin

            // For simulation, load SRL addresses for all DQS with same value
            if (SIM_ONLY != 0) begin
              for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
                rden_dly[(i*5)]   <= cal3_rden_dly[0];
                rden_dly[(i*5)+1] <= cal3_rden_dly[1];
                rden_dly[(i*5)+2] <= cal3_rden_dly[2];
                rden_dly[(i*5)+3] <= cal3_rden_dly[3];
                rden_dly[(i*5)+4] <= cal3_rden_dly[4];
              end
            end else begin
              rden_dly[(count_rden*5)]   <= cal3_rden_dly[0];
              rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
              rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
              rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
              rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
            end

            // Use for stage 4 calibration
            calib_rden_dly[(count_rden*5)]   <= cal3_rden_srl_a[0];
            calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
            calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
            calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
            calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
            cal3_state <= CAL3_DONE;
          end else begin
            // If we run out of stages to shift without finding the
            // correct result, then stop and assert error
            if (cal3_rden_srl_a == 5'b11111) begin
              calib_err_2[0] <= 1'b1;
              cal3_state     <= CAL3_IDLE;
            end else begin
              // otherwise, increase the shift value and try again
              cal3_rden_srl_a <= cal3_rden_srl_a + 1;
              cal3_state      <= CAL3_RDEN_PIPE_CLR_WAIT;
            end
          end
        end

      // give additional time for RDEN_R pipe to clear from effects of
      // previous pipeline or IDELAY tap change
      CAL3_RDEN_PIPE_CLR_WAIT:
        if (calib_rden_pipe_cnt == 5'b00000)
          cal3_state <= CAL3_DETECT;

      // Advance to the next DQS group, or finish after the last one (in
      // simulation mode only the first group is calibrated)
      CAL3_DONE: begin
        if ((count_rden == DQS_WIDTH-1) || (SIM_ONLY != 0)) begin
          calib_done[2] <= 1'b1;
          cal3_state    <= CAL3_IDLE;
        end else begin
          count_rden <= count_rden + 1;
          cal3_state <= CAL3_INIT;
        end
      end

      // Illegal/unused state encoding: recover to idle rather than
      // holding the bad state indefinitely
      default:
        cal3_state <= CAL3_IDLE;
    endcase
  end
end
|
1742 |
|
|
|
1743 |
|
|
//*****************************************************************
|
1744 |
|
|
// Last part of stage 3 calibration - compensate for differences
|
1745 |
|
|
// in delay between different DQS groups. Assume that in the worst
|
1746 |
|
|
// case, DQS groups can only differ by one clock cycle. Data for
|
1747 |
|
|
// certain DQS groups must be delayed by one clock cycle.
|
1748 |
|
|
// NOTE: May need to increase allowable variation to greater than
|
1749 |
|
|
// one clock cycle in certain customer designs.
|
1750 |
|
|
// Algorithm is:
|
1751 |
|
|
// 1. Record shift delay value for DQS[0]
|
1752 |
|
|
// 2. Compare each DQS[x] delay value to that of DQS[0]:
|
1753 |
|
|
// - If different, than record this fact (RDEN_MUX)
|
1754 |
|
|
// - If DQS[0] delay is greater than DQS[x] delay, set RDEN_INC. Assume greater by
|
1755 |
|
|
// one clock cycle only - this is a key assumption, assume no
|
1756 |
|
|
// more than a one clock cycle variation.
|
1757 |
|
|
// - If DQS[0] delay is less than DQS[x] delay, set RDEN_DEC
|
1758 |
|
|
// 3. After calibration is complete, set control for DQS group
|
1759 |
|
|
// delay (CALIB_RDEN_SEL):
|
1760 |
|
|
// - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
|
1761 |
|
|
// delay (and at least one other DQS group has a higher
|
1762 |
|
|
// delay).
|
1763 |
|
|
// - If RDEN_INC = 1, then assume that DQS[0] is the highest
|
1764 |
|
|
// delay (and that all other DQS groups have the same or
|
1765 |
|
|
// lower delay).
|
1766 |
|
|
// - If both RDEN_INC and RDEN_DEC = 1, then flag error
|
1767 |
|
|
// (variation is too high for this algorithm to handle)
|
1768 |
|
|
//*****************************************************************
|
1769 |
|
|
|
1770 |
|
|
// Cross-group read-enable delay compensation (clkdiv domain).
//
// While stage 3 is still running (calib_done[2] = 0), every match found
// by the stage 3 state machine is classified against DQS[0]:
//   rden_dly_0      - SRL address found for DQS[0] (the reference)
//   rden_mux[n]     - set if DQS[n] matched at a different address
//   rden_inc        - set if DQS[0]'s address is larger than some group's
//   rden_dec        - set if DQS[0]'s address is smaller than some group's
// Once stage 3 is done (calib_done[2] = 1), the final per-group select
// calib_rden_sel is derived and an error is flagged if groups were found
// on both sides of DQS[0].
always @(posedge clkdiv) begin
  if (rstdiv) begin
    calib_err_2[1] <= 1'b0;
    calib_rden_sel <= {DQS_WIDTH{1'bx}};
    rden_dec       <= 1'b0;
    rden_dly_0     <= 5'bxxxxx;
    rden_inc       <= 1'b0;
    rden_mux       <= {DQS_WIDTH{1'b0}};
  end else begin
    if (!calib_done[2]) begin
      // stage 3 in progress: record the result of each match
      if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
        if (SIM_ONLY != 0) begin
          // for simulation, RDEN calibration only happens for DQS[0];
          // give every DQS group the same RDEN_MUX setting as DQS[0]
          if (count_rden == 0)
            rden_mux <= {DQS_WIDTH{1'b0}};
        end else if (count_rden == 0) begin
          // DQS[0]: store its delay value as the reference
          rden_dly_0  <= cal3_rden_srl_a;
          rden_mux[0] <= 1'b0;
        end else begin
          // all other DQS groups: compare against the DQS[0] reference.
          // If the delays are equal, rden_mux[count_rden] simply stays
          // at its reset value of 0.
          if (rden_dly_0 != cal3_rden_srl_a) begin
            // this group's delay differs from the reference
            rden_mux[count_rden] <= 1'b1;
            if (rden_dly_0 < cal3_rden_srl_a)
              rden_dec <= 1'b1;
            else if (rden_dly_0 > cal3_rden_srl_a)
              rden_inc <= 1'b1;
          end
        end
      end
    end else begin
      // stage 3 calibration is done: set final value for RDEN data delay.
      // Flag error if there's more than one cycle variation from DQS[0]
      // (groups found both above and below the reference)
      calib_err_2[1] <= (rden_inc && rden_dec);
      if (rden_inc)
        // DQS[0] delay represents the max delay
        calib_rden_sel <= ~rden_mux;
      else
        // DQS[0] delay represents the min delay (or all the delays are
        // the same between DQS groups)
        calib_rden_sel <= rden_mux;
    end
  end
end
|
1825 |
|
|
|
1826 |
|
|
// Stage 3 error flag: registered OR of the two stage 3 error sources
// (calib_err_2[0] and calib_err_2[1]).
always @(posedge clkdiv) begin
  calib_err[2] <= calib_err_2[1] | calib_err_2[0];
end
|
1829 |
|
|
|
1830 |
|
|
//***************************************************************************
|
1831 |
|
|
// Stage 4 calibration: DQS gate
|
1832 |
|
|
//***************************************************************************
|
1833 |
|
|
|
1834 |
|
|
//*****************************************************************
|
1835 |
|
|
// indicates that current received data is the correct pattern. Same as
|
1836 |
|
|
// for READ VALID calibration, except that the expected data sequence is
|
1837 |
|
|
// different since DQS gate is asserted after the 6th word.
|
1838 |
|
|
// Data sequence:
|
1839 |
|
|
// Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
|
1840 |
|
|
// After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
|
1841 |
|
|
// 5th word, 8th word = 6th word)
|
1842 |
|
|
// What if the gate timing is off? Need to make sure we can distinguish
|
1843 |
|
|
// between the results of correct vs. incorrect gate timing. We also use
|
1844 |
|
|
// the "read_valid" signal from stage 3 calibration to help us determine
|
1845 |
|
|
// when to check for a valid sequence for stage 4 calibration (i.e. use
|
1846 |
|
|
// CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
|
1847 |
|
|
// Note that since the gate signal from the CLK0 domain is synchronized
|
1848 |
|
|
// to the falling edge of DQS, that the effect of the gate will only be
|
1849 |
|
|
// seen starting with a rising edge data (although it is possible
|
1850 |
|
|
// the GATE IDDR output could go metastable and cause a unexpected result
|
1851 |
|
|
// on the first rising and falling edges after the gate is enabled).
|
1852 |
|
|
// Also note that the actual DQS glitch can come more than 0.5*tCK after
|
1853 |
|
|
// the last falling edge of DQS and the constraint for this path can
|
1854 |
|
|
// be > 0.5*tCK; however, this means when calibrating, the output of the
|
1855 |
|
|
// GATE IDDR may miss the setup time requirement of the rising edge flop
|
1856 |
|
|
// and only meet it for the falling edge flop. Therefore the rising
|
1857 |
|
|
// edge data immediately following the assertion of the gate can either
|
1858 |
|
|
// be a 1 or 0 (can rely on either)
|
1859 |
|
|
// As the timing on the gate is varied, we expect to see (sequence of
|
1860 |
|
|
// captured read data shown below):
|
1861 |
|
|
// - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
|
1862 |
|
|
// read burst even starts)
|
1863 |
|
|
// - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
|
1864 |
|
|
// - x y 0 1 1 0 0 1 sometime during the burst; x,y = 0, or 1, but
|
1865 |
|
|
// - x y x 1 1 0 0 1 all bits that show an x are the same value,
|
1866 |
|
|
// - x y x y 1 0 0 1 and y are the same value)
|
1867 |
|
|
// - x y x y x 0 0 1
|
1868 |
|
|
// - x y x y x y 0 1 (gate starts just before start of burst)
|
1869 |
|
|
// - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
|
1870 |
|
|
// represents possibility that gate may not disable
|
1871 |
|
|
// clock for 2nd rising word in time)
|
1872 |
|
|
// - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
|
1873 |
|
|
// - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
|
1874 |
|
|
// - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
|
1875 |
|
|
//*****************************************************************
|
1876 |
|
|
|
1877 |
|
|
// Stage 4 combinational helpers:
//   cal4_data_valid - either version of the delayed read enable is high
//   cal4_data_good  - a stage 4 pattern detector fired together with its
//                     own (aligned or staggered) read-enable valid
assign cal4_data_valid = calib_rden_valid_stgd | calib_rden_valid;
assign cal4_data_good  = (cal4_data_match_stgd & calib_rden_valid_stgd) |
                         (cal4_data_match      & calib_rden_valid);
|
1882 |
|
|
|
1883 |
|
|
// Stage 4 pattern detectors (registered, clkdiv domain). Both look for
// the eight-sample sequence 1, 0, 0, 1, 1, 0, x, 0, where the seventh
// sample (the last rising-edge data) is a don't-care. The terms below are
// listed newest sample first.
always @(posedge clkdiv) begin
  // if data is aligned out of fabric ISERDES:
  // MIG 2.1: Last rising edge data value is not guaranteed to be a
  // certain value at higher frequencies, so (rdd_rise_q1 == 0) is
  // intentionally NOT part of the check.
  cal4_data_match <= ((rdd_fall_q1   == 0) &&
                      (rdd_fall_q2   == 0) &&
                      (rdd_rise_q2   == 1) &&
                      (rdd_fall_q1_r == 1) &&
                      (rdd_rise_q1_r == 0) &&
                      (rdd_fall_q2_r == 0) &&
                      (rdd_rise_q2_r == 1));

  // if data is staggered out of fabric ISERDES:
  // MIG 2.1: Last rising edge data value is not guaranteed to be a
  // certain value at higher frequencies, so (rdd_rise_q2 == 0) is
  // intentionally NOT part of the check.
  cal4_data_match_stgd <= ((rdd_fall_q2    == 0) &&
                           (rdd_fall_q1_r  == 0) &&
                           (rdd_rise_q1_r  == 1) &&
                           (rdd_fall_q2_r  == 1) &&
                           (rdd_rise_q2_r  == 0) &&
                           (rdd_fall_q1_r1 == 0) &&
                           (rdd_rise_q1_r1 == 1));
end
|
1909 |
|
|
|
1910 |
|
|
//*****************************************************************
|
1911 |
|
|
// DQS gate enable generation:
|
1912 |
|
|
// This signal gets synchronized to DQS domain, and drives IDDR
|
1913 |
|
|
// register that in turn asserts/deasserts CE to all 4 or 8 DQ
|
1914 |
|
|
// IDDR's in that DQS group.
|
1915 |
|
|
// 1. During normal (post-cal) operation, this is only for 2 clock
|
1916 |
|
|
// cycles following the end of a burst. Check for falling edge
|
1917 |
|
|
// of RDEN. But must also make sure NOT assert for a read-idle-
|
1918 |
|
|
// read (two non-consecutive reads, separated by exactly one
|
1919 |
|
|
// idle cycle) - in this case, don't assert the gate because:
|
1920 |
|
|
// (1) we don't have enough time to deassert the gate before the
|
1921 |
|
|
// first rising edge of DQS for second burst (b/c of fact
|
1922 |
|
|
// that DQS gate is generated in the fabric only off rising
|
1923 |
|
|
// edge of CLK0 - if we somehow had an ODDR in fabric, we
|
1924 |
|
|
// could pull this off, (2) assumption is that the DQS glitch
|
1925 |
|
|
// will not rise enough to cause a glitch because the
|
1926 |
|
|
// post-amble of the first burst is followed immediately by
|
1927 |
|
|
// the pre-amble of the next burst
|
1928 |
|
|
// 2. During stage 4 calibration, assert for 3 clock cycles
|
1929 |
|
|
// (assert gate enable one clock cycle early), since we gate out
|
1930 |
|
|
// the last two words (in addition to the crap on the DQ bus after
|
1931 |
|
|
// the DQS read postamble).
|
1932 |
|
|
// NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
|
1933 |
|
|
// to when they are asserted w/r to the start of the read burst
|
1934 |
|
|
// (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
|
1935 |
|
|
//*****************************************************************
|
1936 |
|
|
|
1937 |
|
|
// Copy of calib_done registered on the fast clock (clk) for timing
// purposes - currently only calib_done_r[2] is used.
always @(posedge clk) begin
  calib_done_r <= calib_done;
end

// Read-enable sources for DQS gate generation:
//   calib_init_rden - PHY_INIT read enable, passed through only once
//                     calib_done_r[2] is set
//   calib_ctrl_rden - controller read enable, passed through unchanged
always @(*) begin
  calib_init_rden = phy_init_rden & calib_done_r[2];
  calib_ctrl_rden = ctrl_rden;
end
|
1946 |
|
|
|
1947 |
|
|
// Controller-read gate pulse generation (clk domain).
// calib_ctrl_rden_negedge is high in the cycle where calib_ctrl_rden is
// low but was high on the previous clk (falling edge).
assign calib_ctrl_rden_negedge = calib_ctrl_rden_r & ~calib_ctrl_rden;
// check for read-idle-read before asserting DQS pulse at end of read:
// the pulse is only produced if the read enable is still low in the cycle
// after the falling edge
assign calib_ctrl_gate_pulse = ~calib_ctrl_rden &
                               calib_ctrl_rden_negedge_r;
always @(posedge clk) begin
  calib_ctrl_gate_pulse_r   <= calib_ctrl_gate_pulse;
  calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
  calib_ctrl_rden_r         <= calib_ctrl_rden;
end
|
1956 |
|
|
|
1957 |
|
|
// Calibration-read gate pulse generation (clk domain).
// calib_init_gate_pulse is high in the cycle where calib_init_rden is low
// but was high on the previous clk (falling edge); the _r and _r1 copies
// delay that pulse by one and two clk cycles respectively.
assign calib_init_gate_pulse = calib_init_rden_r & ~calib_init_rden;
always @(posedge clk) begin
  calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r;
  calib_init_gate_pulse_r  <= calib_init_gate_pulse;
  calib_init_rden_r        <= calib_init_rden;
end
|
1963 |
|
|
|
1964 |
|
|
// Input to the gate delay SRLs. The signal is driven low while any of the
// gate-pulse terms is active, and high otherwise:
//   (1) calibration path: calib_init_gate_pulse and its two registered
//       copies, i.e. three consecutive cycles beginning one cycle after
//       the falling edge of the calibration read enable
//   (2) normal operation: calib_ctrl_gate_pulse and its registered copy,
//       i.e. two consecutive cycles beginning two cycles after the falling
//       edge of CTRL_RDEN
assign gate_srl_in = ~calib_ctrl_gate_pulse    &
                     ~calib_ctrl_gate_pulse_r  &
                     ~calib_init_gate_pulse    &
                     ~calib_init_gate_pulse_r  &
                     ~calib_init_gate_pulse_r1;
|
1972 |
|
|
|
1973 |
|
|
//*****************************************************************
|
1974 |
|
|
// generate DQS enable signal for each DQS group
|
1975 |
|
|
// There are differences between DQS gate signal for calibration vs. during
|
1976 |
|
|
// normal operation:
|
1977 |
|
|
// * calibration gates the second to last clock cycle of the burst,
|
1978 |
|
|
// rather than after the last word (e.g. for a 8-word, 4-cycle burst,
|
1979 |
|
|
// cycle 4 is gated for calibration; during normal operation, cycle
|
1980 |
|
|
// 5 (i.e. cycle after the last word) is gated)
|
1981 |
|
|
// enable for DQS is deasserted for two clock cycles, except when
|
1982 |
|
|
// we have the preamble for the next read immediately following
|
1983 |
|
|
// the postamble of the current read - assume DQS does not glitch
|
1984 |
|
|
// during this time, that it stays low. Also if we did have to gate
|
1985 |
|
|
// the DQS for this case, then we don't have enough time to deassert
|
1986 |
|
|
// the gate in time for the first rising edge of DQS for the second
|
1987 |
|
|
// read
|
1988 |
|
|
//*****************************************************************
|
1989 |
|
|
|
1990 |
|
|
// One explicit flop per gate_dly bit, clocked by clkdiv. They are
// instantiated as primitives (with preserve / no-replicate attributes) so
// that they can be targeted by a multi-cycle path constraint in the UCF.
genvar gate_ff_i;
generate
  for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH; gate_ff_i = gate_ff_i+1)
  begin: gen_gate_dly
    FDRSE u_ff_gate_dly
      (
       .C   (clkdiv),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (gate_dly[gate_ff_i]),
       .Q   (gate_dly_r[gate_ff_i])
       ) /* synthesis syn_preserve = 1 */
         /* synthesis syn_replicate = 0 */;
  end
endgenerate
|
2007 |
|
|
|
2008 |
|
|
// Per-DQS-group gate path: gate_srl_in -> addressable SRL -> optional
// pipeline flop -> en_dqs output flop. Everything here runs on clk.
genvar gate_i;
generate
  for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate
    // Variable delay: the tap address is this group's 5-bit slice of the
    // registered gate delay vector.
    SRLC32E u_gate_srl
      (
       .CLK (clk),
       .CE  (1'b1),
       .D   (gate_srl_in),
       .A   ({gate_dly_r[(gate_i*5)+4],
              gate_dly_r[(gate_i*5)+3],
              gate_dly_r[(gate_i*5)+2],
              gate_dly_r[(gate_i*5)+1],
              gate_dly_r[(gate_i*5)]}),
       .Q   (gate_srl_out[gate_i]),
       .Q31 ()
       );

    // For GATE_BASE_DELAY > 0 there is one spare cycle, which is spent on
    // an extra register between the SRL and the EN_DQS flop. Rationale from
    // the original design notes: after PAR the controller logic can sit far
    // from the DQS gate logic (which is placed near the DQS I/Os), giving
    // large net delays; the extra pipeline stage for CL >= 4 (above 200MHz)
    // reduces them.
    if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
      // flop between SRL32 and EN_DQS flop (the latter is located near the
      // DDR2 IOBs)
      FDRSE u_gate_srl_ff
        (
         .C   (clk),
         .CE  (1'b1),
         .R   (1'b0),
         .S   (1'b0),
         .D   (gate_srl_out[gate_i]),
         .Q   (gate_srl_out_r[gate_i])
         ) /* synthesis syn_preserve = 1 */;
    end else begin: gen_gate_base_dly_le3
      // no spare cycle: feed the SRL output straight through
      assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i];
    end

    // Final DQS enable register for this group
    FDRSE u_en_dqs_ff
      (
       .C   (clk),
       .CE  (1'b1),
       .R   (1'b0),
       .S   (1'b0),
       .D   (gate_srl_out_r[gate_i]),
       .Q   (en_dqs[gate_i])
       ) /* synthesis syn_preserve = 1 */
         /* synthesis syn_replicate = 0 */;
  end
endgenerate
|
2060 |
|
|
|
2061 |
|
|
//*****************************************************************
// Stable-window tracker (used only while searching for the left edge).
// Counts consecutive good data checks; once the count reaches
// MIN_WIN_SIZE-1 the window is declared stable, meaning we are past any
// region of instability associated with the edge of the window.
// cal4_stable_window stays set until the next CAL4_INIT.
//*****************************************************************

always @(posedge clkdiv) begin
  if (cal4_state == CAL4_INIT) begin
    // clear both before each new window search
    cal4_stable_window <= 1'b0;
    cal4_window_cnt    <= 4'b0000;
  end else if (cal4_seek_left && (cal4_state == CAL4_FIND_EDGE)) begin
    // threshold check uses the counter value from before this edge
    if (cal4_window_cnt == MIN_WIN_SIZE-1)
      cal4_stable_window <= 1'b1;

    // while incrementing IDELAY towards the left edge, count consecutive
    // taps that are inside the window; any bad sample restarts the count
    if (cal4_data_valid && cal4_data_good)
      cal4_window_cnt <= cal4_window_cnt + 1;
    else if (cal4_data_valid)
      cal4_window_cnt <= 4'b0000;
  end
end
|
2086 |
|
|
|
2087 |
|
|
//*****************************************************************
// Shadow counter for the gate IDELAY tap setting, plus two flags:
//   cal4_idel_bit_tap - set by the increment issued while the count is
//                       CAL4_IDEL_BIT_VAL-2
//   cal4_idel_max_tap - set by the increment issued while the count is
//                       6'b111110 (i.e. the maximum tap is being reached)
// Both flags are cleared by any decrement, by CAL4_INIT, and by
// cal4_dlyrst_gate.
//*****************************************************************

always @(posedge clkdiv) begin
  if (cal4_dlyrst_gate || (cal4_state == CAL4_INIT)) begin
    cal4_idel_tap_cnt <= 6'b000000;
    cal4_idel_bit_tap <= 1'b0;
    cal4_idel_max_tap <= 1'b0;
  end else if (cal4_dlyce_gate) begin
    if (cal4_dlyinc_gate) begin
      // tap increment
      cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110);
      cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2);
      cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1;
    end else begin
      // tap decrement
      cal4_idel_max_tap <= 1'b0;
      cal4_idel_bit_tap <= 1'b0;
      cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1;
    end
  end
end
|
2108 |
|
|
|
2109 |
|
|
// Down-counter used to time the RDEN pipe-clear wait. It is held at
// CALIB_RDEN_PIPE_LEN-1 unless the stage 3 or stage 4 state machine is in
// its RDEN_PIPE_CLR_WAIT state, in which case it decrements every cycle.
always @(posedge clkdiv) begin
  if (!((cal4_state == CAL4_RDEN_PIPE_CLR_WAIT) ||
        (cal3_state == CAL3_RDEN_PIPE_CLR_WAIT)))
    calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1;
  else
    calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1;
end
|
2115 |
|
|
|
2116 |
|
|
//*****************************************************************
// Stage 4 calibration state machine (clkdiv domain).
// For each DQS group (indexed by count_gate) it searches for an edge of
// the data valid window by stepping the gate IDELAY and, when needed, the
// gate SRL delay (cal4_gate_srl_a), then applies a final IDELAY
// adjustment. calib_done[3] is raised once the last group is finished
// (or after the first group when SIM_ONLY != 0).
//*****************************************************************

always @(posedge clkdiv) begin
  if (rstdiv) begin
    cal4_state        <= CAL4_IDLE;
    calib_done[3]     <= 1'b0;
    calib_done_tmp[3] <= 1'b0;
    calib_err[3]      <= 1'b0;
    count_gate        <= 'b0;
    next_count_gate   <= 'b0;
    gate_dly          <= 'b0;
    cal4_ref_req      <= 1'b0;
    cal4_dlyce_gate   <= 1'b0;
    cal4_dlyinc_gate  <= 1'b0;
    cal4_dlyrst_gate  <= 1'b0; // reset handled elsewhere in code
    // values below are don't-care out of reset
    cal4_idel_adj_cnt <= 6'bxxxxxx;
    cal4_gate_srl_a   <= 5'bxxxxx;
    cal4_rden_srl_a   <= 5'bxxxxx;
    cal4_seek_left    <= 1'bx;
  end else begin
    // single-cycle strobes: default to inactive, states below override
    cal4_dlyrst_gate <= 1'b0;
    cal4_dlyinc_gate <= 1'b0;
    cal4_dlyce_gate  <= 1'b0;
    cal4_ref_req     <= 1'b0;

    case (cal4_state)
      // wait for the start request; hold the group counters at zero
      CAL4_IDLE: begin
        next_count_gate <= 'b0;
        count_gate      <= 'b0;
        if (calib_start[3]) begin
          cal4_state    <= CAL4_INIT;
          calib_done[3] <= 1'b0;
          gate_dly      <= 'b0;
        end
      end

      // load: (1) the initial gate delay SRL value, (2) the RDEN SRL value
      // already calibrated for the current group (so that the "data valid"
      // timing is correct); then let the SRL pipe clear
      CAL4_INIT: begin
        cal4_state      <= CAL4_RDEN_PIPE_CLR_WAIT;
        cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
                            calib_rden_dly[(count_gate*5)+3],
                            calib_rden_dly[(count_gate*5)+2],
                            calib_rden_dly[(count_gate*5)+1],
                            calib_rden_dly[(count_gate*5)]};
        cal4_gate_srl_a <= GATE_BASE_INIT;
      end

      // first check at a new gate delay: decide right away whether we
      // start inside the data valid window. If the data matches we are in
      // the window and go on to look for the left (trailing) edge;
      // otherwise we look for the right (leading) edge.
      CAL4_FIND_WINDOW: begin
        if (cal4_data_valid) begin
          cal4_state     <= CAL4_FIND_EDGE;
          cal4_seek_left <= cal4_data_good;
        end
      end

      // act only on the exact cycle in which the readback data check is
      // meaningful
      CAL4_FIND_EDGE: begin
        if (cal4_data_valid) begin
          if (cal4_seek_left) begin
            // Currently in the window, looking for its left edge.
            // The stable-window qualifier makes sure we are past the right
            // edge (and any edge "instability") before a failing sample is
            // accepted as the left edge - otherwise a "false" edge could
            // be detected too soon. Design notes: when we start inside the
            // window we always expect at least MIN(BIT_TIME_TAPS,32)
            // (-/+ jitter) taps of valid window before the left edge. This
            // is because stage 4 begins (gate_dly = 00, IDELAY = 00)
            // guaranteed NOT in the window and always searches
            // MIN(BIT_TIME_TAPS,32) taps for the right edge; if it is not
            // found, gate_dly is incremented, and if we then start in the
            // window we have approximately
            // CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32)
            // taps. It is only approximate because jitter, noise, etc. can
            // reduce the value slightly, so (although VERY UNLIKELY) the
            // adjustment phase must protect against decrementing IDELAY
            // below 0.
            if (cal4_stable_window && !cal4_data_good) begin
              // found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
              cal4_state        <= CAL4_ADJ_IDEL;
              cal4_idel_adj_inc <= 1'b0;
              cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
            end else if (cal4_idel_max_tap) begin
              // ran out of taps looking for left edge (max=63) - happens
              // for the low frequency case; decrement by 32
              cal4_state        <= CAL4_ADJ_IDEL;
              cal4_idel_adj_inc <= 1'b0;
              cal4_idel_adj_cnt <= 6'b100000;
            end else begin
              // keep looking for the left edge: one more tap
              cal4_state       <= CAL4_IDEL_WAIT;
              cal4_dlyinc_gate <= 1'b1;
              cal4_dlyce_gate  <= 1'b1;
            end
          end else begin
            // Looking for the right edge of the window.
            if (cal4_data_good) begin
              // first match = right (leading) edge of the data valid
              // window; increment by MIN(BIT_TIME_TAPS,32)
              cal4_state        <= CAL4_ADJ_IDEL;
              cal4_idel_adj_inc <= 1'b1;
              cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
            end else if (cal4_idel_bit_tap) begin
              // only MIN(BIT_TIME_TAPS,32) taps are searched for the right
              // edge; not found, so move to the next gate delay
              if (cal4_gate_srl_a == 5'b11111) begin
                // already at the maximum gate delay: error out
                // (simulation only - calib_err isn't currently connected)
                cal4_state   <= CAL4_IDLE;
                calib_err[3] <= 1'b1;
              end else begin
                // bump the gate delay, reset IDELAY, and start over
                cal4_state       <= CAL4_RDEN_PIPE_CLR_WAIT;
                cal4_dlyrst_gate <= 1'b1;
                cal4_gate_srl_a  <= cal4_gate_srl_a + 1;
              end
            end else begin
              // keep looking for the right edge: one more tap
              cal4_state       <= CAL4_IDEL_WAIT;
              cal4_dlyinc_gate <= 1'b1;
              cal4_dlyce_gate  <= 1'b1;
            end
          end
        end
      end

      // wait for the gate IDELAY to settle after a reset or increment,
      // keeping gate_dly in step with cal4_gate_srl_a meanwhile
      CAL4_IDEL_WAIT: begin
        if (SIM_ONLY != 0) begin
          // simulation: every DQS group gets the same SRL address
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
            gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
            gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
            gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
            gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
            gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
          end
        end else begin
          // hardware: only the group currently being calibrated
          gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
          gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
          gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
          gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
          gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
        end
        // settled: go back and check whether we've found the window edge
        if (!idel_set_wait)
          cal4_state <= CAL4_FIND_EDGE;
      end

      // give additional time for the RDEN_R pipe to clear from the effects
      // of the previous pipeline contents (and IDELAY reset)
      CAL4_RDEN_PIPE_CLR_WAIT: begin
        // MIG 2.2 bug fix: GATE_DLY must also be updated here, because a
        // FIND_EDGE -> RDEN_PIPE_CLR_WAIT -> FIND_WINDOW transition is
        // possible and the gate count changed in FIND_EDGE has to be
        // reflected in GATE_DLY by the time FIND_WINDOW is reached
        // (previously GATE_DLY was only updated in CAL4_IDEL_WAIT).
        if (SIM_ONLY != 0) begin
          for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
            gate_dly[(i*5)]   <= cal4_gate_srl_a[0];
            gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
            gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
            gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
            gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
          end
        end else begin
          gate_dly[(count_gate*5)]   <= cal4_gate_srl_a[0];
          gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
          gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
          gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
          gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
        end
        // pipe-clear counter expired: look for a new window
        if (calib_rden_pipe_cnt == 5'b00000)
          cal4_state <= CAL4_FIND_WINDOW;
      end

      // final DQS/DQ IDELAY adjustment, one tap per cycle
      CAL4_ADJ_IDEL: begin
        // Finish when the adjustment count is exhausted. Underflow
        // protection (corner case: left edge found using fewer than
        // MIN(BIT_TIME_TAPS,32) taps): also finish when a decrement was
        // issued last cycle while the tap count reads 1.
        if ((cal4_idel_adj_cnt == 6'b000000) ||
            (cal4_dlyce_gate && !cal4_dlyinc_gate &&
             (cal4_idel_tap_cnt == 6'b000001))) begin
          cal4_state <= CAL4_DONE;
          // stop when all gates calibrated, or gate[0] cal'ed (for sim)
          if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
            calib_done_tmp[3] <= 1'b1;
          else
            // staged in next_count_gate rather than written to count_gate
            // directly: needed for VHDL simulation to prevent an
            // out-of-index error
            next_count_gate <= count_gate + 1;
        end else begin
          cal4_dlyce_gate   <= 1'b1;
          // direction depends on whether the left or right edge was found
          cal4_dlyinc_gate  <= cal4_idel_adj_inc;
          cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
        end
      end

      // wait for the IDELAY output to settle after the adjustment, then
      // commit COUNT_GATE and decide whether calibration is finished
      CAL4_DONE: begin
        if (!idel_set_wait) begin
          count_gate <= next_count_gate;
          if (calib_done_tmp[3]) begin
            cal4_state    <= CAL4_IDLE;
            calib_done[3] <= 1'b1;
          end else begin
            // request auto-refresh after every DQS group calibrated to
            // avoid tRAS violation; move on once it has completed
            cal4_ref_req <= 1'b1;
            if (calib_ref_done)
              cal4_state <= CAL4_INIT;
          end
        end
      end
    endcase
  end
end
|
2348 |
|
|
|
2349 |
|
|
endmodule
|