1 |
201 |
dgisselq |
////////////////////////////////////////////////////////////////////////////////
|
2 |
|
|
//
|
3 |
|
|
// Filename: dcache.v
|
4 |
|
|
//
|
5 |
|
|
// Project: Zip CPU -- a small, lightweight, RISC CPU soft core
|
6 |
|
|
//
|
7 |
|
|
// Purpose: To provide a simple data cache for the ZipCPU. The cache is
|
8 |
|
|
// designed to be a drop in replacement for the pipemem memory
|
9 |
|
|
// unit currently existing within the ZipCPU. The goal of this unit is
|
10 |
|
|
// to achieve single cycle read access to any memory in the last cache line
|
11 |
|
|
// used, or two cycle access to any memory currently in the cache.
|
12 |
|
|
//
|
13 |
|
|
// The cache separates between four types of accesses, one write and three
|
14 |
|
|
// read access types. The read accesses are split between those that are
|
15 |
|
|
// not cacheable, those that are in the cache, and those that are not.
|
16 |
|
|
//
|
17 |
|
|
// 1. Write accesses always create writes to the bus. For these reasons,
|
18 |
|
|
// these may always be considered cache misses.
|
19 |
|
|
//
|
20 |
|
|
// Writes to memory locations within the cache must also update
|
21 |
|
|
// cache memory immediately, to keep the cache in synch.
|
22 |
|
|
//
|
23 |
|
|
// It is our goal to be able to maintain single cycle write
|
24 |
|
|
// accesses for memory bursts.
|
25 |
|
|
//
|
26 |
|
|
// 2. Read access to non-cacheable memory locations will also immediately
|
27 |
|
|
// go to the bus, just as all write accesses go to the bus.
|
28 |
|
|
//
|
29 |
|
|
// 3. Read accesses to cacheable memory locations will immediately read
|
30 |
|
|
// from the appropriate cache line. However, since the valid
|
31 |
|
|
// line will take a second clock to read, it may take up to two
|
32 |
|
|
// clocks to know if the memory was in cache. For this reason,
|
33 |
|
|
// we bypass the test for the last validly accessed cache line.
|
34 |
|
|
//
|
35 |
|
|
// We shall design these read accesses so that reads to the cache
|
36 |
|
|
// may take place concurrently with other writes to the bus.
|
37 |
|
|
//
|
38 |
|
|
// Errors in cache reads will void the entire cache line. For this reason,
|
39 |
|
|
// cache lines must always be smaller in size than any associated
|
40 |
|
|
// virtual page size--lest in the middle of reading a page a TLB miss
|
41 |
|
|
// take place referencing only a part of the cacheable page.
|
42 |
|
|
//
|
43 |
|
|
//
|
44 |
|
|
//
|
45 |
|
|
//
|
46 |
|
|
// Creator: Dan Gisselquist, Ph.D.
|
47 |
|
|
// Gisselquist Technology, LLC
|
48 |
|
|
//
|
49 |
|
|
////////////////////////////////////////////////////////////////////////////////
|
50 |
|
|
//
|
51 |
|
|
// Copyright (C) 2016, Gisselquist Technology, LLC
|
52 |
|
|
//
|
53 |
|
|
// This program is free software (firmware): you can redistribute it and/or
|
54 |
|
|
// modify it under the terms of the GNU General Public License as published
|
55 |
|
|
// by the Free Software Foundation, either version 3 of the License, or (at
|
56 |
|
|
// your option) any later version.
|
57 |
|
|
//
|
58 |
|
|
// This program is distributed in the hope that it will be useful, but WITHOUT
|
59 |
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
60 |
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
61 |
|
|
// for more details.
|
62 |
|
|
//
|
63 |
|
|
// License: GPL, v3, as defined and found on www.gnu.org,
|
64 |
|
|
// http://www.gnu.org/licenses/gpl.html
|
65 |
|
|
//
|
66 |
|
|
//
|
67 |
|
|
////////////////////////////////////////////////////////////////////////////////
|
68 |
|
|
//
|
69 |
|
|
//
|
70 |
|
|
module	dcache(i_clk, i_rst, i_pipe_stb, i_lock,
		i_op, i_addr, i_data, i_oreg,
			o_busy, o_pipe_stalled, o_valid, o_err, o_wreg,o_data,
		o_wb_cyc_gbl, o_wb_cyc_lcl, o_wb_stb_gbl, o_wb_stb_lcl,
			o_wb_we, o_wb_addr, o_wb_data,
		i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
	//
	// Parameters
	//	LGCACHELEN	log_2 of the number of 32-bit words in c_mem
	//	ADDRESS_WIDTH	width of the address driven onto o_wb_addr
	//	LGNLINES	log_2 of the number of separate cache lines
	//	IMPLEMENT_LOCK	not referenced by any logic visible in this
	//			file -- NOTE(review): confirm whether the bus
	//			lock was meant to be conditional on it
	//	NAUX		width of the aux word (i_oreg/o_wreg) carried
	//			alongside each memory operation
	//
	parameter	LGCACHELEN = 8,
			ADDRESS_WIDTH=32,
			LGNLINES=5, // Log of the number of separate cache lines
			IMPLEMENT_LOCK=0,
			NAUX=5;	// # of aux d-wires to keep aligned w/memops
	// Address bits which, when set, mark an address as cachable
	// (see w_cachable)
	localparam	SDRAM_BIT = 26;
	localparam	FLASH_BIT = 22;
	localparam	BLKRAM_BIT= 15;
	localparam	AW = ADDRESS_WIDTH; // Just for ease of notation below
	localparam	CS = LGCACHELEN; // Number of bits in a cache address
	localparam	LS = CS-LGNLINES; // Bits to spec position w/in cline
	localparam	LGAUX = 3; // log_2 of the maximum number of piped data
	input			i_clk, i_rst;
	// Interface from the CPU
	//	i_pipe_stb	a memory operation is being requested
	//	i_lock		hold the bus cycle open between operations
	//	i_op		1 selects a write, 0 selects a read
	input			i_pipe_stb, i_lock;
	input			i_op;
	input	[31:0]		i_addr;
	input	[31:0]		i_data;
	input	[(NAUX-1):0]	i_oreg;	// Aux data, such as reg to write to
	// Outputs, going back to the CPU
	output	wire		o_busy, o_pipe_stalled;
	// o_valid and o_err are assigned from clocked always blocks further
	// down, so they must be declared as registers rather than wires.
	output	reg		o_valid, o_err;
	output	reg	[(NAUX-1):0]	o_wreg;
	output	reg	[31:0]	o_data;
	// Wishbone bus master outputs
	output	wire		o_wb_cyc_gbl, o_wb_cyc_lcl;
	output	reg		o_wb_stb_gbl, o_wb_stb_lcl;
	output	reg		o_wb_we;
	output	reg	[(AW-1):0]	o_wb_addr;
	output	reg	[31:0]	o_wb_data;
	// Wishbone bus slave response inputs
	input			i_wb_ack, i_wb_stall, i_wb_err;
	input	[31:0]		i_wb_data;
|
108 |
|
|
|
109 |
|
|
|
110 |
|
|
// Bus bookkeeping flags.
//	cyc, stb	: internal "cycle active" / "request pending" flags
//			  maintained by the bus state machine
//	end_of_line	: the next ack finishes a cache line fill
//	last_line_stb	: the next accepted request is the last of a fill
//	last_ack	: read when deciding whether an ack ends a burst
// NOTE(review): no statement in the visible source ever assigns last_ack,
// so everything derived from it is X in simulation -- confirm where it is
// supposed to be driven.
reg	cyc, stb, last_ack, end_of_line, last_line_stb;

// Cache storage
reg	[((1<<LGNLINES)-1):0]	c_v;	// One bit per cache line, is it valid?
reg	[(AW-LS-1):0]	c_vtags	[0:((1<<LGNLINES)-1)];	// Tag held by each line
reg	[31:0]		c_mem	[0:((1<<CS)-1)];	// The cached words

// Start with every line invalid.  Without this the valid bits are X in
// simulation and the hit/miss tests that read them are X as well.
initial	c_v = 0;

// To simplify writing to the cache, and the job of the synthesizer to
// recognize that a cache write needs to take place, we'll take an extra
// clock to get there, and use these c_w... registers to capture the
// data in the meantime.
reg			c_wr;		// Write c_wdata into c_mem[c_waddr]
reg	[31:0]		c_wdata;
reg	[(CS-1):0]	c_waddr;

// Tag recorded on the most recent two-clock cache hit; consulted by the
// single-clock read path.
reg	[(AW-LS-1):0]	last_tag;
|
129 |
|
|
|
130 |
|
|
|
131 |
|
|
// Pieces of the incoming address:
//	i_cline	: which cache line the address maps to
//	i_caddr	: the word's address within cache memory
//	i_ctag	: the tag to compare against the line's stored tag
wire	[(LGNLINES-1):0]	i_cline;
wire	[(CS-1):0]		i_caddr;
wire	[(AW-LS-1):0]		i_ctag;

assign	i_cline = i_addr[(CS-1):LS];
assign	i_caddr = i_addr[(CS-1):0];
assign	i_ctag  = i_addr[(AW-1):LS];

wire	cache_miss_inow, w_cachable;
// Single-clock miss test: the request is outside of the last line that hit,
// or that line is not valid.  The comparison uses i_ctag so that both sides
// are AW-LS bits wide whatever ADDRESS_WIDTH is; the former i_addr[31:LS]
// slice only matched last_tag's width when AW was 32.
assign	cache_miss_inow = (last_tag != i_ctag)||(!c_v[i_cline]);
// An address may be cached when its top two bits are not 2'b11 (that value
// selects the local bus in the bus state machine), no lock is requested,
// and at least one of the SDRAM, flash or block RAM select bits is set.
assign	w_cachable = (i_addr[31:30]!=2'b11)&&(!i_lock)&&(
			((SDRAM_BIT>0)&&(i_addr[SDRAM_BIT]))
			||((FLASH_BIT>0)&&(i_addr[FLASH_BIT]))
			||((BLKRAM_BIT>0)&&(i_addr[BLKRAM_BIT])));
|
145 |
|
|
|
146 |
|
|
// Flags belonging to the request as registered one clock after it arrives
reg	r_cachable;	// A read of a cachable address was requested
reg	r_svalid;	// The single-clock path hit
reg	r_dvalid;	// The two-clock path hit
reg	r_rd;		// A read was strobed on the previous clock
reg	r_cache_miss;	// Cachable read missed on both paths
reg	r_rvalid;	// An ack arrived while last_ack was set

// The request address, held whenever the bus state machine is busy
reg	[(AW-1):0]	r_addr;

// Words read back out of cache memory
reg	[31:0]	r_idata;
reg	[31:0]	r_ddata;
reg	[31:0]	r_rdata;

// The same line / cache-address / tag split as is applied to i_addr, only
// taken from the registered address instead
wire	[(LGNLINES-1):0]	r_cline = r_addr[(CS-1):LS];
wire	[(CS-1):0]		r_caddr = r_addr[(CS-1):0];
wire	[(AW-LS-1):0]		r_ctag  = r_addr[(AW-1):LS];


// Write-through strobe, and the bus state machine's operation class flags
reg	wr_cstb;
reg	pipeable_op, non_pipeable_op;
reg	in_cache;

// Valid bit and tag fetched for the line under examination
reg			r_iv;
reg	[(AW-LS-1):0]	r_itag;
|
160 |
|
|
|
161 |
|
|
//
// Cache lookup pipeline
//
// Everything in this block is a once-per-clock registration of the lookup
// results for the incoming request: the single-clock path (same line as the
// last hit), the two-clock path (tag and valid bit fetched on the first
// clock, compared on the second), and the miss flag that starts a line fill.
//
initial	r_dvalid   = 1'b0;
initial	r_svalid   = 1'b0;
initial	r_cachable = 1'b0;
initial	r_rd       = 1'b0;

// The tag/valid pair fetched on the previous clock matches the registered
// address.
wire	w_tag_hit;
assign	w_tag_hit = (r_itag == r_ctag)&&(r_iv);

always @(posedge i_clk)
begin
	//
	// Single clock path
	//
	r_idata <= c_mem[i_addr[(CS-1):0]];
	// It is only good if nothing is on its way into cache memory
	r_svalid <= (i_pipe_stb)&&(!i_op)&&(w_cachable)
			&&(!cache_miss_inow)&&(!wr_cstb)&&(!c_wr);

	//
	// Two clock path, first clock: fetch the valid bit and tag.  While
	// the bus is busy the request is held, and the fetch is repeated for
	// the held address.
	//
	if (!o_busy)
	begin
		r_addr     <= i_addr;
		r_cachable <= (i_pipe_stb)&&(w_cachable)&&(!i_op);
		r_itag     <= c_vtags[i_cline];
		r_iv       <= c_v[i_cline];
	end else begin
		r_itag <= c_vtags[r_cline];
		r_iv   <= c_v[r_cline];
	end

	r_rd    <= (!i_op)&&(i_pipe_stb);
	r_ddata <= r_idata;

	//
	// Two clock path, second clock: compare, and remember the tag of
	// any hit for the benefit of the single clock path.
	//
	r_dvalid <= (w_tag_hit)&&(r_cachable);
	if ((w_tag_hit)&&(r_cachable))
		last_tag <= r_ctag;

	// A miss requires a cachable read that neither path satisfied, and
	// that no bus read is already under way.
	r_cache_miss <= (r_cachable)&&(r_rd)&&(!r_svalid)
			&&(!w_tag_hit)
			&&((o_wb_we)||(!cyc));

	r_rdata  <= c_mem[r_addr[(CS-1):0]];
	r_rvalid <= (last_ack)&&(i_wb_ack);
end
|
221 |
|
|
|
222 |
|
|
`define	DC_IDLE		2'b00	// No bus cycle under way
`define	DC_WRITE	2'b01	// Writing to the bus
`define	DC_READS	2'b10	// Reading memory that will not be cached
`define	DC_READC	2'b11	// Reading a whole line into the cache
reg	[1:0]	state;

// Per-bus cycle flags.  These are assigned below and read by the lock logic;
// they were previously used without ever being declared.
reg	r_wb_cyc_gbl, r_wb_cyc_lcl;

// First stage of the write-through pipeline: a copy of the bus write, with
// the tag stored for its cache line (wr_vtag) and its own tag (wr_wtag).
reg	[(AW-LS-1):0]	wr_wtag, wr_vtag;
reg	[31:0]		wr_data;
reg	[(CS-1):0]	wr_addr;

// Cache line and tag of the address on the bus, sliced exactly as i_cline
// and i_ctag slice the CPU's address.  The valid bits and tags used to be
// written using the low-order bits of o_wb_addr, which never lined up with
// the bits the lookups read them back with.
wire	[(LGNLINES-1):0]	wb_cline;
wire	[(AW-LS-1):0]		wb_ctag;
assign	wb_cline = o_wb_addr[(CS-1):LS];
assign	wb_ctag  = o_wb_addr[(AW-1):LS];

// state is only ever assigned from within tests of state itself, so without
// a defined starting point it would remain X for the whole of a simulation.
initial	state        = `DC_IDLE;
initial	cyc          = 1'b0;
initial	stb          = 1'b0;
initial	r_wb_cyc_gbl = 1'b0;
initial	r_wb_cyc_lcl = 1'b0;
initial	o_wb_stb_gbl = 1'b0;
initial	o_wb_stb_lcl = 1'b0;
initial	wr_cstb      = 1'b0;
initial	c_wr         = 1'b0;

//
// The bus state machine
//
// From DC_IDLE a request launches one of three bus operations:
//	DC_WRITE : every write goes to the bus.  Writes to cachable addresses
//		   also pass through the wr_* and c_w* registers so that a
//		   matching cache line is updated.
//	DC_READC : a cachable read missed; read the entire line.
//	DC_READS : a read of a non-cachable address.
// Each returns to DC_IDLE on its final ack or on a bus error.
//
always @(posedge i_clk)
begin
	// By default, update the cache from the write 1-clock ago
	c_wr    <= (wr_cstb)&&(wr_wtag == wr_vtag);
	c_wdata <= wr_data;
	c_waddr <= wr_addr[(CS-1):0];

	wr_cstb <= 1'b0;
	wr_vtag <= c_vtags[wb_cline];
	wr_wtag <= wb_ctag;
	wr_data <= o_wb_data;
	wr_addr <= o_wb_addr[(CS-1):0];


	// end_of_line: the next ack will complete the line being filled
	if (LS <= 1)
		end_of_line <= 1'b1;
	else
		end_of_line<=(cyc)&&((c_waddr[(LS-1):1]=={(LS-1){1'b1}})
			||((i_wb_ack)
			&&(c_waddr[(LS-1):0]=={{(LS-2){1'b1}},2'b01})));

	// last_line_stb: the next accepted request is the last of the line
	if (LS <= 1)
		last_line_stb <= 1'b1;
	else
		last_line_stb <= (stb)&&
			((o_wb_addr[(LS-1):1]=={(LS-1){1'b1}})
			||((!i_wb_stall)
				&&(o_wb_addr[(LS-1):0]
					=={{(LS-2){1'b1}},2'b01})));

	if (state == `DC_IDLE)
	begin
		o_wb_we <= 1'b0;
		o_wb_data <= i_data;
		pipeable_op <= 1'b0;
		// NOTE(review): this default leaves non_pipeable_op, and so
		// o_pipe_stalled, high whenever the cache is idle and also
		// throughout DC_WRITE and DC_READS -- confirm that this is
		// intended.
		non_pipeable_op <= 1'b1;

		cyc <= 1'b0;
		stb <= 1'b0;

		r_wb_cyc_gbl <= 1'b0;
		r_wb_cyc_lcl <= 1'b0;
		o_wb_stb_gbl <= 1'b0;
		o_wb_stb_lcl <= 1'b0;

		in_cache <= (i_op)&&(w_cachable);
		if ((i_pipe_stb)&&(i_op))
		begin // Write operation
			state <= `DC_WRITE;
			o_wb_addr <= i_addr;
			o_wb_we <= 1'b1;
			pipeable_op <= 1'b1;

			cyc <= 1'b1;
			stb <= 1'b1;

			// Addresses whose top two bits are 2'b11 belong to
			// the local bus, everything else to the global bus
			r_wb_cyc_gbl <= (i_addr[31:30]!=2'b11);
			r_wb_cyc_lcl <= (i_addr[31:30]==2'b11);
			o_wb_stb_gbl <= (i_addr[31:30]!=2'b11);
			o_wb_stb_lcl <= (i_addr[31:30]==2'b11);

		end else if (r_cache_miss)
		begin // Cachable read miss: fetch the whole line
			state <= `DC_READC;
			// NOTE(review): r_cache_miss was calculated from the
			// registered address (r_ctag), yet the line address
			// is taken from the live i_addr -- confirm that the
			// CPU holds i_addr steady until the miss is serviced.
			o_wb_addr <= { i_ctag, {(LS){1'b0}} };
			non_pipeable_op <= 1'b1;

			// Point the cache write address at the first word of
			// the line.  DC_READC only ever increments c_waddr,
			// so without this the returning words would be
			// written starting from a stale address.
			c_waddr <= { i_cline, {(LS){1'b0}} };

			cyc <= 1'b1;
			stb <= 1'b1;
			r_wb_cyc_gbl <= 1'b1;
			o_wb_stb_gbl <= 1'b1;
		end else if ((i_pipe_stb)&&(!w_cachable))
		begin // Read non-cachable memory area
			state <= `DC_READS;
			o_wb_addr <= i_addr;
			pipeable_op <= 1'b1;

			cyc <= 1'b1;
			stb <= 1'b1;
			r_wb_cyc_gbl <= (i_addr[31:30]!=2'b11);
			r_wb_cyc_lcl <= (i_addr[31:30]==2'b11);
			o_wb_stb_gbl <= (i_addr[31:30]!=2'b11);
			o_wb_stb_lcl <= (i_addr[31:30]==2'b11);
		end // else we stay idle

	end else if (state == `DC_READC)
	begin
		// We enter here once we have committed to reading
		// data into a cache line.
		if ((stb)&&(!i_wb_stall))
		begin
			// Request accepted: move on to the next word, or
			// stop requesting once the last word has been asked
			// for.
			stb <= (!last_line_stb);
			o_wb_stb_gbl <= (!last_line_stb);
			o_wb_addr[(LS-1):0] <= o_wb_addr[(LS-1):0]+1'b1;
		end

		// The line is not to be trusted while it is being refilled
		if(stb)
			c_v[wb_cline] <= 1'b0;

		// Each acknowledged word is written into the cache.  The data
		// must be what the bus returned (i_wb_data); o_wb_data is the
		// outgoing write data and carries nothing useful on a read.
		c_wr <= (i_wb_ack);
		c_wdata <= i_wb_data;
		c_waddr <= ((c_wr)?(c_waddr+1'b1):c_waddr);

		c_vtags[wb_cline] <= wb_ctag;

		if (((i_wb_ack)&&(end_of_line))||(i_wb_err))
		begin
			state <= `DC_IDLE;
			non_pipeable_op <= 1'b0;
			cyc <= 1'b0;
			r_wb_cyc_gbl <= 1'b0;
			r_wb_cyc_lcl <= 1'b0;
			// The strobes must not outlive the cycle, which they
			// otherwise might if an error cut the burst short
			stb <= 1'b0;
			o_wb_stb_gbl <= 1'b0;
			o_wb_stb_lcl <= 1'b0;
			// The line is valid only if we ended on an ack
			c_v[wb_cline] <= i_wb_ack;
		end
	end else if (state == `DC_READS)
	begin
		// We enter here once we have committed to reading
		// data that cannot go into a cache line
		if ((!i_wb_stall)&&(!i_pipe_stb))
		begin
			stb <= 1'b0;
			o_wb_stb_gbl <= 1'b0;
			o_wb_stb_lcl <= 1'b0;
			pipeable_op <= 1'b0;
		end

		// A further piped read: its address arrives on i_addr.  This
		// used to be loaded from i_data, the write data input.
		if ((!i_wb_stall)&&(i_pipe_stb))
			o_wb_addr <= i_addr;

		c_wr <= 1'b0;

		if (((i_wb_ack)&&(last_ack))||(i_wb_err))
		begin
			state <= `DC_IDLE;
			cyc <= 1'b0;
			r_wb_cyc_gbl <= 1'b0;
			r_wb_cyc_lcl <= 1'b0;
			stb <= 1'b0;
			o_wb_stb_gbl <= 1'b0;
			o_wb_stb_lcl <= 1'b0;
		end
	end else if (state == `DC_WRITE)
	begin
		// c_wr, c_wdata and c_waddr are left to the defaults at the
		// top of this block, so that the cache is updated from the
		// wr_* copy of the accepted bus write.  This branch used to
		// override c_wdata and c_waddr, with an initialization test
		// on (state == `DC_IDLE) that could never be true in here,
		// so that cache writes missed the address being written.

		if ((!i_wb_stall)&&(!i_pipe_stb))
		begin
			stb <= 1'b0;
			o_wb_stb_gbl <= 1'b0;
			o_wb_stb_lcl <= 1'b0;
			pipeable_op <= 1'b0;
		end

		// Push an accepted write to a cachable address toward the
		// cache as well
		wr_cstb  <= (stb)&&(!i_wb_stall)&&(in_cache);

		// A further piped write replaces the one just accepted
		if ((stb)&&(!i_wb_stall)&&(i_pipe_stb))
		begin
			o_wb_addr <= i_addr;
			o_wb_data <= i_data;
		end

		if (((i_wb_ack)&&(last_ack))||(i_wb_err))
		begin
			state <= `DC_IDLE;
			cyc <= 1'b0;
			r_wb_cyc_gbl <= 1'b0;
			r_wb_cyc_lcl <= 1'b0;
			stb <= 1'b0;
			o_wb_stb_gbl <= 1'b0;
			o_wb_stb_lcl <= 1'b0;
		end
	end

	// A reset abandons any bus cycle and empties the cache.  It comes
	// last so as to override everything above.
	if (i_rst)
	begin
		state <= `DC_IDLE;
		cyc <= 1'b0;
		stb <= 1'b0;
		r_wb_cyc_gbl <= 1'b0;
		r_wb_cyc_lcl <= 1'b0;
		o_wb_stb_gbl <= 1'b0;
		o_wb_stb_lcl <= 1'b0;
		wr_cstb <= 1'b0;
		c_wr <= 1'b0;
		c_v <= 0;
	end
end
|
408 |
|
|
|
409 |
|
|
//
// The cache memory's write port
//
// Kept to a single gated, clocked write so that it looks like a block RAM
// write port to as many synthesis tools as possible.  The enable, address
// and data were all registered on an earlier clock.
//
always @(posedge i_clk)
begin
	if (c_wr)
		c_mem[c_waddr] <= c_wdata;
end
|
420 |
|
|
|
421 |
|
|
//
// The cache memory's read ports
//
// Both reads are registered, as some architectures insist upon for memory
// reads.  Choosing between them (or neither) is left to the output logic.
//
reg	[31:0]	cached_idata;	// Word at the incoming request's address
reg	[31:0]	cached_rdata;	// Word at the registered request's address
always @(posedge i_clk)
begin
	cached_rdata <= c_mem[r_caddr];
	cached_idata <= c_mem[i_caddr];
end
|
435 |
|
|
|
436 |
|
|
// o_data is drawn from one of four sources:
//	1. The cache, when the word was in the last cache line to hit
//	2. The cache, on the second clock, when the word was cached at all
//	3. The cache, once a line fill has completed
//	4. The wishbone bus, as the requested value is returned
// Source 1 is cached_idata and source 4 is i_wb_data; whenever neither of
// those is selected, cached_rdata is passed through.
wire	w_bus_return;
assign	w_bus_return = (i_wb_ack)&&(pipeable_op);

always @(posedge i_clk)
begin
	if (r_svalid)
		o_data <= cached_idata;
	else if (w_bus_return)
		o_data <= i_wb_data;
	else
		o_data <= cached_rdata;

	// Any one of the four sources may announce a result
	o_valid <= (r_svalid)||(w_bus_return)||(r_dvalid)||(r_rvalid);

	// Bus errors are only reported while we own a bus cycle
	o_err <= (i_wb_err)&&(cyc);
end

// We are busy any time the bus state machine is not idle
assign	o_busy = (state != `DC_IDLE);
|
455 |
|
|
|
456 |
|
|
|
457 |
|
|
//
// The auxiliary data lines
//
// The aux word presented with a request (i_oreg) is stored at the head of a
// small FIFO.  The word at the tail is presented on o_wreg, and the tail
// advances on every o_valid.  A reset or a bus error returns both pointers
// to zero.
//
// The original notes describe this as sized for bursts of writes and of
// non-cachable reads, and as a structure to be reused once an MMU is
// introduced.
//
reg	[(LGAUX-1):0]	aux_head;	// Where the next request's word goes
reg	[(LGAUX-1):0]	aux_tail;	// The word belonging to the next result
reg	[(NAUX-1):0]	aux_fifo	[0:((1<<LGAUX)-1)];

wire	w_aux_clear;
assign	w_aux_clear = (i_rst)||(i_wb_err);

initial	aux_tail = 0;
initial	aux_head = 0;
always @(posedge i_clk)
begin
	// Write side.  The head only moves for requests made while idle, but
	// the slot it points at is rewritten on every clock.
	if (w_aux_clear)
		aux_head <= 0;
	else if ((i_pipe_stb)&&(!o_busy))
		aux_head <= aux_head + 1'b1;
	aux_fifo[aux_head] <= i_oreg;

	// Read side
	if (w_aux_clear)
		aux_tail <= 0;
	else if (o_valid) // ||(aux_tail[WBIT])&&(no-mmu-error)
		aux_tail <= aux_tail + 1'b1;
	o_wreg <= aux_fifo[aux_tail];
end
|
490 |
|
|
|
491 |
|
|
//
// We can use our FIFO addresses to pre-calculate when an ACK is going
// to be the last_noncachable_ack.
//
// NOTE(review): no such calculation follows this comment, and nothing in the
// visible source drives last_ack -- confirm whether that logic is missing.


// The CPU must hold off on any further piped request while a pipeable
// operation is being stalled by the bus, or for as long as a non-pipeable
// operation is flagged.
assign	o_pipe_stalled=((pipeable_op)&&(i_wb_stall))||(non_pipeable_op);
// pipeable_op must become zero when stb goes low

//
// Bus lock
//
// Once a cycle has been opened on a bus while i_lock is high, that bus's
// cycle line stays high until i_lock drops, whatever the state machine does
// with its own cycle flag in the meantime.
//
// Both lock registers are assigned and read below; they used to have no
// declaration at all.
reg	lock_gbl, lock_lcl;
initial	lock_gbl = 1'b0;
initial	lock_lcl = 1'b0;
always @(posedge i_clk)
begin
	lock_gbl <= (i_lock)&&((r_wb_cyc_gbl)||(lock_gbl));
	lock_lcl <= (i_lock)&&((r_wb_cyc_lcl)||(lock_lcl));
end

assign	o_wb_cyc_gbl = (r_wb_cyc_gbl)||(lock_gbl);
assign	o_wb_cyc_lcl = (r_wb_cyc_lcl)||(lock_lcl);
|
507 |
|
|
endmodule
|