OpenCores
URL https://opencores.org/ocsvn/zipcpu/zipcpu/trunk

Subversion Repositories zipcpu

[/] [zipcpu/] [trunk/] [rtl/] [core/] [dcache.v] - Blame information for rev 207

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 201 dgisselq
////////////////////////////////////////////////////////////////////////////////
2
//
3
// Filename:    dcache.v
4
//
5
// Project:     Zip CPU -- a small, lightweight, RISC CPU soft core
6
//
7
// Purpose:     To provide a simple data cache for the ZipCPU.  The cache is
8
//              designed to be a drop in replacement for the pipemem memory
9
//      unit currently existing within the ZipCPU.  The goal of this unit is
10
//      to achieve single cycle read access to any memory in the last cache line
11
//      used, or two cycle access to any memory currently in the cache.
12
//
13
//      The cache separates between four types of accesses, one write and three
14
//      read access types.  The read accesses are split between those that are
15
//      not cacheable, those that are in the cache, and those that are not.
16
//
17
//      1. Write accesses always create writes to the bus.  For these reasons,
18
//              these may always be considered cache misses.
19
//
20
//              Writes to memory locations within the cache must also update
21
//              cache memory immediately, to keep the cache in synch.
22
//
23
//              It is our goal to be able to maintain single cycle write
24
//              accesses for memory bursts.
25
//
26
//      2. Read access to non-cacheable memory locations will also immediately
27
//              go to the bus, just as all write accesses go to the bus.
28
//
29
//      3. Read accesses to cacheable memory locations will immediately read
30
//              from the appropriate cache line.  However, since the valid
31
//              line will take a second clock to read, it may take up to two
32
//              clocks to know if the memory was in cache.  For this reason,
33
//              we bypass the test for the last validly accessed cache line.
34
//
35
//              We shall design these read accesses so that reads to the cache
36
//              may take place concurrently with other writes to the bus.
37
//
38
//      Errors in cache reads will void the entire cache line.  For this reason,
39
//      cache lines must always be smaller in size than any associated
40
//      virtual page size--lest in the middle of reading a page a TLB miss
41
//      take place referencing only a part of the cacheable page.
42
//
43
//      
44
//
45
//
46
// Creator:     Dan Gisselquist, Ph.D.
47
//              Gisselquist Technology, LLC
48
//
49
////////////////////////////////////////////////////////////////////////////////
50
//
51
// Copyright (C) 2016, Gisselquist Technology, LLC
52
//
53
// This program is free software (firmware): you can redistribute it and/or
54
// modify it under the terms of  the GNU General Public License as published
55
// by the Free Software Foundation, either version 3 of the License, or (at
56
// your option) any later version.
57
//
58
// This program is distributed in the hope that it will be useful, but WITHOUT
59
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
60
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
61
// for more details.
62
//
63
// License:     GPL, v3, as defined and found on www.gnu.org,
64
//              http://www.gnu.org/licenses/gpl.html
65
//
66
//
67
////////////////////////////////////////////////////////////////////////////////
68
//
69
//
70
//
// dcache -- data cache sitting between the CPU's memory port and a Wishbone
// bus that is split into a "global" (gbl) and a "local" (lcl) cycle/strobe
// pair.
//
// Parameters:
//   LGCACHELEN     log_2 of the number of 32-bit words held in the cache
//   ADDRESS_WIDTH  width of the Wishbone address output
//   LGNLINES       log_2 of the number of separate cache lines
//   IMPLEMENT_LOCK NOTE(review): declared, but nothing in this module reads it
//   NAUX           width of the auxiliary value carried with each operation
//
module  dcache(i_clk, i_rst, i_pipe_stb, i_lock,
                i_op, i_addr, i_data, i_oreg,
                        o_busy, o_pipe_stalled, o_valid, o_err, o_wreg,o_data,
                o_wb_cyc_gbl, o_wb_cyc_lcl, o_wb_stb_gbl, o_wb_stb_lcl,
                        o_wb_we, o_wb_addr, o_wb_data,
                i_wb_ack, i_wb_stall, i_wb_err, i_wb_data);
        parameter       LGCACHELEN = 8,
                        ADDRESS_WIDTH=32,
                        LGNLINES=5, // Log of the number of separate cache lines
                        IMPLEMENT_LOCK=0,
                        NAUX=5; // # of aux d-wires to keep aligned w/memops
        // An address is treated as cachable when any one of these address
        // bits is set (see w_cachable).  A value of zero disables that term.
        localparam      SDRAM_BIT = 26;
        localparam      FLASH_BIT = 22;
        localparam      BLKRAM_BIT= 15;
        localparam      AW = ADDRESS_WIDTH; // Just for ease of notation below
        localparam      CS = LGCACHELEN; // Number of bits in a cache address
        localparam      LS = CS-LGNLINES; // Bits to spec position w/in cline
        localparam      LGAUX = 3; // log_2 of the depth of the auxiliary FIFO
        input                   i_clk, i_rst;
        // Interface from the CPU
        //   i_pipe_stb  request strobe
        //   i_lock      request that the bus cycle be held; also makes the
        //               access non-cachable
        //   i_op        1 = write, 0 = read
        input                   i_pipe_stb, i_lock;
        input                   i_op;
        input   [31:0]           i_addr;
        input   [31:0]           i_data;
        input   [(NAUX-1):0]     i_oreg; // Aux data, such as reg to write to
        // Outputs, going back to the CPU
        output  wire            o_busy, o_pipe_stalled;
        // o_valid and o_err are assigned from clocked always blocks, so they
        // have to be registers: a procedural assignment to a wire is illegal.
        output  reg             o_valid, o_err;
        output reg [(NAUX-1):0]  o_wreg;
        output  reg     [31:0]   o_data;
        // Wishbone bus master outputs
        output  wire            o_wb_cyc_gbl, o_wb_cyc_lcl;
        output  reg             o_wb_stb_gbl, o_wb_stb_lcl;
        output  reg             o_wb_we;
        output  reg     [(AW-1):0]       o_wb_addr;
        output  reg     [31:0]           o_wb_data;
        // Wishbone bus slave response inputs
        input                           i_wb_ack, i_wb_stall, i_wb_err;
        input           [31:0]           i_wb_data;
108
 
109
 
110
        // Bookkeeping flags for the Wishbone state machine.
        // NOTE(review): last_ack is read by the state machine and by the
        // lookup pipeline, but no assignment to it is visible anywhere in
        // this module -- it still needs a driver.
        reg     cyc, stb, last_ack, end_of_line, last_line_stb;

        // Registered bus-cycle lines for the global and the local bus.  The
        // state machine assigns these procedurally, so they must be declared
        // as registers; the module previously used them without declaring
        // them at all.
        reg     r_wb_cyc_gbl, r_wb_cyc_lcl;

        // The cache itself: one valid bit and one tag per line, plus the data
        reg     [((1<<LGNLINES)-1):0] c_v;       // One bit per cache line, is it valid?
        reg     [(AW-LS-1):0]    c_vtags [0:((1<<LGNLINES)-1)];
        reg     [31:0]           c_mem   [0:((1<<CS)-1)];

        // To simplify writing to the cache, and the job of the synthesizer to
        // recognize that a cache write needs to take place, we'll take an extra
        // clock to get there, and use these c_w... registers to capture the
        // data in the meantime.
        reg                     c_wr;
        reg     [31:0]           c_wdata;
        reg     [(CS-1):0]       c_waddr;
127
 
128
        // Tag remembered from the most recent hit on the two-clock lookup
        // path; the single-clock path compares incoming requests against it.
        reg     [(AW-LS-1):0]    last_tag;

        // The incoming request address, split into cache coordinates:
        // line number, word address within the cache, and tag.
        wire    [(LGNLINES-1):0] i_cline = i_addr[(CS-1):LS];
        wire    [(CS-1):0]       i_caddr = i_addr[(CS-1):0];
        wire    [(AW-LS-1):0]    i_ctag  = i_addr[(AW-1):LS];

        // The request misses "right now" unless it matches the remembered
        // tag AND the line it maps to is marked valid.
        wire    cache_miss_inow
                        = !((last_tag == i_addr[31:LS])&&(c_v[i_cline]));

        // Does the address fall within any of the cachable regions?
        wire    w_cached_region
                        = ((SDRAM_BIT>0)&&(i_addr[SDRAM_BIT]))
                        ||((FLASH_BIT>0)&&(i_addr[FLASH_BIT]))
                        ||((BLKRAM_BIT>0)&&(i_addr[BLKRAM_BIT]));

        // Locked accesses, and anything in the top quarter of the address
        // space, are never cachable.
        wire    w_cachable
                        = (!i_lock)&&(i_addr[31:30]!=2'b11)&&(w_cached_region);

        // State of the one-clock-delayed copy of the request
        reg                      r_cachable;
        reg                      r_svalid;
        reg                      r_dvalid;
        reg                      r_rd;
        reg                      r_cache_miss;
        reg                      r_rvalid;
        reg     [(AW-1):0]       r_addr;
        reg     [31:0]           r_idata;
        reg     [31:0]           r_ddata;
        reg     [31:0]           r_rdata;

        // The delayed address, split the same way as the incoming one
        wire    [(LGNLINES-1):0] r_cline = r_addr[(CS-1):LS];
        wire    [(CS-1):0]       r_caddr = r_addr[(CS-1):0];
        wire    [(AW-LS-1):0]    r_ctag  = r_addr[(AW-1):LS];

        // Valid bit and tag as read from the cache for the delayed request,
        // plus flags shared with the bus state machine
        reg                      r_iv;
        reg     [(AW-LS-1):0]    r_itag;
        reg                      wr_cstb;
        reg                      in_cache;
        reg                      pipeable_op;
        reg                      non_pipeable_op;
160
 
161
        //
        // Cache lookup pipeline.
        //
        // Every request is looked up two ways at once:
        //  - a single-clock path (r_svalid), which only succeeds when the
        //    request matches last_tag and the cache is not being written, and
        //  - a two-clock path (r_dvalid), which reads the line's own tag and
        //    valid bit on the first clock and compares them on the second.
        // A cachable read that hits on neither path raises r_cache_miss.
        //
        initial r_rd         = 1'b0;
        initial r_cachable   = 1'b0;
        initial r_svalid     = 1'b0;
        initial r_dvalid     = 1'b0;
        // These two feed the state machine and o_valid respectively, so give
        // them a defined power-up value as well.
        initial r_cache_miss = 1'b0;
        initial r_rvalid     = 1'b0;

        // The delayed request matches the tag and valid bit read for it
        wire    w_tag_match;
        assign  w_tag_match = (r_itag == r_ctag)&&(r_iv)&&(r_cachable);

        always @(posedge i_clk)
        begin
                // The single clock path
                r_idata <= c_mem[i_addr[(CS-1):0]];
                // The valid for the single clock path
                //      Only ... we need to wait if we are currently writing
                //      to our cache.
                r_svalid<= (!i_op)&&(!cache_miss_inow)&&(w_cachable)
                                &&(i_pipe_stb)&&(!c_wr)&&(!wr_cstb);

                //
                // The two clock in-cache path
                //
                // While idle, capture the new request together with the valid
                // bit and tag of the line it maps to.  While busy, keep
                // re-reading the valid bit and tag of the captured request.
                if (!o_busy)
                begin
                        r_iv   <= c_v[i_cline];
                        r_itag <= c_vtags[i_cline];
                        r_addr <= i_addr;
                        r_cachable <= (!i_op)&&(w_cachable)&&(i_pipe_stb);
                end else begin
                        r_iv   <= c_v[r_cline];
                        r_itag <= c_vtags[r_cline];
                end
                // r_idata still contains the right answer
                r_rd <= (i_pipe_stb)&&(!i_op);
                r_ddata  <= r_idata;

                // A request that hit on the single-clock path also matches
                // here, one clock later.  It has already been answered, so it
                // must not be answered again: without the !r_svalid term one
                // read would produce two o_valid pulses.
                r_dvalid <= (w_tag_match)&&(!r_svalid);
                if (w_tag_match)
                        last_tag <= r_ctag;

                // r_cache_miss takes a clock cycle.  It is only ever true for
                // something that should be cachable, but isn't in the cache.
                // A cache miss is only true _if_
                // 1. A read was requested
                // 2. It is for a cachable address, AND
                // 3. It isn't in the cache on the first read
                //      or the second read
                // 4. The read hasn't yet started to get this address
                r_cache_miss <= ((!cyc)||(o_wb_we))&&(r_cachable)&&(r_rd)
                                // One clock path -- miss
                                &&(!r_svalid)
                                // Two clock path -- misses as well
                                &&((r_itag != r_ctag)||(!r_iv));

                r_rdata <= c_mem[r_addr[(CS-1):0]];
                r_rvalid<= ((i_wb_ack)&&(last_ack));
        end
221
 
222
`define DC_IDLE         2'b00
`define DC_WRITE        2'b01
`define DC_READS        2'b10
`define DC_READC        2'b11
        //
        // The Wishbone bus state machine
        //
        //   DC_IDLE   no bus cycle in progress; new operations are accepted
        //   DC_WRITE  a (possibly pipelined) burst of writes
        //   DC_READS  a (possibly pipelined) burst of non-cachable reads
        //   DC_READC  reading a whole cache line from the global bus
        //
        reg     [1:0]    state;

        // One-clock-delayed copy of the bus write, used to update the cache
        // when the written location is currently held in it.
        reg     [(AW-LS-1):0]    wr_wtag, wr_vtag;
        reg     [31:0]           wr_data;
        reg     [(CS-1):0]       wr_addr;

        // The cache line and tag addressed by the bus.  These are sliced
        // exactly as i_cline/i_ctag and r_cline/r_ctag are, so that what the
        // fill stores is what the lookup later compares against.
        wire    [(LGNLINES-1):0] w_wb_cline;
        wire    [(AW-LS-1):0]    w_wb_ctag;
        assign  w_wb_cline = o_wb_addr[(CS-1):LS];
        assign  w_wb_ctag  = o_wb_addr[(AW-1):LS];

        // Addresses in the top quarter of the address space use the local bus
        wire    w_lcl_addr;
        assign  w_lcl_addr = (i_addr[31:30]==2'b11);

        // Start idle, with an empty cache and nothing on the bus
        initial state        = `DC_IDLE;
        initial cyc          = 1'b0;
        initial stb          = 1'b0;
        initial o_wb_stb_gbl = 1'b0;
        initial o_wb_stb_lcl = 1'b0;
        initial c_v          = 0;
        initial c_wr         = 1'b0;
        initial wr_cstb      = 1'b0;

        always @(posedge i_clk)
        begin
                // By default, update the cache from the write 1-clock ago,
                // provided the line holding that address carries its tag.
                c_wr <= (wr_cstb)&&(wr_wtag == wr_vtag);
                c_wdata <= wr_data;
                c_waddr <= wr_addr[(CS-1):0];

                wr_cstb <= 1'b0;
                wr_vtag <= c_vtags[w_wb_cline];
                wr_wtag <= w_wb_ctag;
                wr_data <= o_wb_data;
                wr_addr <= o_wb_addr[(CS-1):0];

                // end_of_line: a cycle is active and the cache write address
                // is in the last two words of a line, or an acknowledgement
                // is arriving while it is one word short of that.
                if (LS <= 1)
                        end_of_line <= 1'b1;
                else
                        end_of_line<=(cyc)&&((c_waddr[(LS-1):1]=={(LS-1){1'b1}})
                                ||((i_wb_ack)
                                &&(c_waddr[(LS-1):0]=={{(LS-2){1'b1}},2'b01})));

                // last_line_stb: the same test, applied to the request side
                // (strobe, stall and the bus address).
                if (LS <= 1)
                        last_line_stb <= 1'b1;
                else
                        last_line_stb <= (stb)&&
                                ((o_wb_addr[(LS-1):1]=={(LS-1){1'b1}})
                                ||((!i_wb_stall)
                                        &&(o_wb_addr[(LS-1):0]
                                                =={{(LS-2){1'b1}},2'b01})));

                case (state)
                `DC_IDLE: begin
                        o_wb_we <= 1'b0;
                        o_wb_data <= i_data;
                        pipeable_op <= 1'b0;
                        // NOTE(review): this keeps non_pipeable_op, and with
                        // it o_pipe_stalled, high for as long as the machine
                        // idles.  Confirm that this is what is intended.
                        non_pipeable_op <= 1'b1;

                        cyc <= 1'b0;
                        stb <= 1'b0;

                        r_wb_cyc_gbl <= 1'b0;
                        r_wb_cyc_lcl <= 1'b0;
                        o_wb_stb_gbl <= 1'b0;
                        o_wb_stb_lcl <= 1'b0;

                        in_cache <= (i_op)&&(w_cachable);
                        if ((i_pipe_stb)&&(i_op))
                        begin // Write operation
                                state <= `DC_WRITE;
                                o_wb_addr <= i_addr;
                                o_wb_we <= 1'b1;
                                pipeable_op <= 1'b1;

                                cyc <= 1'b1;
                                stb <= 1'b1;

                                r_wb_cyc_gbl <= (!w_lcl_addr);
                                r_wb_cyc_lcl <= (w_lcl_addr);
                                o_wb_stb_gbl <= (!w_lcl_addr);
                                o_wb_stb_lcl <= (w_lcl_addr);

                        end else if (r_cache_miss)
                        begin // Cachable read that missed: fetch the line
                                // NOTE(review): the line address comes from
                                // the *current* i_addr, while r_cache_miss
                                // describes an earlier request; this assumes
                                // the address is held.  Also, c_waddr is not
                                // loaded with the start of the line here.
                                state <= `DC_READC;
                                o_wb_addr <= { i_ctag, {(LS){1'b0}} };
                                non_pipeable_op <= 1'b1;

                                cyc <= 1'b1;
                                stb <= 1'b1;
                                r_wb_cyc_gbl <= 1'b1;
                                o_wb_stb_gbl <= 1'b1;
                        end else if ((i_pipe_stb)&&(!w_cachable))
                        begin // Read non-cachable memory area
                                state <= `DC_READS;
                                o_wb_addr <= i_addr;
                                pipeable_op <= 1'b1;

                                cyc <= 1'b1;
                                stb <= 1'b1;
                                r_wb_cyc_gbl <= (!w_lcl_addr);
                                r_wb_cyc_lcl <= (w_lcl_addr);
                                o_wb_stb_gbl <= (!w_lcl_addr);
                                o_wb_stb_lcl <= (w_lcl_addr);
                        end // else we stay idle
                        end

                `DC_READC: begin
                        // We enter here once we have committed to reading
                        // data into a cache line.
                        if ((stb)&&(!i_wb_stall))
                        begin
                                // Request accepted: step to the next word of
                                // the line.  Only the in-line bits advance,
                                // so the line and tag bits stay put.
                                stb <= (!last_line_stb);
                                o_wb_stb_gbl <= (!last_line_stb);
                                o_wb_addr[(LS-1):0] <= o_wb_addr[(LS-1):0]+1'b1;
                        end

                        // The line is not valid while it is being refilled
                        if(stb)
                                c_v[w_wb_cline] <= 1'b0;

                        // Each acknowledgement delivers one word of the line
                        // on i_wb_data; that is the value to store.
                        c_wr <= (i_wb_ack);
                        c_wdata <= i_wb_data;
                        c_waddr <= ((c_wr)?(c_waddr+1'b1):c_waddr);

                        c_vtags[w_wb_cline] <= w_wb_ctag;

                        if (((i_wb_ack)&&(end_of_line))||(i_wb_err))
                        begin
                                state           <= `DC_IDLE;
                                non_pipeable_op <= 1'b0;
                                cyc <= 1'b0;
                                r_wb_cyc_gbl <= 1'b0;
                                r_wb_cyc_lcl <= 1'b0;
                                // Whether the line ends up valid follows
                                // i_wb_ack on this final clock
                                c_v[w_wb_cline] <= i_wb_ack;
                        end
                        end

                `DC_READS: begin
                        // We enter here once we have committed to reading
                        // data that cannot go into a cache line
                        if ((!i_wb_stall)&&(!i_pipe_stb))
                        begin
                                stb <= 1'b0;
                                o_wb_stb_gbl <= 1'b0;
                                o_wb_stb_lcl <= 1'b0;
                                pipeable_op <= 1'b0;
                        end

                        // A further pipelined read: its address arrives on
                        // i_addr (i_data carries write data only).
                        if ((!i_wb_stall)&&(i_pipe_stb))
                                o_wb_addr <= i_addr;

                        c_wr <= 1'b0;

                        if (((i_wb_ack)&&(last_ack))||(i_wb_err))
                        begin
                                state        <= `DC_IDLE;
                                cyc          <= 1'b0;
                                r_wb_cyc_gbl <= 1'b0;
                                r_wb_cyc_lcl <= 1'b0;
                        end
                        end

                `DC_WRITE: begin
                        // NOTE(review): these two assignments replace the
                        // default wr_data/wr_addr values above, so a cache
                        // update triggered by wr_cstb lands at c_waddr rather
                        // than at the address just written.  Confirm intent.
                        c_wdata <= o_wb_data;
                        c_waddr <= ((c_wr)?(c_waddr+1'b1):c_waddr);

                        if ((!i_wb_stall)&&(!i_pipe_stb))
                        begin
                                stb          <= 1'b0;
                                o_wb_stb_gbl <= 1'b0;
                                o_wb_stb_lcl <= 1'b0;
                                pipeable_op  <= 1'b0;
                        end

                        // Flag an accepted write to a cachable address, so the
                        // default logic above can consider a cache update
                        wr_cstb  <= (stb)&&(!i_wb_stall)&&(in_cache);

                        // A further pipelined write replaces address and data
                        if ((stb)&&(!i_wb_stall)&&(i_pipe_stb))
                        begin
                                o_wb_addr <= i_addr;
                                o_wb_data <= i_data;
                        end

                        if (((i_wb_ack)&&(last_ack))||(i_wb_err))
                        begin
                                state        <= `DC_IDLE;
                                cyc          <= 1'b0;
                                r_wb_cyc_gbl <= 1'b0;
                                r_wb_cyc_lcl <= 1'b0;
                        end
                        end
                endcase
        end
408
 
409
        //
        // The cache memory's write port
        //
        // Everything about the write -- whether it happens (c_wr), where
        // (c_waddr) and what (c_wdata) -- was registered on an earlier clock.
        // All that is left here is the bare memory write itself, kept in the
        // simplest possible form.
        //
        always @(posedge i_clk)
        begin
                if (c_wr)
                begin
                        c_mem[c_waddr] <= c_wdata;
                end
        end
420
 
421
        //
        // The cache memory's read ports
        //
        // Both reads are registered.  One is addressed by the incoming
        // request (i_caddr), the other by the one-clock-delayed copy of it
        // (r_caddr).  Which of the two, if either, becomes the output is
        // decided separately, where o_data is assigned.
        //
        reg     [31:0]   cached_idata;
        reg     [31:0]   cached_rdata;

        always @(posedge i_clk)
        begin
                cached_idata <= c_mem[i_caddr];
                cached_rdata <= c_mem[r_caddr];
        end
435
 
436
// Results returned to the CPU.
//
// o_valid is raised one clock after any of four events:
// 1. a hit on the single-clock lookup path (r_svalid),
// 2. a hit on the two-clock lookup path (r_dvalid),
// 3. r_rvalid, i.e. a bus acknowledgement coinciding with last_ack,
// 4. a bus acknowledgement during a pipeable operation.
//
// o_data is chosen by priority: the single-clock read result on an r_svalid
// hit, else the bus data on a pipeable acknowledgement, else the read result
// for the delayed address.
        wire    w_pipe_ack;
        assign  w_pipe_ack = (i_wb_ack)&&(pipeable_op);

        // The module is busy whenever the bus state machine is not idle
        assign  o_busy = (state != `DC_IDLE);

        always @(posedge i_clk)
        begin
                // A bus error only counts while a cycle is in progress
                o_err   <= (cyc)&&(i_wb_err);
                o_valid <= (r_svalid)||(w_pipe_ack)||(r_dvalid)||(r_rvalid);

                if (r_svalid)
                        o_data <= cached_idata;
                else if (w_pipe_ack)
                        o_data <= i_wb_data;
                else
                        o_data <= cached_rdata;
        end
455
 
456
 
457
        //
        // The auxiliary data FIFO
        //
        // Each accepted operation brings an auxiliary value (i_oreg) with it.
        // That value is queued here and presented again on o_wreg; the read
        // pointer moves on each time a result is reported via o_valid.
        //
        // As written this serves bursts of writes and of non-cachable reads.
        // Something much like it will be needed once an MMU is in use, to
        // keep memory operations synchronous enough to handle bus errors.
        //
        reg     [(NAUX-1):0]     aux_fifo [0:((1<<LGAUX)-1)];
        reg     [(LGAUX-1):0]    aux_head;
        reg     [(LGAUX-1):0]    aux_tail;

        // A reset or a bus error empties the FIFO
        wire    w_aux_clear;
        assign  w_aux_clear = (i_rst)||(i_wb_err);

        initial aux_head = 0;
        initial aux_tail = 0;

        // Write side.  The slot at aux_head is rewritten on every clock; the
        // entry is only kept when the pointer then advances, which happens
        // for a request arriving while the module is not busy.
        always @(posedge i_clk)
                aux_fifo[aux_head] <= i_oreg;

        always @(posedge i_clk)
        begin
                if (w_aux_clear)
                        aux_head <= 0;
                else if ((i_pipe_stb)&&(!o_busy))
                        aux_head <= aux_head + 1'b1;
        end

        // Read side.  o_wreg is a registered copy of the entry at aux_tail.
        always @(posedge i_clk)
                o_wreg <= aux_fifo[aux_tail];

        always @(posedge i_clk)
        begin
                if (w_aux_clear)
                        aux_tail <= 0;
                else if (o_valid) // ||(aux_tail[WBIT])&&(no-mmu-error)
                        aux_tail <= aux_tail + 1'b1;
        end
490
 
491
        //
        // We can use our FIFO addresses to pre-calculate when an ACK is going
        // to be the last_noncachable_ack.
        //
        // NOTE(review): no such calculation is present in this module.

        // The CPU must hold off while a pipeable operation is stalled by the
        // bus, and for as long as a non-pipeable operation is flagged.
        assign o_pipe_stalled=((pipeable_op)&&(i_wb_stall))||(non_pipeable_op);
        // pipeable_op must become zero when stb goes low

        //
        // Bus locking
        //
        // Once a cycle has been opened on a bus while i_lock is asserted, the
        // cycle line of that bus is held until i_lock is released.  The two
        // lock registers are assigned in the always block below and so must
        // be declared; they were previously used without a declaration.
        //
        // NOTE(review): the IMPLEMENT_LOCK parameter is not consulted here.
        //
        reg     lock_gbl, lock_lcl;
        initial lock_gbl = 1'b0;
        initial lock_lcl = 1'b0;
        always @(posedge i_clk)
        begin
                lock_gbl <= (i_lock)&&((r_wb_cyc_gbl)||(lock_gbl));
                lock_lcl <= (i_lock)&&((r_wb_cyc_lcl)||(lock_lcl));
        end

        assign  o_wb_cyc_gbl = (r_wb_cyc_gbl)||(lock_gbl);
        assign  o_wb_cyc_lcl = (r_wb_cyc_lcl)||(lock_lcl);
endmodule

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.