URL
https://opencores.org/ocsvn/openarty/openarty/trunk
Subversion Repositories openarty
[/] [openarty/] [trunk/] [rtl/] [cpu/] [fastcache.v] - Rev 42
Go to most recent revision | Compare with Previous | Blame | View Log
////////////////////////////////////////////////////////////////////////////////
//
// Filename:	fastcache.v
//
// Project:	Zip CPU -- a small, lightweight, RISC CPU soft core
//
// Purpose:	Keeping our CPU fed with instructions, at one per clock and
//		with no stalls.  An unusual feature of this cache is the
//		requirement that the entire cache may be cleared (if necessary).
//
// Creator:	Dan Gisselquist, Ph.D.
//		Gisselquist Technology, LLC
//
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.
//
// License:	GPL, v3, as defined and found on www.gnu.org,
//		http://www.gnu.org/licenses/gpl.html
//
//
////////////////////////////////////////////////////////////////////////////////
//
// Module overview
//
//   The cache holds (1<<CW) words, organised as (1<<(CW-PW)) lines of
//   (1<<PW) words each.  Every line has one tag (the upper AW-CW address
//   bits) and one valid bit in vmask.  When the requested address is not
//   present, a read-only bus cycle fetches a whole line, starting from the
//   line-aligned address derived from lastpc.
//
//   Parameters:
//     LGCACHELEN	log2 of the cache size in words (aliased as CW)
//     ADDRESS_WIDTH	width of the instruction address (aliased as AW)
//     BUSW		bus/instruction word width
//     PW		log2 of the words per cache line (LGCACHELEN-5).  The
//			end-of-line detection below replicates a constant
//			(PW-1) times, so PW must be at least 2.
//
//   Ports:
//     i_clk, i_rst		clock and synchronous reset
//     i_new_pc			the CPU is jumping to a new address at i_pc
//     i_clear_cache		invalidate every line (and any recorded bus error)
//     i_stall_n		the consumer accepts the instruction when high
//     i_pc			address of the instruction being requested
//     o_i, o_pc, o_v		instruction word, its address, and its valid flag
//     o_wb_*, i_wb_*		bus master interface (reads only)
//     o_illegal		i_pc lies in a line whose fetch ended in a bus error
//
module	fastcache(i_clk, i_rst, i_new_pc, i_clear_cache,
			// i_early_branch, i_from_addr,
			i_stall_n, i_pc, o_i, o_pc, o_v,
		o_wb_cyc, o_wb_stb, o_wb_we, o_wb_addr, o_wb_data,
			i_wb_ack, i_wb_stall, i_wb_err, i_wb_data,
			o_illegal);
	parameter	LGCACHELEN = 8, ADDRESS_WIDTH=24,
			CACHELEN=(1<<LGCACHELEN), BUSW=32, AW=ADDRESS_WIDTH,
			CW=LGCACHELEN, PW=LGCACHELEN-5;
	input				i_clk, i_rst, i_new_pc;
	input				i_clear_cache;
	input				i_stall_n;
	input		[(AW-1):0]	i_pc;
	output	wire	[(BUSW-1):0]	o_i;
	output	wire	[(AW-1):0]	o_pc;
	output	wire			o_v;
	//
	output	reg			o_wb_cyc, o_wb_stb;
	output	wire			o_wb_we;
	output	reg	[(AW-1):0]	o_wb_addr;
	output	wire	[(BUSW-1):0]	o_wb_data;
	//
	input				i_wb_ack, i_wb_stall, i_wb_err;
	input		[(BUSW-1):0]	i_wb_data;
	//
	output	reg			o_illegal;

	// Fixed bus outputs: we read from the bus only, never write.
	// Thus the output data is ... irrelevant and don't care.  We set it
	// to zero just to set it to something.
	assign	o_wb_we = 1'b0;
	assign	o_wb_data = 0;

	wire			r_v;
	// Instruction storage, one tag per line, and one valid bit per line
	reg	[(BUSW-1):0]	cache	[0:((1<<CW)-1)];
	reg	[(AW-CW-1):0]	tags	[0:((1<<(CW-PW))-1)];
	reg	[((1<<(CW-PW))-1):0]	vmask;

	reg	[(AW-1):0]	lastpc;
	reg	[(CW-1):0]	rdaddr;
	reg	[(AW-1):CW]	tagvalipc, tagvallst;
	wire	[(AW-1):CW]	tagval;
	wire	[(AW-1):PW]	lasttag;
	reg			illegal_valid;
	reg	[(AW-1):PW]	illegal_cache;

	// initial	o_i = 32'h76_00_00_00;	// A NOOP instruction
	// initial	o_pc = 0;

	//
	// Two parallel, registered cache reads: one addressed by i_pc and one
	// by lastpc.  isrc picks which of the two is presented on o_pc/o_i.
	//
	reg	[(BUSW-1):0]	r_pc_cache, r_last_cache;
	reg	[(AW-1):0]	r_pc, r_lastpc;
	reg	isrc;
	always @(posedge i_clk)
		if (~r_v)
			isrc <= 1'b0;
		else if ((i_stall_n)||(i_new_pc))
			isrc <= 1'b1;
	always @(posedge i_clk)
		r_pc_cache <= cache[i_pc[(CW-1):0]];
	always @(posedge i_clk)
		r_last_cache <= cache[lastpc[(CW-1):0]];
	always @(posedge i_clk)
		r_pc <= i_pc;
	always @(posedge i_clk)
		r_lastpc <= lastpc;
	assign	o_pc = (isrc) ? r_pc : r_lastpc;
	assign	o_i  = (isrc) ? r_pc_cache : r_last_cache;

	reg	tagsrc;
	always @(posedge i_clk)
		// It may be possible to recover a clock once the cache line
		// has been filled, but our prior attempt to do so has lead
		// to a race condition, so we keep this logic simple.
		if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
			tagsrc <= 1'b1;
		else
			tagsrc <= 1'b0;

	// Registered tag lookups, for the i_pc line and for the lastpc line
	initial	tagvalipc = 0;
	always @(posedge i_clk)
		tagvalipc <= tags[i_pc[(CW-1):PW]];
	initial	tagvallst = 0;
	always @(posedge i_clk)
		tagvallst <= tags[lastpc[(CW-1):PW]];
	// tagval is not referenced anywhere else within this module
	assign	tagval = (tagsrc)?tagvalipc : tagvallst;

	// i_pc will only increment when everything else isn't stalled, thus
	// we can set it without worrying about that.   Doing this enables
	// us to work in spite of stalls.  For example, if the next address
	// isn't valid, but the decoder is stalled, get the next address
	// anyway.
	initial	lastpc = 0;
	always @(posedge i_clk)
		if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
			lastpc <= i_pc;

	assign	lasttag = lastpc[(AW-1):PW];
	// initial	lasttag = 0;
	// always @(posedge i_clk)
	//	if (((r_v)&&(i_stall_n))||(i_clear_cache)||(i_new_pc))
	//		lasttag <= i_pc[(AW-1):PW];

	//
	// Hit detection.  From i_pc: same line as lastpc, matching tag, and
	// the line is marked valid.  From lastpc: matching tag and valid line.
	//
	wire	w_v_from_pc, w_v_from_last;
	assign	w_v_from_pc = ((i_pc[(AW-1):PW] == lasttag)
				&&(tagvalipc == i_pc[(AW-1):CW])
				&&(vmask[i_pc[(CW-1):PW]]));
	assign	w_v_from_last = (
				//(lastpc[(AW-1):PW] == lasttag)&&
				(tagvallst == lastpc[(AW-1):CW])
				&&(vmask[lastpc[(CW-1):PW]]));

	// delay counts down while the output is invalid and no bus cycle is
	// running; a line fetch is only requested once it has reached zero.
	reg	[1:0]	delay;

	initial	delay = 2'h3;
	reg	rvsrc;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache)||(i_new_pc)||((r_v)&&(i_stall_n)))
		begin
			// r_v <= r_v_from_pc;
			rvsrc <= 1'b1;
			delay <= 2'h2;
		end else if (~r_v) begin // Otherwise, r_v was true and we were
			// stalled, hence only if ~r_v
			rvsrc <= 1'b0;
			if (o_wb_cyc)
				delay <= 2'h2;
			else if (delay != 0)
				delay <= delay + 2'b11; // i.e. delay -= 1;
		end

	reg	r_v_from_pc, r_v_from_last;
	always @(posedge i_clk)
		r_v_from_pc <= w_v_from_pc;
	always @(posedge i_clk)
		r_v_from_last <= w_v_from_last;

	assign	r_v = ((rvsrc)?(r_v_from_pc):(r_v_from_last));
	assign	o_v = ((rvsrc)?(r_v_from_pc):(r_v_from_last))&&(~i_new_pc);

	//
	// last_ack: the acknowledgement arriving on this clock (if any) is
	// for the final word of the line.  It is registered, so it looks one
	// step ahead: either rdaddr already points at the last word, or it
	// points at the second-to-last word and that word is being
	// acknowledged right now.
	//
	// rdaddr[(PW-1):1] is (PW-1) bits wide and must be compared against
	// a constant of the same width.  Comparing against PW ones would
	// zero-extend the slice, the match could never succeed, and the bus
	// cycle would never be closed.
	//
	reg	last_ack;
	initial	last_ack = 1'b0;
	always @(posedge i_clk)
		last_ack <= (o_wb_cyc)&&(
				(rdaddr[(PW-1):1]=={(PW-1){1'b1}})
				&&((rdaddr[0])||(i_wb_ack)));

	// needload: the output is invalid, the delay has expired, the line
	// at lastpc is absent (tag mismatch or not valid), and that line is
	// not the one recorded as having produced a bus error.
	reg	needload;
	initial	needload = 1'b0;
	always @(posedge i_clk)
		needload <= ((~r_v)&&(delay==0)
			&&((tagvallst != lastpc[(AW-1):CW])
				||(~vmask[lastpc[(CW-1):PW]]))
			&&((~illegal_valid)
				||(lastpc[(AW-1):PW] != illegal_cache)));

	//
	// last_addr: the request on the bus on this clock is for the final
	// word of the line.  As with last_ack this is registered one step
	// ahead, and the (PW-1)-bit slice o_wb_addr[(PW-1):1] is compared
	// against (PW-1) ones so that the widths agree.
	//
	reg	last_addr;
	initial	last_addr = 1'b0;
	always @(posedge i_clk)
		last_addr <= (o_wb_cyc)&&(o_wb_addr[(PW-1):1] == {(PW-1){1'b1}})
				&&((~i_wb_stall)|(o_wb_addr[0]));

	//
	// Bus cycle control
	//
	initial	o_wb_cyc  = 1'b0;
	initial	o_wb_stb  = 1'b0;
	initial	o_wb_addr = {(AW){1'b0}};
	initial	rdaddr    = 0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
		begin
			o_wb_cyc <= 1'b0;
			o_wb_stb <= 1'b0;
		end else if (o_wb_cyc)
		begin
			// Stop issuing requests on an error, or once the
			// last address of the line has been accepted
			if (i_wb_err)
				o_wb_stb <= 1'b0;
			else if ((o_wb_stb)&&(~i_wb_stall)&&(last_addr))
				o_wb_stb <= 1'b0;

			// End the cycle on the final ack, or on an error
			if (((i_wb_ack)&&(last_ack))||(i_wb_err))
				o_wb_cyc <= 1'b0;

			// else if (rdaddr[(PW-1):1] == {(PW-1){1'b1}})
			//	tags[lastpc[(CW-1):PW]] <= lastpc[(AW-1):CW];

		end else if (needload)
		begin
			o_wb_cyc  <= 1'b1;
			o_wb_stb  <= 1'b1;
		end

	// The tag of the line being fetched is rewritten on every clock of
	// the bus cycle.
	always @(posedge i_clk)
		if (o_wb_cyc) // &&(i_wb_ack)
			tags[o_wb_addr[(CW-1):PW]] <= o_wb_addr[(AW-1):CW];

	// Cache write pointer: advances with each ack, and is reloaded with
	// the start of lastpc's line whenever no bus cycle is in progress.
	always @(posedge i_clk)
		if ((o_wb_cyc)&&(i_wb_ack))
			rdaddr <= rdaddr + 1;
		else if (~o_wb_cyc)
			rdaddr <= { lastpc[(CW-1):PW], {(PW){1'b0}} };

	// Request address: only the within-line bits count up, once for each
	// accepted request, stopping at the last word of the line.
	always @(posedge i_clk)
		if ((o_wb_stb)&&(~i_wb_stall)&&(~last_addr))
			o_wb_addr[(PW-1):0] <= o_wb_addr[(PW-1):0]+1;
		else if (~o_wb_cyc)
			o_wb_addr <= { lastpc[(AW-1):PW], {(PW){1'b0}} };

	// Can't initialize an array, so leave cache uninitialized
	// We'll also never get an ack without sys being active, so skip
	// that check.  Or rather, let's just use o_wb_cyc instead.  This
	// will work because multiple writes to the same address, ending with
	// a valid write, aren't a problem.
	always @(posedge i_clk)
		if (o_wb_cyc) // &&(i_wb_ack)
			cache[rdaddr] <= i_wb_data;

	// VMask ... is a section loaded?
	initial	vmask = 0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
			vmask <= 0;
		else begin
			// The line becomes valid with its final ack ...
			if ((o_wb_cyc)&&(i_wb_ack)&&(last_ack))
				vmask[rdaddr[(CW-1):PW]] <= 1'b1;
			// ... and is invalidated when a reload of it is
			// about to begin
			if ((~o_wb_cyc)&&(needload))
				vmask[lastpc[(CW-1):PW]] <= 1'b0;
		end

	// Remember the line address of the most recent fetch that ended in a
	// bus error, so that it is not requested again (see needload).
	initial	illegal_cache = 0;
	initial	illegal_valid = 0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
		begin
			illegal_cache <= 0;
			illegal_valid <= 0;
		end else if ((o_wb_cyc)&&(i_wb_err))
		begin
			illegal_cache <= o_wb_addr[(AW-1):PW];
			illegal_valid <= 1'b1;
		end

	initial o_illegal = 1'b0;
	always @(posedge i_clk)
		if ((i_rst)||(i_clear_cache))
			o_illegal <= 1'b0;
		else
			o_illegal <= (illegal_valid)
				&&(illegal_cache == i_pc[(AW-1):PW]);

endmodule
Go to most recent revision | Compare with Previous | Blame | View Log