OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /neorv32/trunk/rtl/core
    from Rev 44 to Rev 45
    Reverse comparison

Rev 44 → Rev 45

/neorv32_cache.vhd File deleted
/neorv32_cpu.vhd
166,8 → 166,8
 
-- Instruction prefetch buffer size --
assert not (is_power_of_two_f(ipb_entries_c) = false) report "NEORV32 CPU CONFIG ERROR! Number of entries in instruction prefetch buffer <ipb_entries_c> has to be a power of two." severity error;
-- A extension - only lr.w and sc.w supported yet --
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports >lr.w< and >sc.w< instructions yet." severity warning;
-- A extension - only lr.w and sc.w are supported yet --
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports <lr.w> and <sc.w> instructions." severity warning;
 
-- Bit manipulation notifier --
assert not (CPU_EXTENSION_RISCV_B = true) report "NEORV32 CPU CONFIG WARNING! Bit manipulation extension (B) only supports 'base' instruction sub-set (Zbb) yet and is still 'unofficial' (not-ratified)." severity warning;
253,7 → 253,7
 
-- Register File --------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
neorv32_regfile_inst: neorv32_cpu_regfile
neorv32_cpu_regfile_inst: neorv32_cpu_regfile
generic map (
CPU_EXTENSION_RISCV_E => CPU_EXTENSION_RISCV_E -- implement embedded RF extension?
)
/neorv32_cpu_control.vhd
669,8 → 669,8
-- PC update --
if (execute_engine.pc_we = '1') then
case execute_engine.pc_mux_sel is
when "00" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment
when "01" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch
when "00" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch
when "01" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment
when "10" => execute_engine.pc <= csr.mtvec(data_width_c-1 downto 1) & '0'; -- trap enter
when others => execute_engine.pc <= csr.mepc(data_width_c-1 downto 1) & '0'; -- trap exit
end case;
813,7 → 813,7
execute_engine.sleep_nxt <= execute_engine.sleep;
execute_engine.if_rst_nxt <= execute_engine.if_rst;
--
execute_engine.pc_mux_sel <= (others => '0');
execute_engine.pc_mux_sel <= (others => '0'); -- select "slowest path" as default
execute_engine.pc_we <= '0';
 
-- instruction dispatch --
874,10 → 874,10
 
when DISPATCH => -- Get new command from instruction issue engine
-- ------------------------------------------------------------
execute_engine.pc_mux_sel <= "00"; -- linear next PC
-- IR update --
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction beeing executed
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0);
execute_engine.pc_mux_sel <= "01"; -- linear next PC
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction beeing executed
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0);
--
if (cmd_issue.valid = '1') then -- instruction available?
-- IR update - exceptions --
1032,11 → 1032,15
 
when opcode_syscsr_c => -- system/csr access
-- ------------------------------------------------------------
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment
execute_engine.state_nxt <= SYS_ENV;
else -- CSR access
execute_engine.state_nxt <= CSR_ACCESS;
if (CPU_EXTENSION_RISCV_Zicsr = true) then
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment
execute_engine.state_nxt <= SYS_ENV;
else -- CSR access
execute_engine.state_nxt <= CSR_ACCESS;
end if;
else
execute_engine.state_nxt <= SYS_WAIT;
end if;
 
when others => -- undefined
1051,16 → 1055,16
execute_engine.pc_mux_sel <= "11"; -- csr.mepc (only relevant for MRET)
case execute_engine.i_reg(instr_funct12_msb_c downto instr_funct12_lsb_c) is
when funct12_ecall_c => -- ECALL
trap_ctrl.env_call <= '1';
trap_ctrl.env_call <= '1';
when funct12_ebreak_c => -- EBREAK
trap_ctrl.break_point <= '1';
trap_ctrl.break_point <= '1';
when funct12_mret_c => -- MRET
trap_ctrl.env_end <= '1';
execute_engine.pc_we <= '1'; -- update PC from MEPC
fetch_engine.reset <= '1';
trap_ctrl.env_end <= '1';
execute_engine.pc_we <= '1'; -- update PC from MEPC
fetch_engine.reset <= '1';
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification
when funct12_wfi_c => -- WFI
execute_engine.sleep_nxt <= '1'; -- good night
execute_engine.sleep_nxt <= '1'; -- good night
when others => -- undefined
NULL;
end case;
1079,7 → 1083,7
csr.we_nxt <= '0';
end case;
-- register file write back --
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input = CSR output
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input <= CSR output
ctrl_nxt(ctrl_rf_wb_en_c) <= '1'; -- valid RF write-back
execute_engine.state_nxt <= DISPATCH;
 
1109,7 → 1113,7
ctrl_nxt(ctrl_rf_in_mux_msb_c) <= '0'; -- RF input = ALU result
ctrl_nxt(ctrl_rf_wb_en_c) <= execute_engine.i_reg(instr_opcode_lsb_c+2); -- valid RF write-back? (is jump-and-link?)
-- destination address --
execute_engine.pc_mux_sel <= "01"; -- alu.add = branch/jump destination
execute_engine.pc_mux_sel <= "00"; -- alu.add = branch/jump destination
if (execute_engine.i_reg(instr_opcode_lsb_c+2) = '1') or (execute_engine.branch_taken = '1') then -- JAL/JALR or taken branch
execute_engine.pc_we <= '1'; -- update PC
fetch_engine.reset <= '1'; -- trigger new instruction fetch from modified PC
1123,9 → 1127,9
when FENCE_OP => -- fence operations - execution
-- ------------------------------------------------------------
execute_engine.state_nxt <= SYS_WAIT;
execute_engine.pc_mux_sel <= "00"; -- linear next PC = "refetch" next instruction (only relevant for fence.i)
execute_engine.pc_mux_sel <= "01"; -- linear next PC = "refetch" next instruction (only relevant for fence.i)
-- FENCE.I --
if (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) and (CPU_EXTENSION_RISCV_Zifencei = true) then
if (CPU_EXTENSION_RISCV_Zifencei = true) and (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) then
execute_engine.pc_we <= '1';
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification
fetch_engine.reset <= '1';
2076,7 → 2080,7
csr.minstreth <= std_ulogic_vector(unsigned(csr.minstreth) + 1);
end if;
 
-- [machine] high performance counters --
-- [machine] hardware performance monitors (counters) --
for i in 0 to HPM_NUM_CNTS-1 loop
-- [m]hpmcounter* --
if (csr.we = '1') and (csr.addr = std_ulogic_vector(unsigned(csr_mhpmcounter3_c) + i)) then -- write access
2137,6 → 2141,7
cnt_event_nxt(hpmcnt_event_cir_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- retired compressed instruction
cnt_event_nxt(hpmcnt_event_wait_if_c) <= '1' when (fetch_engine.state = IFETCH_ISSUE) and (fetch_engine.state_prev = IFETCH_ISSUE) else '0'; -- instruction fetch memory wait cycle
cnt_event_nxt(hpmcnt_event_wait_ii_c) <= '1' when (execute_engine.state = DISPATCH) and (execute_engine.state_prev = DISPATCH) else '0'; -- instruction issue wait cycle
cnt_event_nxt(hpmcnt_event_wait_mc_c) <= '1' when (execute_engine.state = ALU_WAIT) and (execute_engine.state_prev = ALU_WAIT) else '0'; -- multi-cycle alu-operation wait cycle
 
cnt_event_nxt(hpmcnt_event_load_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_rd_c) = '1') else '0'; -- load operation
cnt_event_nxt(hpmcnt_event_store_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_wr_c) = '1') else '0'; -- store operation
/neorv32_cpu_cp_bitmanip.vhd
1,8 → 1,9
-- #################################################################################################
-- # << NEORV32 - CPU Co-Processor: Bit manipulation unit (RISC-V "B" Extension) >> #
-- # ********************************************************************************************* #
-- # The bit manipulation unit is implemted as co-processor that has a processing latency of at #
-- # least 3 cycles. Only the "base" bit manipulation subset ('Zbb') is supported yet. #
-- # The bit manipulation unit is implemted as co-processor that has a processing latency of 1 #
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related) #
-- # operations. #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
81,12 → 82,15
constant op_width_c : natural := 15;
 
-- controller --
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_BUSY_LOGIC);
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT);
signal ctrl_state : ctrl_state_t;
signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0);
signal valid : std_ulogic;
 
-- operand buffers --
signal rs1_reg, rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0);
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0);
signal less_ff : std_ulogic;
 
-- shift amount (immediate or register) --
signal shamt : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0);
146,12 → 150,13
cmd_buf <= (others => '0');
rs1_reg <= (others => '0');
rs2_reg <= (others => '0');
less_ff <= '0';
shifter.start <= '0';
valid_o <= '0';
valid <= '0';
elsif rising_edge(clk_i) then
-- defaults --
shifter.start <= '0';
valid_o <= '0';
valid <= '0';
 
-- fsm --
case ctrl_state is
159,6 → 164,7
when S_IDLE => -- wait for operation trigger
-- ------------------------------------------------------------
if (start_i = '1') then
less_ff <= cmp_i(alu_cmp_less_c);
cmd_buf <= cmd;
rs1_reg <= rs1_i;
rs2_reg <= rs2_i;
166,7 → 172,8
shifter.start <= '1';
ctrl_state <= S_START_SHIFT;
else
ctrl_state <= S_BUSY_LOGIC;
valid <= '1';
ctrl_state <= S_IDLE;
end if;
end if;
 
177,14 → 184,10
when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish
-- ------------------------------------------------------------
if (shifter.run = '0') then
ctrl_state <= S_BUSY_LOGIC;
valid <= '1';
ctrl_state <= S_IDLE;
end if;
 
when S_BUSY_LOGIC => -- single-cycle logic operation (and output)
-- ------------------------------------------------------------
valid_o <= '1';
ctrl_state <= S_IDLE;
 
when others => -- undefined
-- ------------------------------------------------------------
ctrl_state <= S_IDLE;
262,8 → 265,8
res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt;
 
-- min/max select --
res_int(op_min_c) <= rs1_reg when (cmp_i(alu_cmp_less_c) = '1') else rs2_reg;
res_int(op_max_c) <= rs2_reg when (cmp_i(alu_cmp_less_c) = '1') else rs1_reg;
res_int(op_min_c) <= rs1_reg when (less_ff = '1') else rs2_reg;
res_int(op_max_c) <= rs2_reg when (less_ff = '1') else rs1_reg;
 
-- sign-extension --
res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7));
314,22 → 317,24
 
-- Output Gate ----------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
output_gate: process(clk_i)
output_gate: process(valid, res_out)
begin
if rising_edge(clk_i) then
if (valid = '1') then
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or
res_out(op_min_c) or res_out(op_max_c) or
res_out(op_sextb_c) or res_out(op_sexth_c) or
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or
res_out(op_pack_c) or
res_out(op_ror_c) or res_out(op_rol_c) or
res_out(op_rev8_c) or
res_out(op_orcb_c);
else
res_o <= (others => '0');
if (ctrl_state = S_BUSY_LOGIC) then
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or
res_out(op_min_c) or res_out(op_max_c) or
res_out(op_sextb_c) or res_out(op_sexth_c) or
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or
res_out(op_pack_c) or
res_out(op_ror_c) or res_out(op_rol_c) or
res_out(op_rev8_c) or
res_out(op_orcb_c);
end if;
end if;
end process output_gate;
 
-- valid output --
valid_o <= valid;
 
 
end neorv32_cpu_cp_bitmanip_rtl;
/neorv32_cpu_cp_muldiv.vhd
1,5 → 1,5
-- #################################################################################################
-- # << NEORV32 - CPU Co-Processor: MULDIV unit (RISC-V "M" Extension)>> #
-- # << NEORV32 - CPU Co-Processor: Integer Multiplier/Divider Unit (RISC-V "M" Extension)>> #
-- # ********************************************************************************************* #
-- # Multiplier and Divider unit. Implements the RISC-V RV32-M CPU extension. #
-- # Multiplier core (signed/unsigned) uses serial algorithm. -> 32+4 cycles latency #
86,7 → 86,7
signal cp_op_ff : std_ulogic_vector(2 downto 0); -- operation that was executed
signal start : std_ulogic;
signal operation : std_ulogic;
signal opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands
signal rs1, opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands
signal opx_is_signed : std_ulogic;
signal opy_is_signed : std_ulogic;
signal opy_is_zero : std_ulogic;
121,6 → 121,7
state <= IDLE;
opx <= (others => '0');
opy <= (others => '0');
rs1 <= (others => '0');
cnt <= (others => '0');
start <= '0';
valid <= '0';
136,9 → 137,10
-- FSM --
case state is
when IDLE =>
opx <= rs1_i;
opy <= rs2_i;
if (start_i = '1') then
opx <= rs1_i;
rs1 <= rs1_i;
opy <= rs2_i;
state <= DECODE;
end if;
 
300,7 → 302,7
 
-- Data Output ----------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1_i, remainder)
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1, remainder)
begin
if (valid = '1') then
case cp_op_ff is
316,7 → 318,7
if (opy_is_zero = '0') then
res_o <= div_res;
else
res_o <= rs1_i;
res_o <= rs1;
end if;
when others => -- cp_op_remu_c
res_o <= remainder;
/neorv32_cpu_regfile.vhd
1,14 → 1,19
-- #################################################################################################
-- # << NEORV32 - CPU Data Register File >> #
-- # << NEORV32 - CPU General Purpose Data Register File >> #
-- # ********************************************************************************************* #
-- # General purpose data register file. 32 entries for normal mode (I), 16 entries for embedded #
-- # mode (E) when RISC-V "E" extension is enabled. Register zero (r0) is a "normal" physical reg #
-- # that has to be initialized to zero by the CPU control system. For normal operations r0 cannot #
-- # be written. The register file uses synchronous reads so it can be mapped to FPGA block RAM. #
-- # General purpose data register file. 32 entries (= 1024 bit) for normal mode (RV32I), #
-- # 16 entries (= 512 bit) for embedded mode (RV32E) when RISC-V "E" extension is enabled. #
-- # #
-- # Register zero (r0/x0) is a "normal" physical reg that has to be initialized to zero by the #
-- # CPU control system. For normal operations register zero cannot be written. #
-- # #
-- # The register file uses synchronous read accesses and a *single* (multiplexed) address port #
-- # for writing and reading rs1 and a single read-only port for rs2. Therefore, the whole #
-- # register file can be mapped to a single true dual-port block RAM. #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. #
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
74,46 → 79,51
signal rd_is_r0 : std_ulogic; -- writing to r0?
signal rf_we : std_ulogic;
signal dst_addr : std_ulogic_vector(4 downto 0); -- destination address
signal opa_addr : std_ulogic_vector(4 downto 0); -- rs1/dst address
signal opb_addr : std_ulogic_vector(4 downto 0); -- rs2 address
 
begin
 
-- Register file read/write access --------------------------------------------------------
-- Data Input Mux -------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i;
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data;
 
 
-- Register File Access -------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
rf_access: process(clk_i)
begin
if rising_edge(clk_i) then -- sync read and write
if (CPU_EXTENSION_RISCV_E = false) then -- normal register file with 32 entries
if (rf_we = '1') then
reg_file(to_integer(unsigned(dst_addr(4 downto 0)))) <= rf_write_data;
else
rs1_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c))));
rs2_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c))));
reg_file(to_integer(unsigned(opa_addr(4 downto 0)))) <= rf_write_data;
end if;
rs1_o <= reg_file(to_integer(unsigned(opa_addr(4 downto 0))));
rs2_o <= reg_file(to_integer(unsigned(opb_addr(4 downto 0))));
else -- embedded register file with 16 entries
if (rf_we = '1') then
reg_file_emb(to_integer(unsigned(dst_addr(3 downto 0)))) <= rf_write_data;
else
rs1_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr3_c downto ctrl_rf_rs1_adr0_c))));
rs2_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr3_c downto ctrl_rf_rs2_adr0_c))));
reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0)))) <= rf_write_data;
end if;
rs1_o <= reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0))));
rs2_o <= reg_file_emb(to_integer(unsigned(opb_addr(3 downto 0))));
end if;
end if;
end process rf_access;
 
-- data input mux --
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data;
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i;
 
-- check if we are writing to x0 --
rd_is_r0 <= not or_all_f(ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c)) when (CPU_EXTENSION_RISCV_E = false) else
not or_all_f(ctrl_i(ctrl_rf_rd_adr3_c downto ctrl_rf_rd_adr0_c));
 
-- valid RF write access --
-- valid RF write access? --
rf_we <= (ctrl_i(ctrl_rf_wb_en_c) and (not rd_is_r0)) or ctrl_i(ctrl_rf_r0_we_c);
 
-- destination address --
dst_addr <= ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c) when (ctrl_i(ctrl_rf_r0_we_c) = '0') else (others => '0'); -- force dst=r0?
 
-- access addresses --
opa_addr <= dst_addr when (rf_we = '1') else ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c); -- rd/rs1
opb_addr <= ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c); -- rs2
 
 
end neorv32_cpu_regfile_rtl;
/neorv32_icache.vhd
0,0 → 1,614
-- #################################################################################################
-- # << NEORV32 - Processor-Internal Instruction Cache >> #
-- # ********************************************************************************************* #
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). #
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
-- # #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
-- # conditions and the following disclaimer. #
-- # #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
-- # conditions and the following disclaimer in the documentation and/or other materials #
-- # provided with the distribution. #
-- # #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
-- # endorse or promote products derived from this software without specific prior written #
-- # permission. #
-- # #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
-- # ********************************************************************************************* #
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
-- #################################################################################################
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
library neorv32;
use neorv32.neorv32_package.all;
 
-- Processor-internal instruction cache: sits between a host (CPU-side) bus port and the
-- peripheral (memory-side) bus port. Both ports use the same signal set (address, read/write
-- data, byte enable, read/write enable, cancel, lock, ack, error).
-- NOTE(review): host_wdata_i, host_ben_i and host_we_i are not referenced by the control
-- engine FSM, which ties all write-related bus outputs to zero ("cache is read-only") --
-- presumably they exist only for interface compatibility; confirm against the rest of the file.
entity neorv32_icache is
generic (
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct-mapped, 2=2-way set-associative), has to be a power of 2
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
rstn_i : in std_ulogic; -- global reset, low-active, async
clear_i : in std_ulogic; -- cache clear
-- host controller interface --
host_addr_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus access address
host_rdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus read data
host_wdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus write data
host_ben_i : in std_ulogic_vector(03 downto 0); -- byte enable
host_we_i : in std_ulogic; -- write enable
host_re_i : in std_ulogic; -- read enable
host_cancel_i : in std_ulogic; -- cancel current bus transaction
host_lock_i : in std_ulogic; -- locked/exclusive access
host_ack_o : out std_ulogic; -- bus transfer acknowledge
host_err_o : out std_ulogic; -- bus transfer error
-- peripheral bus interface --
bus_addr_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus access address
bus_rdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus read data
bus_wdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus write data
bus_ben_o : out std_ulogic_vector(03 downto 0); -- byte enable
bus_we_o : out std_ulogic; -- write enable
bus_re_o : out std_ulogic; -- read enable
bus_cancel_o : out std_ulogic; -- cancel current bus transaction
bus_lock_o : out std_ulogic; -- locked/exclusive access
bus_ack_i : in std_ulogic; -- bus transfer acknowledge
bus_err_i : in std_ulogic -- bus transfer error
);
end neorv32_icache;
 
architecture neorv32_icache_rtl of neorv32_icache is
 
-- cache layout --
-- A 32-bit address is split (MSB to LSB) into: tag | block index | word offset | 2-bit byte offset.
constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words
constant cache_index_size_c : natural := index_size_f(CACHE_NUM_BLOCKS);
constant cache_tag_size_c : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset
 
-- cache memory --
-- Component declaration of the cache storage: a read-only host port plus a write-only
-- control port used to fill blocks and to manage the tag/valid state.
component neorv32_icache_memory
generic (
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS : natural := 1 -- associativity; 1=direct-mapped, 2=2-way set-associative (same encoding as the file header and the entity generic)
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
invalidate_i : in std_ulogic; -- invalidate whole cache
-- host cache access (read-only) --
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address
host_re_i : in std_ulogic; -- read enable
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data
-- access status (1 cycle delay to access) --
hit_o : out std_ulogic; -- hit access
-- ctrl cache access (write-only) --
ctrl_en_i : in std_ulogic; -- control interface enable
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address
ctrl_we_i : in std_ulogic; -- write enable (full-word)
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block
ctrl_valid_i : in std_ulogic; -- make selected block valid
ctrl_invalid_i : in std_ulogic -- make selected block invalid
);
end component;
 
-- cache interface --
-- Signal bundle between the control engine and the cache storage. The control engine FSM
-- drives clear, host_addr and all ctrl_* fields and reads hit and host_rdata.
-- NOTE(review): presumably wired to the neorv32_icache_memory instance -- the instantiation
-- is outside this section; confirm the field-to-port mapping there.
type cache_if_t is record
clear : std_ulogic; -- cache clear
--
host_addr : std_ulogic_vector(31 downto 0); -- cpu access address
host_rdata : std_ulogic_vector(31 downto 0); -- cpu read data
--
hit : std_ulogic; -- hit access
--
ctrl_en : std_ulogic; -- control access enable
ctrl_addr : std_ulogic_vector(31 downto 0); -- control access address
ctrl_we : std_ulogic; -- control write enable
ctrl_wdata : std_ulogic_vector(31 downto 0); -- control write data
ctrl_tag_we : std_ulogic; -- control tag write enabled
ctrl_valid_we : std_ulogic; -- control valid flag set
ctrl_invalid_we : std_ulogic; -- control valid flag clear
end record;
signal cache : cache_if_t;
 
-- control engine --
-- FSM states of the cache controller; the reset state is S_CACHE_CLEAR.
type ctrl_engine_state_t is (S_IDLE, S_CACHE_CLEAR, S_CACHE_CHECK, S_CACHE_MISS, S_BUS_DOWNLOAD_REQ, S_BUS_DOWNLOAD_GET,
S_CACHE_RESYNC_0, S_CACHE_RESYNC_1, S_BUS_ERROR, S_ERROR, S_HOST_CANCEL);
-- Control engine registers. Each register has a "*_nxt" companion that is computed by the
-- combinational FSM process and copied into the register on the rising clock edge.
type ctrl_t is record
state : ctrl_engine_state_t; -- current state
state_nxt : ctrl_engine_state_t; -- next state
addr_reg : std_ulogic_vector(31 downto 0); -- address register for block download
addr_reg_nxt : std_ulogic_vector(31 downto 0); -- next value of addr_reg
--
re_buf : std_ulogic; -- read request buffer
re_buf_nxt : std_ulogic; -- next value of re_buf
cancel_buf : std_ulogic; -- cancel request buffer
cancel_buf_nxt : std_ulogic; -- next value of cancel_buf
end record;
signal ctrl : ctrl_t;
 
begin
 
-- Sanity Checks --------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- configuration --
-- Elaboration-time checks of the cache generics. Each report message names the generic exactly
-- as it is declared in this entity so the offending setting can be found directly.
-- power-of-two constraints --
assert is_power_of_two_f(CACHE_NUM_BLOCKS) report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <CACHE_NUM_BLOCKS> has to be a power of 2." severity error;
assert is_power_of_two_f(CACHE_BLOCK_SIZE) report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <CACHE_BLOCK_SIZE> has to be a power of 2." severity error;
assert is_power_of_two_f(CACHE_NUM_SETS) report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be a power of 2." severity error;
-- minimum sizes --
assert (CACHE_NUM_BLOCKS >= 1) report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <CACHE_NUM_BLOCKS> has to be >= 1." severity error;
assert (CACHE_BLOCK_SIZE >= 4) report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <CACHE_BLOCK_SIZE> has to be >= 4." severity error;
-- supported associativity: only 1 or 2 sets --
assert (CACHE_NUM_SETS /= 0) and (CACHE_NUM_SETS <= 2) report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be 1 (direct-mapped) or 2 (2-way set-associative)." severity error;
 
 
-- Control Engine FSM Sync ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- control engine registers that need a defined value after reset --
ctrl_engine_fsm_sync_rst: process(rstn_i, clk_i)
begin
  if rstn_i = '0' then -- asynchronous, low-active reset
    ctrl.cancel_buf <= '0';
    ctrl.re_buf     <= '0';
    ctrl.state      <= S_CACHE_CLEAR; -- first state after reset is the cache-clear state
  elsif rising_edge(clk_i) then -- take over the values computed by the combinational FSM
    ctrl.cancel_buf <= ctrl.cancel_buf_nxt;
    ctrl.re_buf     <= ctrl.re_buf_nxt;
    ctrl.state      <= ctrl.state_nxt;
  end if;
end process ctrl_engine_fsm_sync_rst;
 
-- block download address register: clocked only, no reset value needed --
ctrl_engine_fsm_sync: process(clk_i)
begin
  if rising_edge(clk_i) then
    ctrl.addr_reg <= ctrl.addr_reg_nxt; -- value computed by the combinational FSM
  end if;
end process ctrl_engine_fsm_sync;
 
 
-- Control Engine FSM Comb ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Combinational part of the control engine: computes the next-state/next-register values and
-- drives the cache interface, the host interface and the peripheral bus interface.
-- Every output gets a default value first; the state-specific branches only override what differs.
ctrl_engine_fsm_comb: process(ctrl, cache, clear_i, host_addr_i, host_lock_i, host_re_i, host_cancel_i, bus_rdata_i, bus_ack_i, bus_err_i)
begin
-- control defaults --
ctrl.state_nxt <= ctrl.state;
ctrl.addr_reg_nxt <= ctrl.addr_reg;
ctrl.re_buf_nxt <= (ctrl.re_buf or host_re_i) and (not host_cancel_i); -- keep a pending read request until it is served or canceled
ctrl.cancel_buf_nxt <= ctrl.cancel_buf or host_cancel_i; -- remember a cancel until the next access is accepted in S_IDLE

-- cache defaults --
cache.clear <= '0';
cache.host_addr <= host_addr_i;
cache.ctrl_en <= '0';
cache.ctrl_addr <= ctrl.addr_reg;
cache.ctrl_we <= '0';
cache.ctrl_wdata <= bus_rdata_i;
cache.ctrl_tag_we <= '0';
cache.ctrl_valid_we <= '0';
cache.ctrl_invalid_we <= '0';

-- host interface defaults --
host_ack_o <= '0';
host_err_o <= '0';
host_rdata_o <= cache.host_rdata;

-- peripheral bus interface defaults --
bus_addr_o <= ctrl.addr_reg;
bus_wdata_o <= (others => '0'); -- cache is read-only
bus_ben_o <= (others => '0'); -- cache is read-only
bus_we_o <= '0'; -- cache is read-only
bus_re_o <= '0';
bus_cancel_o <= '0';
bus_lock_o <= host_lock_i; -- lock request is passed through unchanged

-- fsm --
case ctrl.state is

when S_IDLE => -- wait for host access request or cache control operation
-- ------------------------------------------------------------
if (clear_i = '1') then -- cache control operation?
ctrl.state_nxt <= S_CACHE_CLEAR;
elsif (host_re_i = '1') or (ctrl.re_buf = '1') then -- cache access
ctrl.re_buf_nxt <= '0'; -- request is being served now
ctrl.cancel_buf_nxt <= '0'; -- start the new access without a stale cancel
ctrl.state_nxt <= S_CACHE_CHECK;
end if;

when S_CACHE_CLEAR => -- invalidate all cache entries
-- ------------------------------------------------------------
cache.clear <= '1';
ctrl.state_nxt <= S_IDLE;

when S_CACHE_CHECK => -- finalize host access if cache hit
-- ------------------------------------------------------------
if (cache.hit = '1') then -- cache HIT
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled
ctrl.state_nxt <= S_IDLE;
else -- cache MISS
ctrl.state_nxt <= S_CACHE_MISS;
end if;

when S_CACHE_MISS => -- cache miss: prepare block download
-- ------------------------------------------------------------
-- compute block base address --
-- NOTE(review): uses the live host_addr_i, i.e. assumes the host keeps the address stable until here -- confirm
ctrl.addr_reg_nxt <= host_addr_i;
ctrl.addr_reg_nxt((2+cache_offset_size_c)-1 downto 2) <= (others => '0'); -- block-aligned
ctrl.addr_reg_nxt(1 downto 0) <= "00"; -- word-aligned
--
if (host_cancel_i = '1') or (ctrl.cancel_buf = '1') then -- 'early' CPU cancel (abort before bus transaction has even started)
ctrl.state_nxt <= S_IDLE;
else
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ;
end if;

when S_BUS_DOWNLOAD_REQ => -- download new cache block: request new word
-- ------------------------------------------------------------
bus_re_o <= '1'; -- request new read transfer
ctrl.state_nxt <= S_BUS_DOWNLOAD_GET;

when S_BUS_DOWNLOAD_GET => -- download new cache block: wait for bus response
-- ------------------------------------------------------------
cache.ctrl_en <= '1'; -- we are in cache control mode
-- priority: bus error, then buffered host cancel, then bus acknowledge --
if (bus_err_i = '1') then -- bus error
ctrl.state_nxt <= S_BUS_ERROR;
elsif (ctrl.cancel_buf = '1') then -- 'late' CPU cancel (timeout?)
ctrl.state_nxt <= S_HOST_CANCEL;
elsif (bus_ack_i = '1') then -- ACK = write to cache and get next word
cache.ctrl_we <= '1'; -- write to cache
if (and_all_f(ctrl.addr_reg((2+cache_offset_size_c)-1 downto 2)) = '1') then -- block complete? (all word-offset bits set = last word)
cache.ctrl_tag_we <= '1'; -- write tag of current address
cache.ctrl_valid_we <= '1'; -- current block is valid now
ctrl.state_nxt <= S_CACHE_RESYNC_0;
else -- get next word
ctrl.addr_reg_nxt <= std_ulogic_vector(unsigned(ctrl.addr_reg) + 4); -- advance by one 32-bit word
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ;
end if;
end if;

when S_CACHE_RESYNC_0 => -- re-sync host/cache access: cache read-latency
-- ------------------------------------------------------------
ctrl.state_nxt <= S_CACHE_RESYNC_1;

when S_CACHE_RESYNC_1 => -- re-sync host/cache access: finalize CPU request
-- ------------------------------------------------------------
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled
ctrl.state_nxt <= S_IDLE;

when S_BUS_ERROR => -- bus error during download
-- ------------------------------------------------------------
host_err_o <= '1'; -- report the error to the host for this one state
ctrl.state_nxt <= S_ERROR;

when S_ERROR => -- wait for CPU to cancel faulting transfer
-- ------------------------------------------------------------
-- NOTE(review): only the live host_cancel_i is evaluated here, not ctrl.cancel_buf; a cancel that
-- arrived before this state would not release the FSM -- confirm the host always cancels after host_err_o
if (host_cancel_i = '1') then
bus_cancel_o <= '1'; -- forward the cancel to the peripheral bus
ctrl.state_nxt <= S_IDLE;
end if;

when S_HOST_CANCEL => -- host cancels transfer
-- ------------------------------------------------------------
cache.ctrl_en <= '1'; -- we are in cache control mode
cache.ctrl_invalid_we <= '1'; -- invalidate current cache block
bus_cancel_o <= '1';
ctrl.state_nxt <= S_IDLE;

when others => -- undefined
-- ------------------------------------------------------------
ctrl.state_nxt <= S_IDLE;

end case;
end process ctrl_engine_fsm_comb;
 
 
-- Cache Memory ---------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Instance of the cache storage (data, tag and valid-flag memories).
-- The generics are passed through unchanged; all ports are wired to fields of the local <cache> record,
-- except for the clock and the host read-enable, which are taken directly from this unit's inputs.
neorv32_icache_memory_inst: neorv32_icache_memory
generic map (
CACHE_NUM_BLOCKS => CACHE_NUM_BLOCKS, -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE => CACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS => CACHE_NUM_SETS -- associativity / number of sets; 1=direct-mapped, 2=2-way set-associative
)
port map (
-- global control --
clk_i => clk_i, -- global clock, rising edge
invalidate_i => cache.clear, -- invalidate whole cache
-- host cache access (read-only) --
host_addr_i => cache.host_addr, -- access address
host_re_i => host_re_i, -- read enable
host_rdata_o => cache.host_rdata, -- read data
-- access status (1 cycle delay to access) --
hit_o => cache.hit, -- hit access
-- ctrl cache access (write-only) --
ctrl_en_i => cache.ctrl_en, -- control interface enable
ctrl_addr_i => cache.ctrl_addr, -- access address
ctrl_we_i => cache.ctrl_we, -- write enable (full-word)
ctrl_wdata_i => cache.ctrl_wdata, -- write data
ctrl_tag_we_i => cache.ctrl_tag_we, -- write tag to selected block
ctrl_valid_i => cache.ctrl_valid_we, -- make selected block valid
ctrl_invalid_i => cache.ctrl_invalid_we -- make selected block invalid
);
 
end neorv32_icache_rtl;
 
 
-- ###########################################################################################################################################
-- ###########################################################################################################################################
 
 
-- #################################################################################################
-- # << NEORV32 - Cache Memory >> #
-- # ********************************************************************************************* #
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). #
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). #
-- # Read-only for host, write-only for control. All output signals have one cycle latency. #
-- # #
-- # Cache sets are mapped to individual memory components - no multi-dimensional memory arrays #
-- # are used as some synthesis tools have problems to map these to actual BRAM primitives. #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. #
-- # #
-- # Redistribution and use in source and binary forms, with or without modification, are #
-- # permitted provided that the following conditions are met: #
-- # #
-- # 1. Redistributions of source code must retain the above copyright notice, this list of #
-- # conditions and the following disclaimer. #
-- # #
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of #
-- # conditions and the following disclaimer in the documentation and/or other materials #
-- # provided with the distribution. #
-- # #
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to #
-- # endorse or promote products derived from this software without specific prior written #
-- # permission. #
-- # #
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS #
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF #
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE #
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, #
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED #
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING #
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED #
-- # OF THE POSSIBILITY OF SUCH DAMAGE. #
-- # ********************************************************************************************* #
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting #
-- #################################################################################################
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
library neorv32;
use neorv32.neorv32_package.all;
 
-- Cache storage unit: holds the data, tag and valid-flag memories of the instruction cache.
-- Two access paths share the memories:
--  * host path (read-only): addressed by <host_addr_i>; read data and hit status appear one cycle later.
--  * ctrl path (write-only): addressed by <ctrl_addr_i>; selected while <ctrl_en_i> is high.
entity neorv32_icache_memory is
generic (
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets; 1=direct-mapped, 2=2-way set-associative
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
invalidate_i : in std_ulogic; -- invalidate whole cache (clears all valid flags, resets access history)
-- host cache access (read-only) --
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address (byte address; bits 1:0 are ignored)
host_re_i : in std_ulogic; -- read enable (only used to qualify access-history updates)
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data (from the set that hit)
-- access status (1 cycle delay to access) --
hit_o : out std_ulogic; -- hit access (tag match on a valid block in any implemented set)
-- ctrl cache access (write-only) --
ctrl_en_i : in std_ulogic; -- control interface enable (memories are addressed by ctrl_addr_i when set)
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address (byte address; bits 1:0 are ignored)
ctrl_we_i : in std_ulogic; -- write enable (full-word), only effective if ctrl_en_i is set
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block, only effective if ctrl_en_i is set
ctrl_valid_i : in std_ulogic; -- make selected block valid, only effective if ctrl_en_i is set
ctrl_invalid_i : in std_ulogic -- make selected block invalid (has priority over ctrl_valid_i)
);
end neorv32_icache_memory;
 
architecture neorv32_icache_memory_rtl of neorv32_icache_memory is

  -- cache layout --
  -- A 32-bit access address is split (MSB to LSB) into: tag | index | offset | 2-bit byte offset.
  -- NOTE(review): index_size_f is defined in the NEORV32 package; it is presumably log2 of its argument - confirm.
  constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words
  constant cache_index_size_c  : natural := index_size_f(CACHE_NUM_BLOCKS);
  constant cache_tag_size_c    : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset
  constant cache_entries_c     : natural := CACHE_NUM_BLOCKS * (CACHE_BLOCK_SIZE/4); -- number of 32-bit entries (per set)

  -- status flag memory (one valid bit per block, one vector per set) --
  signal valid_flag_s0 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- set 0
  signal valid_flag_s1 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- set 1
  signal valid         : std_ulogic_vector(1 downto 0); -- valid flag read data

  -- tag memory (one tag per block, one array per set) --
  type tag_mem_t is array (0 to CACHE_NUM_BLOCKS-1) of std_ulogic_vector(cache_tag_size_c-1 downto 0);
  signal tag_mem_s0 : tag_mem_t; -- set 0
  signal tag_mem_s1 : tag_mem_t; -- set 1
  type tag_rd_t is array (0 to 1) of std_ulogic_vector(cache_tag_size_c-1 downto 0);
  signal tag : tag_rd_t; -- tag read data

  -- access status (one hit flag per set) --
  signal hit : std_ulogic_vector(1 downto 0);

  -- access address decomposition --
  type acc_addr_t is record
    tag    : std_ulogic_vector(cache_tag_size_c-1 downto 0);
    index  : std_ulogic_vector(cache_index_size_c-1 downto 0);
    offset : std_ulogic_vector(cache_offset_size_c-1 downto 0);
  end record;
  signal host_acc_addr, ctrl_acc_addr : acc_addr_t;

  -- cache data memory --
  type cache_mem_t is array (0 to cache_entries_c-1) of std_ulogic_vector(31 downto 0);
  signal cache_data_memory_s0 : cache_mem_t; -- set 0
  signal cache_data_memory_s1 : cache_mem_t; -- set 1

  -- cache data memory access --
  type cache_rdata_t is array (0 to 1) of std_ulogic_vector(31 downto 0);
  signal cache_rdata  : cache_rdata_t;
  signal cache_index  : std_ulogic_vector(cache_index_size_c-1 downto 0);
  signal cache_offset : std_ulogic_vector(cache_offset_size_c-1 downto 0);
  signal cache_addr   : std_ulogic_vector((cache_index_size_c+cache_offset_size_c)-1 downto 0); -- index & offset
  signal cache_we     : std_ulogic; -- write enable (full-word)
  signal set_select   : std_ulogic; -- set that is targeted by control write accesses

  -- access history --
  type history_t is record
    re_ff          : std_ulogic; -- host read enable, delayed by one cycle
    last_used_set  : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- per block: set that caused the most recent hit
    to_be_replaced : std_ulogic; -- registered <last_used_set> entry of the currently addressed block
  end record;
  signal history : history_t;

begin

  -- Sanity Checks --------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- All per-set structures of this unit (<valid>, <tag>, <hit>, <cache_rdata> and the *_s0/*_s1 memories)
  -- are hard-wired for two sets. Any other set count would either never produce a hit (0 sets: the
  -- comparator loop range is empty) or index beyond those two-entry arrays (more than 2 sets).
  assert not ((CACHE_NUM_SETS /= 1) and (CACHE_NUM_SETS /= 2)) report "NEORV32 PROCESSOR CONFIG ERROR! i-cache memory: number of sets <CACHE_NUM_SETS> has to be 1 (direct-mapped) or 2 (2-way set-associative)." severity error;


  -- Access Address Decomposition -----------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  host_acc_addr.tag    <= host_addr_i(31 downto 31-(cache_tag_size_c-1));
  host_acc_addr.index  <= host_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c);
  host_acc_addr.offset <= host_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset

  ctrl_acc_addr.tag    <= ctrl_addr_i(31 downto 31-(cache_tag_size_c-1));
  ctrl_acc_addr.index  <= ctrl_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c);
  ctrl_acc_addr.offset <= ctrl_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset


  -- Cache Access History -------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Tracks, per block index, which set produced the most recent hit.
  access_history: process(clk_i)
  begin
    if rising_edge(clk_i) then
      history.re_ff <= host_re_i; -- aligns the read request with the one-cycle-delayed hit status
      if (invalidate_i = '1') then -- invalidate whole cache
        history.last_used_set <= (others => '1'); -- marks set 1 as "last used" for every block -> set 0 is replaced first
      elsif (history.re_ff = '1') and (or_all_f(hit) = '1') then -- store last accessed set that caused a hit
        history.last_used_set(to_integer(unsigned(cache_index))) <= not hit(0); -- '0' if set 0 hit, '1' otherwise
      end if;
      -- registered read of the history flag of the currently addressed block --
      history.to_be_replaced <= history.last_used_set(to_integer(unsigned(cache_index)));
    end if;
  end process access_history;

  -- which set is going to be replaced? -> opposite of last used set = least recently used set --
  set_select <= '0' when (CACHE_NUM_SETS = 1) else (not history.to_be_replaced);


  -- Status flag memory ---------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Write priority: global invalidate > invalidate selected block > validate selected block.
  -- Block-level writes require <ctrl_en_i> and go to the set chosen by <set_select>.
  status_memory: process(clk_i)
  begin
    if rising_edge(clk_i) then
      -- write access --
      if (invalidate_i = '1') then -- invalidate whole cache
        valid_flag_s0 <= (others => '0');
        valid_flag_s1 <= (others => '0');
      elsif (ctrl_en_i = '1') then
        if (ctrl_invalid_i = '1') then -- make current block invalid
          if (set_select = '0') then
            valid_flag_s0(to_integer(unsigned(cache_index))) <= '0';
          else
            valid_flag_s1(to_integer(unsigned(cache_index))) <= '0';
          end if;
        elsif (ctrl_valid_i = '1') then -- make current block valid
          if (set_select = '0') then
            valid_flag_s0(to_integer(unsigned(cache_index))) <= '1';
          else
            valid_flag_s1(to_integer(unsigned(cache_index))) <= '1';
          end if;
        end if;
      end if;
      -- read access (sync) - performed every cycle, returns the flag value from before a same-cycle write --
      valid(0) <= valid_flag_s0(to_integer(unsigned(cache_index)));
      valid(1) <= valid_flag_s1(to_integer(unsigned(cache_index)));
    end if;
  end process status_memory;


  -- Tag memory -----------------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Either a write or a read is performed per cycle: <tag> keeps its previous value during a write cycle.
  -- NOTE(review): this write-or-read structure is presumably intentional to ease memory inference - confirm.
  tag_memory: process(clk_i)
  begin
    if rising_edge(clk_i) then
      if (ctrl_en_i = '1') and (ctrl_tag_we_i = '1') then -- write access
        if (set_select = '0') then
          tag_mem_s0(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag;
        else
          tag_mem_s1(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag;
        end if;
      else -- read access
        tag(0) <= tag_mem_s0(to_integer(unsigned(cache_index)));
        tag(1) <= tag_mem_s1(to_integer(unsigned(cache_index)));
      end if;
    end if;
  end process tag_memory;

  -- comparator --
  -- A set hits if its (registered) tag equals the tag of the current host address and its block is valid.
  -- Only the implemented sets (0 to CACHE_NUM_SETS-1) are checked; remaining hit flags stay '0'.
  comparator: process(host_acc_addr, tag, valid)
  begin
    hit <= (others => '0');
    for i in 0 to CACHE_NUM_SETS-1 loop
      if (host_acc_addr.tag = tag(i)) and (valid(i) = '1') then
        hit(i) <= '1';
      end if;
    end loop; -- i
  end process comparator;

  -- global hit --
  hit_o <= or_all_f(hit);


  -- Cache Data Memory ----------------------------------------------------------------------
  -- -------------------------------------------------------------------------------------------
  -- Either a write or a read is performed per cycle: <cache_rdata> keeps its previous value during a write cycle.
  cache_mem_access: process(clk_i)
  begin
    if rising_edge(clk_i) then
      if (cache_we = '1') then -- write access from control (full-word)
        if (set_select = '0') then
          cache_data_memory_s0(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i;
        else
          cache_data_memory_s1(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i;
        end if;
      else -- read access from host (full-word)
        cache_rdata(0) <= cache_data_memory_s0(to_integer(unsigned(cache_addr)));
        cache_rdata(1) <= cache_data_memory_s1(to_integer(unsigned(cache_addr)));
      end if;
    end if;
  end process cache_mem_access;

  -- data output: set 0 data if set 0 hits (or if there is only one set), set 1 data otherwise --
  host_rdata_o <= cache_rdata(0) when (hit(0) = '1') or (CACHE_NUM_SETS = 1) else cache_rdata(1);

  -- cache block ram access address --
  cache_addr <= cache_index & cache_offset;

  -- cache access select: host address while the control interface is disabled, control address otherwise --
  cache_index  <= host_acc_addr.index  when (ctrl_en_i = '0') else ctrl_acc_addr.index;
  cache_offset <= host_acc_addr.offset when (ctrl_en_i = '0') else ctrl_acc_addr.offset;
  cache_we     <= '0' when (ctrl_en_i = '0') else ctrl_we_i;


end neorv32_icache_memory_rtl;
/neorv32_package.vhd
55,7 → 55,7
-- Architecture Constants (do not modify!)= -----------------------------------------------
-- -------------------------------------------------------------------------------------------
constant data_width_c : natural := 32; -- data width - do not change!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050000"; -- no touchy!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050003"; -- no touchy!
constant pmp_max_r_c : natural := 8; -- max PMP regions - FIXED!
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off!
constant rf_r0_is_reg_c : boolean := true; -- reg_file.r0 is a *physical register* that has to be initialized to zero by the CPU HW
708,16 → 708,17
constant hpmcnt_event_cir_c : natural := 3; -- Retired compressed instruction
constant hpmcnt_event_wait_if_c : natural := 4; -- Instruction fetch memory wait cycle
constant hpmcnt_event_wait_ii_c : natural := 5; -- Instruction issue wait cycle
constant hpmcnt_event_load_c : natural := 6; -- Load operation
constant hpmcnt_event_store_c : natural := 7; -- Store operation
constant hpmcnt_event_wait_ls_c : natural := 8; -- Load/store memory wait cycle
constant hpmcnt_event_jump_c : natural := 9; -- Unconditional jump
constant hpmcnt_event_branch_c : natural := 10; -- Conditional branch (taken or not taken)
constant hpmcnt_event_tbranch_c : natural := 11; -- Conditional taken branch
constant hpmcnt_event_trap_c : natural := 12; -- Entered trap
constant hpmcnt_event_illegal_c : natural := 13; -- Illegal instruction exception
constant hpmcnt_event_wait_mc_c : natural := 6; -- Multi-cycle ALU-operation wait cycle
constant hpmcnt_event_load_c : natural := 7; -- Load operation
constant hpmcnt_event_store_c : natural := 8; -- Store operation
constant hpmcnt_event_wait_ls_c : natural := 9; -- Load/store memory wait cycle
constant hpmcnt_event_jump_c : natural := 10; -- Unconditional jump
constant hpmcnt_event_branch_c : natural := 11; -- Conditional branch (taken or not taken)
constant hpmcnt_event_tbranch_c : natural := 12; -- Conditional taken branch
constant hpmcnt_event_trap_c : natural := 13; -- Entered trap
constant hpmcnt_event_illegal_c : natural := 14; -- Illegal instruction exception
--
constant hpmcnt_event_size_c : natural := 14; -- length of this list
constant hpmcnt_event_size_c : natural := 15; -- length of this list
 
-- Clock Generator ------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
767,6 → 768,7
ICACHE_EN : boolean := false; -- implement instruction cache
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2
-- External memory interface --
MEM_EXT_EN : boolean := false; -- implement external memory bus interface?
-- Processor peripherals --
1118,12 → 1120,13
);
end component;
 
-- Component: CPU Cache -------------------------------------------------------------------
-- Component: CPU Instruction Cache -------------------------------------------------------
-- -------------------------------------------------------------------------------------------
component neorv32_cache
component neorv32_icache
generic (
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE : natural := 16 -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct_mapped), has to be a power of 2
);
port (
-- global control --
/neorv32_sysinfo.vhd
145,15 → 145,15
sysinfo_mem(2)(31 downto 26) <= (others => '0'); -- reserved
 
-- SYSINFO(3): Cache configuration --
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes)
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block)
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes)
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block)
sysinfo_mem(3)(11 downto 08) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_ASSOCIATIVITY), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(associativity)
sysinfo_mem(3)(15 downto 12) <= (others => '0'); -- replacement strategy (irrelevant since i-cache is read-only)
sysinfo_mem(3)(15 downto 12) <= "0001" when (ICACHE_ASSOCIATIVITY > 1) and (ICACHE_EN = true) else (others => '0'); -- i-cache: replacement strategy (LRU only (yet))
--
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved (for d-cache.block_size)
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved (for d-cache.num_blocks)
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved (for d-cache.associativity)
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved (for d-cache.replacement_Strategy)
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved - d-cache: log2(block_size)
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved - d-cache: log2(num_blocks)
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved - d-cache: log2(associativity)
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved - d-cache: replacement strategy
 
-- SYSINFO(4): Base address of instruction memory space --
sysinfo_mem(4) <= ispace_base_c; -- defined in neorv32_package.vhd file
/neorv32_top.vhd
80,6 → 80,7
ICACHE_EN : boolean := false; -- implement instruction cache
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2
-- External memory interface --
MEM_EXT_EN : boolean := false; -- implement external memory bus interface?
-- Processor peripherals --
327,7 → 328,7
end process clock_generator_edge;
 
 
-- CPU ------------------------------------------------------------------------------------
-- CPU Core -------------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
neorv32_cpu_inst: neorv32_cpu
generic map (
412,10 → 413,11
-- -------------------------------------------------------------------------------------------
neorv32_icache_inst_true:
if (ICACHE_EN = true) generate
neorv32_icache_inst: neorv32_cache
neorv32_icache_inst: neorv32_icache
generic map (
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2
CACHE_NUM_SETS => ICACHE_ASSOCIATIVITY -- associativity / number of sets (1=direct_mapped), has to be a power of 2
)
port map (
-- global control --
462,7 → 464,7
end generate;
 
 
-- CPU Crossbar Switch --------------------------------------------------------------------
-- CPU Bus Switch -------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
neorv32_busswitch_inst: neorv32_busswitch
generic map (
533,7 → 535,7
IMEM_BASE => imem_base_c, -- memory base address
IMEM_SIZE => MEM_INT_IMEM_SIZE, -- processor-internal instruction memory size in bytes
IMEM_AS_ROM => MEM_INT_IMEM_ROM, -- implement IMEM as read-only memory?
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader?
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader?
)
port map (
clk_i => clk_i, -- global clock line
613,13 → 615,13
if (MEM_EXT_EN = true) generate
neorv32_wishbone_inst: neorv32_wishbone
generic map (
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode
-- Internal instruction memory --
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes
-- Internal data memory --
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes
)
port map (
-- global control --
1006,34 → 1008,34
neorv32_sysinfo_inst: neorv32_sysinfo
generic map (
-- General --
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader?
USER_CODE => USER_CODE, -- custom user code
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader?
USER_CODE => USER_CODE, -- custom user code
-- internal Instruction memory --
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM
-- Internal Data memory --
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes
-- Internal Cache memory --
ICACHE_EN => ICACHE_EN, -- implement instruction cache
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY => 1, -- i-cache: associativity (min 1), has to be a power 2
ICACHE_EN => ICACHE_EN, -- implement instruction cache
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2
ICACHE_ASSOCIATIVITY => ICACHE_ASSOCIATIVITY, -- i-cache: associativity (min 1), has to be a power 2
-- External memory interface --
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface?
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface?
-- Processor peripherals --
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)?
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)?
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)?
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)?
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)?
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)?
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)?
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)?
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)?
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)?
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)?
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)?
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)?
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)?
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)?
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)?
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)?
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)?
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)?
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)?
)
port map (
-- host access --

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.