URL
https://opencores.org/ocsvn/neorv32/neorv32/trunk
Subversion Repositories neorv32
Compare Revisions
- This comparison shows the changes necessary to convert path
/neorv32/trunk/rtl/core
- from Rev 44 to Rev 45
- ↔ Reverse comparison
Rev 44 → Rev 45
/neorv32_cache.vhd
File deleted
/neorv32_cpu.vhd
166,8 → 166,8
|
-- Instruction prefetch buffer size -- |
assert not (is_power_of_two_f(ipb_entries_c) = false) report "NEORV32 CPU CONFIG ERROR! Number of entries in instruction prefetch buffer <ipb_entries_c> has to be a power of two." severity error; |
-- A extension - only lr.w and sc.w supported yet -- |
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports >lr.w< and >sc.w< instructions yet." severity warning; |
-- A extension - only lr.w and sc.w are supported yet -- |
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports <lr.w> and <sc.w> instructions." severity warning; |
|
-- Bit manipulation notifier -- |
assert not (CPU_EXTENSION_RISCV_B = true) report "NEORV32 CPU CONFIG WARNING! Bit manipulation extension (B) only supports 'base' instruction sub-set (Zbb) yet and is still 'unofficial' (not-ratified)." severity warning; |
253,7 → 253,7
|
-- Register File -------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_regfile_inst: neorv32_cpu_regfile |
neorv32_cpu_regfile_inst: neorv32_cpu_regfile |
generic map ( |
CPU_EXTENSION_RISCV_E => CPU_EXTENSION_RISCV_E -- implement embedded RF extension? |
) |
/neorv32_cpu_control.vhd
669,8 → 669,8
-- PC update -- |
if (execute_engine.pc_we = '1') then |
case execute_engine.pc_mux_sel is |
when "00" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment |
when "01" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch |
when "00" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch |
when "01" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment |
when "10" => execute_engine.pc <= csr.mtvec(data_width_c-1 downto 1) & '0'; -- trap enter |
when others => execute_engine.pc <= csr.mepc(data_width_c-1 downto 1) & '0'; -- trap exit |
end case; |
813,7 → 813,7
execute_engine.sleep_nxt <= execute_engine.sleep; |
execute_engine.if_rst_nxt <= execute_engine.if_rst; |
-- |
execute_engine.pc_mux_sel <= (others => '0'); |
execute_engine.pc_mux_sel <= (others => '0'); -- select "slowest path" as default |
execute_engine.pc_we <= '0'; |
|
-- instruction dispatch -- |
874,10 → 874,10
|
when DISPATCH => -- Get new command from instruction issue engine |
-- ------------------------------------------------------------ |
execute_engine.pc_mux_sel <= "00"; -- linear next PC |
-- IR update -- |
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction being executed |
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0); |
execute_engine.pc_mux_sel <= "01"; -- linear next PC |
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction being executed |
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0); |
-- |
if (cmd_issue.valid = '1') then -- instruction available? |
-- IR update - exceptions -- |
1032,11 → 1032,15
|
when opcode_syscsr_c => -- system/csr access |
-- ------------------------------------------------------------ |
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment |
execute_engine.state_nxt <= SYS_ENV; |
else -- CSR access |
execute_engine.state_nxt <= CSR_ACCESS; |
if (CPU_EXTENSION_RISCV_Zicsr = true) then |
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment |
execute_engine.state_nxt <= SYS_ENV; |
else -- CSR access |
execute_engine.state_nxt <= CSR_ACCESS; |
end if; |
else |
execute_engine.state_nxt <= SYS_WAIT; |
end if; |
|
when others => -- undefined |
1051,16 → 1055,16
execute_engine.pc_mux_sel <= "11"; -- csr.mepc (only relevant for MRET) |
case execute_engine.i_reg(instr_funct12_msb_c downto instr_funct12_lsb_c) is |
when funct12_ecall_c => -- ECALL |
trap_ctrl.env_call <= '1'; |
trap_ctrl.env_call <= '1'; |
when funct12_ebreak_c => -- EBREAK |
trap_ctrl.break_point <= '1'; |
trap_ctrl.break_point <= '1'; |
when funct12_mret_c => -- MRET |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_we <= '1'; -- update PC from MEPC |
fetch_engine.reset <= '1'; |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_we <= '1'; -- update PC from MEPC |
fetch_engine.reset <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
when funct12_wfi_c => -- WFI |
execute_engine.sleep_nxt <= '1'; -- good night |
execute_engine.sleep_nxt <= '1'; -- good night |
when others => -- undefined |
NULL; |
end case; |
1079,7 → 1083,7
csr.we_nxt <= '0'; |
end case; |
-- register file write back -- |
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input = CSR output |
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input <= CSR output |
ctrl_nxt(ctrl_rf_wb_en_c) <= '1'; -- valid RF write-back |
execute_engine.state_nxt <= DISPATCH; |
|
1109,7 → 1113,7
ctrl_nxt(ctrl_rf_in_mux_msb_c) <= '0'; -- RF input = ALU result |
ctrl_nxt(ctrl_rf_wb_en_c) <= execute_engine.i_reg(instr_opcode_lsb_c+2); -- valid RF write-back? (is jump-and-link?) |
-- destination address -- |
execute_engine.pc_mux_sel <= "01"; -- alu.add = branch/jump destination |
execute_engine.pc_mux_sel <= "00"; -- alu.add = branch/jump destination |
if (execute_engine.i_reg(instr_opcode_lsb_c+2) = '1') or (execute_engine.branch_taken = '1') then -- JAL/JALR or taken branch |
execute_engine.pc_we <= '1'; -- update PC |
fetch_engine.reset <= '1'; -- trigger new instruction fetch from modified PC |
1123,9 → 1127,9
when FENCE_OP => -- fence operations - execution |
-- ------------------------------------------------------------ |
execute_engine.state_nxt <= SYS_WAIT; |
execute_engine.pc_mux_sel <= "00"; -- linear next PC = "refetch" next instruction (only relevant for fence.i) |
execute_engine.pc_mux_sel <= "01"; -- linear next PC = "refetch" next instruction (only relevant for fence.i) |
-- FENCE.I -- |
if (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) and (CPU_EXTENSION_RISCV_Zifencei = true) then |
if (CPU_EXTENSION_RISCV_Zifencei = true) and (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) then |
execute_engine.pc_we <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
fetch_engine.reset <= '1'; |
2076,7 → 2080,7
csr.minstreth <= std_ulogic_vector(unsigned(csr.minstreth) + 1); |
end if; |
|
-- [machine] high performance counters -- |
-- [machine] hardware performance monitors (counters) -- |
for i in 0 to HPM_NUM_CNTS-1 loop |
-- [m]hpmcounter* -- |
if (csr.we = '1') and (csr.addr = std_ulogic_vector(unsigned(csr_mhpmcounter3_c) + i)) then -- write access |
2137,6 → 2141,7
cnt_event_nxt(hpmcnt_event_cir_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- retired compressed instruction |
cnt_event_nxt(hpmcnt_event_wait_if_c) <= '1' when (fetch_engine.state = IFETCH_ISSUE) and (fetch_engine.state_prev = IFETCH_ISSUE) else '0'; -- instruction fetch memory wait cycle |
cnt_event_nxt(hpmcnt_event_wait_ii_c) <= '1' when (execute_engine.state = DISPATCH) and (execute_engine.state_prev = DISPATCH) else '0'; -- instruction issue wait cycle |
cnt_event_nxt(hpmcnt_event_wait_mc_c) <= '1' when (execute_engine.state = ALU_WAIT) and (execute_engine.state_prev = ALU_WAIT) else '0'; -- multi-cycle alu-operation wait cycle |
|
cnt_event_nxt(hpmcnt_event_load_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_rd_c) = '1') else '0'; -- load operation |
cnt_event_nxt(hpmcnt_event_store_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_wr_c) = '1') else '0'; -- store operation |
/neorv32_cpu_cp_bitmanip.vhd
1,8 → 1,9
-- ################################################################################################# |
-- # << NEORV32 - CPU Co-Processor: Bit manipulation unit (RISC-V "B" Extension) >> # |
-- # ********************************************************************************************* # |
-- # The bit manipulation unit is implemented as co-processor that has a processing latency of at # |
-- # least 3 cycles. Only the "base" bit manipulation subset ('Zbb') is supported yet. # |
-- # The bit manipulation unit is implemented as co-processor that has a processing latency of 1 # |
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related) # |
-- # operations. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
81,12 → 82,15
constant op_width_c : natural := 15; |
|
-- controller -- |
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_BUSY_LOGIC); |
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT); |
signal ctrl_state : ctrl_state_t; |
signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0); |
signal valid : std_ulogic; |
|
-- operand buffers -- |
signal rs1_reg, rs2_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal less_ff : std_ulogic; |
|
-- shift amount (immediate or register) -- |
signal shamt : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); |
146,12 → 150,13
cmd_buf <= (others => '0'); |
rs1_reg <= (others => '0'); |
rs2_reg <= (others => '0'); |
less_ff <= '0'; |
shifter.start <= '0'; |
valid_o <= '0'; |
valid <= '0'; |
elsif rising_edge(clk_i) then |
-- defaults -- |
shifter.start <= '0'; |
valid_o <= '0'; |
valid <= '0'; |
|
-- fsm -- |
case ctrl_state is |
159,6 → 164,7
when S_IDLE => -- wait for operation trigger |
-- ------------------------------------------------------------ |
if (start_i = '1') then |
less_ff <= cmp_i(alu_cmp_less_c); |
cmd_buf <= cmd; |
rs1_reg <= rs1_i; |
rs2_reg <= rs2_i; |
166,7 → 172,8
shifter.start <= '1'; |
ctrl_state <= S_START_SHIFT; |
else |
ctrl_state <= S_BUSY_LOGIC; |
valid <= '1'; |
ctrl_state <= S_IDLE; |
end if; |
end if; |
|
177,14 → 184,10
when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish |
-- ------------------------------------------------------------ |
if (shifter.run = '0') then |
ctrl_state <= S_BUSY_LOGIC; |
valid <= '1'; |
ctrl_state <= S_IDLE; |
end if; |
|
when S_BUSY_LOGIC => -- single-cycle logic operation (and output) |
-- ------------------------------------------------------------ |
valid_o <= '1'; |
ctrl_state <= S_IDLE; |
|
when others => -- undefined |
-- ------------------------------------------------------------ |
ctrl_state <= S_IDLE; |
262,8 → 265,8
res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt; |
|
-- min/max select -- |
res_int(op_min_c) <= rs1_reg when (cmp_i(alu_cmp_less_c) = '1') else rs2_reg; |
res_int(op_max_c) <= rs2_reg when (cmp_i(alu_cmp_less_c) = '1') else rs1_reg; |
res_int(op_min_c) <= rs1_reg when (less_ff = '1') else rs2_reg; |
res_int(op_max_c) <= rs2_reg when (less_ff = '1') else rs1_reg; |
|
-- sign-extension -- |
res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7)); |
314,22 → 317,24
|
-- Output Gate ---------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
output_gate: process(clk_i) |
output_gate: process(valid, res_out) |
begin |
if rising_edge(clk_i) then |
if (valid = '1') then |
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or |
res_out(op_min_c) or res_out(op_max_c) or |
res_out(op_sextb_c) or res_out(op_sexth_c) or |
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or |
res_out(op_pack_c) or |
res_out(op_ror_c) or res_out(op_rol_c) or |
res_out(op_rev8_c) or |
res_out(op_orcb_c); |
else |
res_o <= (others => '0'); |
if (ctrl_state = S_BUSY_LOGIC) then |
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or |
res_out(op_min_c) or res_out(op_max_c) or |
res_out(op_sextb_c) or res_out(op_sexth_c) or |
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or |
res_out(op_pack_c) or |
res_out(op_ror_c) or res_out(op_rol_c) or |
res_out(op_rev8_c) or |
res_out(op_orcb_c); |
end if; |
end if; |
end process output_gate; |
|
-- valid output -- |
valid_o <= valid; |
|
|
end neorv32_cpu_cp_bitmanip_rtl; |
/neorv32_cpu_cp_muldiv.vhd
1,5 → 1,5
-- ################################################################################################# |
-- # << NEORV32 - CPU Co-Processor: MULDIV unit (RISC-V "M" Extension)>> # |
-- # << NEORV32 - CPU Co-Processor: Integer Multiplier/Divider Unit (RISC-V "M" Extension)>> # |
-- # ********************************************************************************************* # |
-- # Multiplier and Divider unit. Implements the RISC-V RV32-M CPU extension. # |
-- # Multiplier core (signed/unsigned) uses serial algorithm. -> 32+4 cycles latency # |
86,7 → 86,7
signal cp_op_ff : std_ulogic_vector(2 downto 0); -- operation that was executed |
signal start : std_ulogic; |
signal operation : std_ulogic; |
signal opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands |
signal rs1, opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands |
signal opx_is_signed : std_ulogic; |
signal opy_is_signed : std_ulogic; |
signal opy_is_zero : std_ulogic; |
121,6 → 121,7
state <= IDLE; |
opx <= (others => '0'); |
opy <= (others => '0'); |
rs1 <= (others => '0'); |
cnt <= (others => '0'); |
start <= '0'; |
valid <= '0'; |
136,9 → 137,10
-- FSM -- |
case state is |
when IDLE => |
opx <= rs1_i; |
opy <= rs2_i; |
if (start_i = '1') then |
opx <= rs1_i; |
rs1 <= rs1_i; |
opy <= rs2_i; |
state <= DECODE; |
end if; |
|
300,7 → 302,7
|
-- Data Output ---------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1_i, remainder) |
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1, remainder) |
begin |
if (valid = '1') then |
case cp_op_ff is |
316,7 → 318,7
if (opy_is_zero = '0') then |
res_o <= div_res; |
else |
res_o <= rs1_i; |
res_o <= rs1; |
end if; |
when others => -- cp_op_remu_c |
res_o <= remainder; |
/neorv32_cpu_regfile.vhd
1,14 → 1,19
-- ################################################################################################# |
-- # << NEORV32 - CPU Data Register File >> # |
-- # << NEORV32 - CPU General Purpose Data Register File >> # |
-- # ********************************************************************************************* # |
-- # General purpose data register file. 32 entries for normal mode (I), 16 entries for embedded # |
-- # mode (E) when RISC-V "E" extension is enabled. Register zero (r0) is a "normal" physical reg # |
-- # that has to be initialized to zero by the CPU control system. For normal operations r0 cannot # |
-- # be written. The register file uses synchronous reads so it can be mapped to FPGA block RAM. # |
-- # General purpose data register file. 32 entries (= 1024 bit) for normal mode (RV32I), # |
-- # 16 entries (= 512 bit) for embedded mode (RV32E) when RISC-V "E" extension is enabled. # |
-- # # |
-- # Register zero (r0/x0) is a "normal" physical reg that has to be initialized to zero by the # |
-- # CPU control system. For normal operations register zero cannot be written. # |
-- # # |
-- # The register file uses synchronous read accesses and a *single* (multiplexed) address port # |
-- # for writing and reading rs1 and a single read-only port for rs2. Therefore, the whole # |
-- # register file can be mapped to a single true dual-port block RAM. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
74,46 → 79,51
signal rd_is_r0 : std_ulogic; -- writing to r0? |
signal rf_we : std_ulogic; |
signal dst_addr : std_ulogic_vector(4 downto 0); -- destination address |
signal opa_addr : std_ulogic_vector(4 downto 0); -- rs1/dst address |
signal opb_addr : std_ulogic_vector(4 downto 0); -- rs2 address |
|
begin |
|
-- Register file read/write access -------------------------------------------------------- |
-- Data Input Mux ------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i; |
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data; |
|
|
-- Register File Access ------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
rf_access: process(clk_i) |
begin |
if rising_edge(clk_i) then -- sync read and write |
if (CPU_EXTENSION_RISCV_E = false) then -- normal register file with 32 entries |
if (rf_we = '1') then |
reg_file(to_integer(unsigned(dst_addr(4 downto 0)))) <= rf_write_data; |
else |
rs1_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c)))); |
rs2_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c)))); |
reg_file(to_integer(unsigned(opa_addr(4 downto 0)))) <= rf_write_data; |
end if; |
rs1_o <= reg_file(to_integer(unsigned(opa_addr(4 downto 0)))); |
rs2_o <= reg_file(to_integer(unsigned(opb_addr(4 downto 0)))); |
else -- embedded register file with 16 entries |
if (rf_we = '1') then |
reg_file_emb(to_integer(unsigned(dst_addr(3 downto 0)))) <= rf_write_data; |
else |
rs1_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr3_c downto ctrl_rf_rs1_adr0_c)))); |
rs2_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr3_c downto ctrl_rf_rs2_adr0_c)))); |
reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0)))) <= rf_write_data; |
end if; |
rs1_o <= reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0)))); |
rs2_o <= reg_file_emb(to_integer(unsigned(opb_addr(3 downto 0)))); |
end if; |
end if; |
end process rf_access; |
|
-- data input mux -- |
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data; |
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i; |
|
-- check if we are writing to x0 -- |
rd_is_r0 <= not or_all_f(ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c)) when (CPU_EXTENSION_RISCV_E = false) else |
not or_all_f(ctrl_i(ctrl_rf_rd_adr3_c downto ctrl_rf_rd_adr0_c)); |
|
-- valid RF write access -- |
-- valid RF write access? -- |
rf_we <= (ctrl_i(ctrl_rf_wb_en_c) and (not rd_is_r0)) or ctrl_i(ctrl_rf_r0_we_c); |
|
-- destination address -- |
dst_addr <= ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c) when (ctrl_i(ctrl_rf_r0_we_c) = '0') else (others => '0'); -- force dst=r0? |
|
-- access addresses -- |
opa_addr <= dst_addr when (rf_we = '1') else ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c); -- rd/rs1 |
opb_addr <= ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c); -- rs2 |
|
|
end neorv32_cpu_regfile_rtl; |
/neorv32_icache.vhd
0,0 → 1,614
-- ################################################################################################# |
-- # << NEORV32 - Processor-Internal Instruction Cache >> # |
-- # ********************************************************************************************* # |
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). # |
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
-- # # |
-- # 1. Redistributions of source code must retain the above copyright notice, this list of # |
-- # conditions and the following disclaimer. # |
-- # # |
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of # |
-- # conditions and the following disclaimer in the documentation and/or other materials # |
-- # provided with the distribution. # |
-- # # |
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # |
-- # endorse or promote products derived from this software without specific prior written # |
-- # permission. # |
-- # # |
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # |
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # |
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # |
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # |
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # |
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # |
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # |
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # |
-- # OF THE POSSIBILITY OF SUCH DAMAGE. # |
-- # ********************************************************************************************* # |
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # |
-- ################################################################################################# |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
library neorv32; |
use neorv32.neorv32_package.all; |
|
entity neorv32_icache is |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
rstn_i : in std_ulogic; -- global reset, low-active, async |
clear_i : in std_ulogic; -- cache clear |
-- host controller interface -- |
host_addr_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus access address |
host_rdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus read data |
host_wdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus write data |
host_ben_i : in std_ulogic_vector(03 downto 0); -- byte enable |
host_we_i : in std_ulogic; -- write enable |
host_re_i : in std_ulogic; -- read enable |
host_cancel_i : in std_ulogic; -- cancel current bus transaction |
host_lock_i : in std_ulogic; -- locked/exclusive access |
host_ack_o : out std_ulogic; -- bus transfer acknowledge |
host_err_o : out std_ulogic; -- bus transfer error |
-- peripheral bus interface -- |
bus_addr_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus access address |
bus_rdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus read data |
bus_wdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus write data |
bus_ben_o : out std_ulogic_vector(03 downto 0); -- byte enable |
bus_we_o : out std_ulogic; -- write enable |
bus_re_o : out std_ulogic; -- read enable |
bus_cancel_o : out std_ulogic; -- cancel current bus transaction |
bus_lock_o : out std_ulogic; -- locked/exclusive access |
bus_ack_i : in std_ulogic; -- bus transfer acknowledge |
bus_err_i : in std_ulogic -- bus transfer error |
); |
end neorv32_icache; |
|
architecture neorv32_icache_rtl of neorv32_icache is |
|
-- cache layout -- |
constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words |
constant cache_index_size_c : natural := index_size_f(CACHE_NUM_BLOCKS); |
constant cache_tag_size_c : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset |
|
-- cache memory -- |
component neorv32_icache_memory |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity; 1=direct-mapped, 2=2-way set-associative |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
invalidate_i : in std_ulogic; -- invalidate whole cache |
-- host cache access (read-only) -- |
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
host_re_i : in std_ulogic; -- read enable |
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data |
-- access status (1 cycle delay to access) -- |
hit_o : out std_ulogic; -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i : in std_ulogic; -- control interface enable |
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
ctrl_we_i : in std_ulogic; -- write enable (full-word) |
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data |
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block |
ctrl_valid_i : in std_ulogic; -- make selected block valid |
ctrl_invalid_i : in std_ulogic -- make selected block invalid |
); |
end component; |
|
-- cache interface -- |
type cache_if_t is record |
clear : std_ulogic; -- cache clear |
-- |
host_addr : std_ulogic_vector(31 downto 0); -- cpu access address |
host_rdata : std_ulogic_vector(31 downto 0); -- cpu read data |
-- |
hit : std_ulogic; -- hit access |
-- |
ctrl_en : std_ulogic; -- control access enable |
ctrl_addr : std_ulogic_vector(31 downto 0); -- control access address |
ctrl_we : std_ulogic; -- control write enable |
ctrl_wdata : std_ulogic_vector(31 downto 0); -- control write data |
ctrl_tag_we : std_ulogic; -- control tag write enabled |
ctrl_valid_we : std_ulogic; -- control valid flag set |
ctrl_invalid_we : std_ulogic; -- control valid flag clear |
end record; |
signal cache : cache_if_t; |
|
-- control engine -- |
-- FSM states: |
--   S_IDLE             : wait for host read request or cache clear request |
--   S_CACHE_CLEAR      : invalidate all cache entries (also the reset state) |
--   S_CACHE_CHECK      : evaluate hit/miss of the current host access |
--   S_CACHE_MISS       : compute block base address, abort on early cancel |
--   S_BUS_DOWNLOAD_REQ : issue bus read request for the next word of the block |
--   S_BUS_DOWNLOAD_GET : wait for bus response and write the word to the cache |
--   S_CACHE_RESYNC_0/1 : wait for cache read latency, then acknowledge the host |
--   S_BUS_ERROR        : signal bus error to the host |
--   S_ERROR            : wait for the host to cancel the faulting transfer |
--   S_HOST_CANCEL      : host canceled during download, invalidate current block |
type ctrl_engine_state_t is (S_IDLE, S_CACHE_CLEAR, S_CACHE_CHECK, S_CACHE_MISS, S_BUS_DOWNLOAD_REQ, S_BUS_DOWNLOAD_GET, |
S_CACHE_RESYNC_0, S_CACHE_RESYNC_1, S_BUS_ERROR, S_ERROR, S_HOST_CANCEL); |
-- each register comes as a pair: current value and combinatorial next value (*_nxt) -- |
type ctrl_t is record |
state : ctrl_engine_state_t; -- current state |
state_nxt : ctrl_engine_state_t; -- next state |
addr_reg : std_ulogic_vector(31 downto 0); -- address register for block download |
addr_reg_nxt : std_ulogic_vector(31 downto 0); |
-- |
re_buf : std_ulogic; -- read request buffer |
re_buf_nxt : std_ulogic; |
cancel_buf : std_ulogic; -- cancel request buffer |
cancel_buf_nxt : std_ulogic; |
end record; |
signal ctrl : ctrl_t; |
|
begin |
|
-- Sanity Checks -------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- configuration: elaboration-time checks of the cache generics -- |
-- (report messages name each generic exactly as it is spelled in this unit) -- |
assert is_power_of_two_f(CACHE_NUM_BLOCKS) report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <CACHE_NUM_BLOCKS> has to be a power of 2." severity error; |
assert is_power_of_two_f(CACHE_BLOCK_SIZE) report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <CACHE_BLOCK_SIZE> has to be a power of 2." severity error; |
assert is_power_of_two_f(CACHE_NUM_SETS) report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be a power of 2." severity error; |
assert (CACHE_NUM_BLOCKS >= 1) report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <CACHE_NUM_BLOCKS> has to be >= 1." severity error; |
assert (CACHE_BLOCK_SIZE >= 4) report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <CACHE_BLOCK_SIZE> has to be >= 4." severity error; |
assert (CACHE_NUM_SETS = 1) or (CACHE_NUM_SETS = 2) report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be 1 (direct-mapped) or 2 (2-way set-associative)." severity error; |
|
|
-- Control Engine FSM Sync ---------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- registers that REQUIRE a specific reset state -- |
-- asynchronous, low-active reset (rstn_i); state is updated on the rising edge of clk_i -- |
ctrl_engine_fsm_sync_rst: process(rstn_i, clk_i) |
begin |
if (rstn_i = '0') then |
ctrl.state <= S_CACHE_CLEAR; -- start in clear state so all cache entries get invalidated after reset |
ctrl.re_buf <= '0'; -- no pending read request |
ctrl.cancel_buf <= '0'; -- no pending cancel request |
elsif rising_edge(clk_i) then |
ctrl.state <= ctrl.state_nxt; |
ctrl.re_buf <= ctrl.re_buf_nxt; |
ctrl.cancel_buf <= ctrl.cancel_buf_nxt; |
end if; |
end process ctrl_engine_fsm_sync_rst; |
|
-- registers that do not require a specific reset state -- |
-- the download address register is written in S_CACHE_MISS before it is used for bus/cache-control accesses -- |
ctrl_engine_fsm_sync: process(clk_i) |
begin |
if rising_edge(clk_i) then |
ctrl.addr_reg <= ctrl.addr_reg_nxt; |
end if; |
end process ctrl_engine_fsm_sync; |
|
|
-- Control Engine FSM Comb ---------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- Combinatorial part of the control engine: computes the next state/register values and drives the -- |
-- host, bus and cache-control outputs. All outputs get a default value first; the active state -- |
-- overrides only what it needs (last assignment wins). -- |
ctrl_engine_fsm_comb: process(ctrl, cache, clear_i, host_addr_i, host_lock_i, host_re_i, host_cancel_i, bus_rdata_i, bus_ack_i, bus_err_i) |
begin |
-- control defaults -- |
ctrl.state_nxt <= ctrl.state; |
ctrl.addr_reg_nxt <= ctrl.addr_reg; |
ctrl.re_buf_nxt <= (ctrl.re_buf or host_re_i) and (not host_cancel_i); -- buffer read request; a cancel clears it |
ctrl.cancel_buf_nxt <= ctrl.cancel_buf or host_cancel_i; -- buffer cancel request (cleared in S_IDLE when a new access starts) |
|
-- cache defaults -- |
cache.clear <= '0'; |
cache.host_addr <= host_addr_i; |
cache.ctrl_en <= '0'; |
cache.ctrl_addr <= ctrl.addr_reg; |
cache.ctrl_we <= '0'; |
cache.ctrl_wdata <= bus_rdata_i; -- bus read data is written straight into the cache |
cache.ctrl_tag_we <= '0'; |
cache.ctrl_valid_we <= '0'; |
cache.ctrl_invalid_we <= '0'; |
|
-- host interface defaults -- |
host_ack_o <= '0'; |
host_err_o <= '0'; |
host_rdata_o <= cache.host_rdata; -- host read data always comes from the cache memory |
|
-- peripheral bus interface defaults -- |
bus_addr_o <= ctrl.addr_reg; |
bus_wdata_o <= (others => '0'); -- cache is read-only |
bus_ben_o <= (others => '0'); -- cache is read-only |
bus_we_o <= '0'; -- cache is read-only |
bus_re_o <= '0'; |
bus_cancel_o <= '0'; |
bus_lock_o <= host_lock_i; -- lock request is passed through |
|
-- fsm -- |
case ctrl.state is |
|
when S_IDLE => -- wait for host access request or cache control operation |
-- ------------------------------------------------------------ |
if (clear_i = '1') then -- cache control operation? (has priority over host access) |
ctrl.state_nxt <= S_CACHE_CLEAR; |
elsif (host_re_i = '1') or (ctrl.re_buf = '1') then -- cache access (new or buffered request) |
ctrl.re_buf_nxt <= '0'; -- request is being served now |
ctrl.cancel_buf_nxt <= '0'; -- discard any old cancel request |
ctrl.state_nxt <= S_CACHE_CHECK; |
end if; |
|
when S_CACHE_CLEAR => -- invalidate all cache entries |
-- ------------------------------------------------------------ |
cache.clear <= '1'; |
ctrl.state_nxt <= S_IDLE; |
|
when S_CACHE_CHECK => -- finalize host access if cache hit |
-- ------------------------------------------------------------ |
if (cache.hit = '1') then -- cache HIT |
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled |
ctrl.state_nxt <= S_IDLE; |
else -- cache MISS |
ctrl.state_nxt <= S_CACHE_MISS; |
end if; |
|
when S_CACHE_MISS => -- cache miss: prepare block download (or abort if the host has canceled) |
-- ------------------------------------------------------------ |
-- compute block base address -- |
ctrl.addr_reg_nxt <= host_addr_i; |
ctrl.addr_reg_nxt((2+cache_offset_size_c)-1 downto 2) <= (others => '0'); -- block-aligned |
ctrl.addr_reg_nxt(1 downto 0) <= "00"; -- word-aligned |
-- |
if (host_cancel_i = '1') or (ctrl.cancel_buf = '1') then -- 'early' CPU cancel (abort before bus transaction has even started) |
ctrl.state_nxt <= S_IDLE; |
else |
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ; |
end if; |
|
when S_BUS_DOWNLOAD_REQ => -- download new cache block: request new word |
-- ------------------------------------------------------------ |
bus_re_o <= '1'; -- request new read transfer |
ctrl.state_nxt <= S_BUS_DOWNLOAD_GET; |
|
when S_BUS_DOWNLOAD_GET => -- download new cache block: wait for bus response |
-- ------------------------------------------------------------ |
cache.ctrl_en <= '1'; -- we are in cache control mode |
-- |
-- priority: bus error > buffered host cancel > bus acknowledge -- |
if (bus_err_i = '1') then -- bus error |
ctrl.state_nxt <= S_BUS_ERROR; |
elsif (ctrl.cancel_buf = '1') then -- 'late' CPU cancel (timeout?) |
ctrl.state_nxt <= S_HOST_CANCEL; |
elsif (bus_ack_i = '1') then -- ACK = write to cache and get next word |
cache.ctrl_we <= '1'; -- write to cache |
if (and_all_f(ctrl.addr_reg((2+cache_offset_size_c)-1 downto 2)) = '1') then -- block complete? (word offset is all-ones = last word) |
cache.ctrl_tag_we <= '1'; -- write tag of current address |
cache.ctrl_valid_we <= '1'; -- current block is valid now |
ctrl.state_nxt <= S_CACHE_RESYNC_0; |
else -- get next word |
ctrl.addr_reg_nxt <= std_ulogic_vector(unsigned(ctrl.addr_reg) + 4); -- next word address (+4 bytes) |
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ; |
end if; |
end if; |
|
when S_CACHE_RESYNC_0 => -- re-sync host/cache access: cache read-latency |
-- ------------------------------------------------------------ |
ctrl.state_nxt <= S_CACHE_RESYNC_1; |
|
when S_CACHE_RESYNC_1 => -- re-sync host/cache access: finalize CPU request |
-- ------------------------------------------------------------ |
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled |
ctrl.state_nxt <= S_IDLE; |
|
when S_BUS_ERROR => -- bus error during download |
-- ------------------------------------------------------------ |
-- NOTE(review): unlike S_HOST_CANCEL, this path does not invalidate the block that was being -- |
-- overwritten; words already downloaded stay in the data memory - confirm this is intended -- |
host_err_o <= '1'; |
ctrl.state_nxt <= S_ERROR; |
|
when S_ERROR => -- wait for CPU to cancel faulting transfer |
-- ------------------------------------------------------------ |
-- NOTE(review): only host_cancel_i is checked here, not the buffered ctrl.cancel_buf - verify against CPU bus protocol -- |
if (host_cancel_i = '1') then |
bus_cancel_o <= '1'; |
ctrl.state_nxt <= S_IDLE; |
end if; |
|
when S_HOST_CANCEL => -- host cancels transfer |
-- ------------------------------------------------------------ |
cache.ctrl_en <= '1'; -- we are in cache control mode |
cache.ctrl_invalid_we <= '1'; -- invalidate current cache block |
bus_cancel_o <= '1'; -- also cancel the pending bus transfer |
ctrl.state_nxt <= S_IDLE; |
|
when others => -- undefined |
-- ------------------------------------------------------------ |
ctrl.state_nxt <= S_IDLE; |
|
end case; |
end process ctrl_engine_fsm_comb; |
|
|
-- Cache Memory --------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- actual cache storage (data, tag and valid-flag memories); controlled via the <cache> signal bundle -- |
neorv32_icache_memory_inst: neorv32_icache_memory |
generic map ( |
CACHE_NUM_BLOCKS => CACHE_NUM_BLOCKS, -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE => CACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS => CACHE_NUM_SETS -- associativity; 1=direct-mapped, 2=2-way set-associative |
) |
port map ( |
-- global control -- |
clk_i => clk_i, -- global clock, rising edge |
invalidate_i => cache.clear, -- invalidate whole cache |
-- host cache access (read-only) -- |
host_addr_i => cache.host_addr, -- access address |
host_re_i => host_re_i, -- read enable (taken directly from the host port) |
host_rdata_o => cache.host_rdata, -- read data |
-- access status (1 cycle delay to access) -- |
hit_o => cache.hit, -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i => cache.ctrl_en, -- control interface enable |
ctrl_addr_i => cache.ctrl_addr, -- access address |
ctrl_we_i => cache.ctrl_we, -- write enable (full-word) |
ctrl_wdata_i => cache.ctrl_wdata, -- write data |
ctrl_tag_we_i => cache.ctrl_tag_we, -- write tag to selected block |
ctrl_valid_i => cache.ctrl_valid_we, -- make selected block valid |
ctrl_invalid_i => cache.ctrl_invalid_we -- make selected block invalid |
); |
|
end neorv32_icache_rtl; |
|
|
-- ########################################################################################################################################### |
-- ########################################################################################################################################### |
|
|
-- ################################################################################################# |
-- # << NEORV32 - Cache Memory >> # |
-- # ********************************************************************************************* # |
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). # |
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). # |
-- # Read-only for host, write-only for control. All output signals have one cycle latency. # |
-- # # |
-- # Cache sets are mapped to individual memory components - no multi-dimensional memory arrays # |
-- # are used as some synthesis tools have problems to map these to actual BRAM primitives. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
-- # # |
-- # 1. Redistributions of source code must retain the above copyright notice, this list of # |
-- # conditions and the following disclaimer. # |
-- # # |
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of # |
-- # conditions and the following disclaimer in the documentation and/or other materials # |
-- # provided with the distribution. # |
-- # # |
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # |
-- # endorse or promote products derived from this software without specific prior written # |
-- # permission. # |
-- # # |
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # |
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # |
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # |
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # |
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # |
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # |
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # |
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # |
-- # OF THE POSSIBILITY OF SUCH DAMAGE. # |
-- # ********************************************************************************************* # |
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # |
-- ################################################################################################# |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
library neorv32; |
use neorv32.neorv32_package.all; |
|
-- Cache storage entity: read-only host port, write-only control port, both sharing one set of memories. -- |
-- While ctrl_en_i is set, the control address (ctrl_addr_i) is used to address the memories instead of host_addr_i. -- |
entity neorv32_icache_memory is |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity; 1=direct-mapped, 2=2-way set-associative |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
invalidate_i : in std_ulogic; -- invalidate whole cache (clears all valid flags) |
-- host cache access (read-only) -- |
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
host_re_i : in std_ulogic; -- read enable (used for the access history / replacement tracking) |
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data |
-- access status (1 cycle delay to access) -- |
hit_o : out std_ulogic; -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i : in std_ulogic; -- control interface enable |
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
ctrl_we_i : in std_ulogic; -- write enable (full-word) |
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data |
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block |
ctrl_valid_i : in std_ulogic; -- make selected block valid |
ctrl_invalid_i : in std_ulogic -- make selected block invalid (has priority over ctrl_valid_i) |
); |
end neorv32_icache_memory; |
|
architecture neorv32_icache_memory_rtl of neorv32_icache_memory is |
|
-- cache layout -- |
-- 32-bit address = tag & index (selects block) & offset (selects word in block) & 2-bit byte offset -- |
constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words |
constant cache_index_size_c : natural := index_size_f(CACHE_NUM_BLOCKS); |
constant cache_tag_size_c : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset |
constant cache_entries_c : natural := CACHE_NUM_BLOCKS * (CACHE_BLOCK_SIZE/4); -- number of 32-bit entries (per set) |
|
-- status flag memory -- |
signal valid_flag_s0 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- set 0: one valid flag per block |
signal valid_flag_s1 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- set 1: one valid flag per block |
signal valid : std_ulogic_vector(1 downto 0); -- valid flag read data |
|
-- tag memory -- |
type tag_mem_t is array (0 to CACHE_NUM_BLOCKS-1) of std_ulogic_vector(cache_tag_size_c-1 downto 0); |
signal tag_mem_s0 : tag_mem_t; -- set 0 |
signal tag_mem_s1 : tag_mem_t; -- set 1 |
type tag_rd_t is array (0 to 1) of std_ulogic_vector(cache_tag_size_c-1 downto 0); |
signal tag : tag_rd_t; -- tag read data |
|
-- access status -- |
signal hit : std_ulogic_vector(1 downto 0); -- one hit flag per set |
|
-- access address decomposition -- |
type acc_addr_t is record |
tag : std_ulogic_vector(cache_tag_size_c-1 downto 0); |
index : std_ulogic_vector(cache_index_size_c-1 downto 0); |
offset : std_ulogic_vector(cache_offset_size_c-1 downto 0); |
end record; |
signal host_acc_addr, ctrl_acc_addr : acc_addr_t; |
|
-- cache data memory -- |
type cache_mem_t is array (0 to cache_entries_c-1) of std_ulogic_vector(31 downto 0); |
signal cache_data_memory_s0 : cache_mem_t; -- set 0 |
signal cache_data_memory_s1 : cache_mem_t; -- set 1 |
|
-- cache data memory access -- |
type cache_rdata_t is array (0 to 1) of std_ulogic_vector(31 downto 0); |
signal cache_rdata : cache_rdata_t; -- data read from set 0 / set 1 |
signal cache_index : std_ulogic_vector(cache_index_size_c-1 downto 0); -- selected block (host or control) |
signal cache_offset : std_ulogic_vector(cache_offset_size_c-1 downto 0); -- selected word in block (host or control) |
signal cache_addr : std_ulogic_vector((cache_index_size_c+cache_offset_size_c)-1 downto 0); -- index & offset |
signal cache_we : std_ulogic; -- write enable (full-word) |
signal set_select : std_ulogic; -- set that is targeted by control writes (0 = set 0, 1 = set 1) |
|
-- access history -- |
type history_t is record |
re_ff : std_ulogic; -- host read enable, delayed by one cycle |
last_used_set : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); -- per block: '0' = last hit was in set 0, '1' = otherwise |
to_be_replaced : std_ulogic; -- registered last_used_set entry of the currently addressed block (inverted to form set_select) |
end record; |
signal history : history_t; |
|
begin |
|
-- Access Address Decomposition ----------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
host_acc_addr.tag <= host_addr_i(31 downto 31-(cache_tag_size_c-1)); |
host_acc_addr.index <= host_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); |
host_acc_addr.offset <= host_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset |
|
ctrl_acc_addr.tag <= ctrl_addr_i(31 downto 31-(cache_tag_size_c-1)); |
ctrl_acc_addr.index <= ctrl_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); |
ctrl_acc_addr.offset <= ctrl_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset |
|
|
-- Cache Access History ------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
access_history: process(clk_i) |
begin |
if rising_edge(clk_i) then |
history.re_ff <= host_re_i; -- hit status is available one cycle after the read request |
if (invalidate_i = '1') then -- invalidate whole cache |
history.last_used_set <= (others => '1'); -- results in set_select = '0', so set 0 is filled first |
elsif (history.re_ff = '1') and (or_all_f(hit) = '1') then -- store last accessed set that caused a hit |
history.last_used_set(to_integer(unsigned(cache_index))) <= not hit(0); -- '0' if set 0 was hit, '1' otherwise |
end if; |
history.to_be_replaced <= history.last_used_set(to_integer(unsigned(cache_index))); -- registered read of the addressed block's entry |
end if; |
end process access_history; |
|
-- which set is going to be replaced? -> opposite of last used set = least recently used set -- |
-- (always set 0 for the direct-mapped configuration) -- |
set_select <= '0' when (CACHE_NUM_SETS = 1) else (not history.to_be_replaced); |
|
|
-- Status flag memory --------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
status_memory: process(clk_i) |
begin |
if rising_edge(clk_i) then |
-- write access -- |
-- priority: global invalidate > invalidate block > validate block -- |
if (invalidate_i = '1') then -- invalidate whole cache |
valid_flag_s0 <= (others => '0'); |
valid_flag_s1 <= (others => '0'); |
elsif (ctrl_en_i = '1') then |
if (ctrl_invalid_i = '1') then -- make current block invalid |
if (set_select = '0') then |
valid_flag_s0(to_integer(unsigned(cache_index))) <= '0'; |
else |
valid_flag_s1(to_integer(unsigned(cache_index))) <= '0'; |
end if; |
elsif (ctrl_valid_i = '1') then -- make current block valid |
if (set_select = '0') then |
valid_flag_s0(to_integer(unsigned(cache_index))) <= '1'; |
else |
valid_flag_s1(to_integer(unsigned(cache_index))) <= '1'; |
end if; |
end if; |
end if; |
-- read access (sync) -- |
-- performed every cycle; returns the flag value from before a write in the same cycle -- |
valid(0) <= valid_flag_s0(to_integer(unsigned(cache_index))); |
valid(1) <= valid_flag_s1(to_integer(unsigned(cache_index))); |
end if; |
end process status_memory; |
|
|
-- Tag memory ----------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
tag_memory: process(clk_i) |
begin |
if rising_edge(clk_i) then |
if (ctrl_en_i = '1') and (ctrl_tag_we_i = '1') then -- write access |
if (set_select = '0') then |
tag_mem_s0(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag; |
else |
tag_mem_s1(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag; |
end if; |
else -- read access (tag read data keeps its old value during a write) |
tag(0) <= tag_mem_s0(to_integer(unsigned(cache_index))); |
tag(1) <= tag_mem_s1(to_integer(unsigned(cache_index))); |
end if; |
end if; |
end process tag_memory; |
|
-- comparator -- |
-- a set hits if its stored tag equals the host address tag and its block is valid; -- |
-- only the implemented sets (0 to CACHE_NUM_SETS-1) are checked, all other hit flags stay '0' -- |
comparator: process(host_acc_addr, tag, valid) |
begin |
hit <= (others => '0'); |
for i in 0 to CACHE_NUM_SETS-1 loop |
if (host_acc_addr.tag = tag(i)) and (valid(i) = '1') then |
hit(i) <= '1'; |
end if; |
end loop; -- i |
end process comparator; |
|
-- global hit -- |
hit_o <= or_all_f(hit); |
|
|
-- Cache Data Memory ---------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
cache_mem_access: process(clk_i) |
begin |
if rising_edge(clk_i) then |
if (cache_we = '1') then -- write access from control (full-word) |
if (set_select = '0') then |
cache_data_memory_s0(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i; |
else |
cache_data_memory_s1(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i; |
end if; |
else -- read access from host (full-word); both sets are read in parallel |
cache_rdata(0) <= cache_data_memory_s0(to_integer(unsigned(cache_addr))); |
cache_rdata(1) <= cache_data_memory_s1(to_integer(unsigned(cache_addr))); |
end if; |
end if; |
end process cache_mem_access; |
|
-- data output -- |
-- set 0 data on a set 0 hit (or always if direct-mapped), set 1 data otherwise -- |
host_rdata_o <= cache_rdata(0) when (hit(0) = '1') or (CACHE_NUM_SETS = 1) else cache_rdata(1); |
|
-- cache block ram access address -- |
cache_addr <= cache_index & cache_offset; |
|
-- cache access select -- |
-- the control interface takes over the memory address and write enable while ctrl_en_i is set -- |
cache_index <= host_acc_addr.index when (ctrl_en_i = '0') else ctrl_acc_addr.index; |
cache_offset <= host_acc_addr.offset when (ctrl_en_i = '0') else ctrl_acc_addr.offset; |
cache_we <= '0' when (ctrl_en_i = '0') else ctrl_we_i; |
|
|
end neorv32_icache_memory_rtl; |
/neorv32_package.vhd
55,7 → 55,7
-- Architecture Constants (do not modify!)= ----------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
constant data_width_c : natural := 32; -- data width - do not change! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050000"; -- no touchy! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050003"; -- no touchy! |
constant pmp_max_r_c : natural := 8; -- max PMP regions - FIXED! |
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off! |
constant rf_r0_is_reg_c : boolean := true; -- reg_file.r0 is a *physical register* that has to be initialized to zero by the CPU HW |
708,16 → 708,17
constant hpmcnt_event_cir_c : natural := 3; -- Retired compressed instruction |
constant hpmcnt_event_wait_if_c : natural := 4; -- Instruction fetch memory wait cycle |
constant hpmcnt_event_wait_ii_c : natural := 5; -- Instruction issue wait cycle |
constant hpmcnt_event_load_c : natural := 6; -- Load operation |
constant hpmcnt_event_store_c : natural := 7; -- Store operation |
constant hpmcnt_event_wait_ls_c : natural := 8; -- Load/store memory wait cycle |
constant hpmcnt_event_jump_c : natural := 9; -- Unconditional jump |
constant hpmcnt_event_branch_c : natural := 10; -- Conditional branch (taken or not taken) |
constant hpmcnt_event_tbranch_c : natural := 11; -- Conditional taken branch |
constant hpmcnt_event_trap_c : natural := 12; -- Entered trap |
constant hpmcnt_event_illegal_c : natural := 13; -- Illegal instruction exception |
constant hpmcnt_event_wait_mc_c : natural := 6; -- Multi-cycle ALU-operation wait cycle |
constant hpmcnt_event_load_c : natural := 7; -- Load operation |
constant hpmcnt_event_store_c : natural := 8; -- Store operation |
constant hpmcnt_event_wait_ls_c : natural := 9; -- Load/store memory wait cycle |
constant hpmcnt_event_jump_c : natural := 10; -- Unconditional jump |
constant hpmcnt_event_branch_c : natural := 11; -- Conditional branch (taken or not taken) |
constant hpmcnt_event_tbranch_c : natural := 12; -- Conditional taken branch |
constant hpmcnt_event_trap_c : natural := 13; -- Entered trap |
constant hpmcnt_event_illegal_c : natural := 14; -- Illegal instruction exception |
-- |
constant hpmcnt_event_size_c : natural := 14; -- length of this list |
constant hpmcnt_event_size_c : natural := 15; -- length of this list |
|
-- Clock Generator ------------------------------------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
767,6 → 768,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN : boolean := false; -- implement external memory bus interface? |
-- Processor peripherals -- |
1118,12 → 1120,13
); |
end component; |
|
-- Component: CPU Cache ------------------------------------------------------------------- |
-- Component: CPU Instruction Cache ------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
component neorv32_cache |
component neorv32_icache |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16 -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
); |
port ( |
-- global control -- |
/neorv32_sysinfo.vhd
145,15 → 145,15
sysinfo_mem(2)(31 downto 26) <= (others => '0'); -- reserved |
|
-- SYSINFO(3): Cache configuration -- |
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes) |
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block) |
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes) |
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block) |
sysinfo_mem(3)(11 downto 08) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_ASSOCIATIVITY), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(associativity) |
sysinfo_mem(3)(15 downto 12) <= (others => '0'); -- replacement strategy (irrelevant since i-cache is read-only) |
sysinfo_mem(3)(15 downto 12) <= "0001" when (ICACHE_ASSOCIATIVITY > 1) and (ICACHE_EN = true) else (others => '0'); -- i-cache: replacement strategy (LRU only (yet)) |
-- |
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved (for d-cache.block_size) |
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved (for d-cache.num_blocks) |
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved (for d-cache.associativity) |
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved (for d-cache.replacement_Strategy) |
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved - d-cache: log2(block_size) |
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved - d-cache: log2(num_blocks) |
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved - d-cache: log2(associativity) |
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved - d-cache: replacement strategy |
|
-- SYSINFO(4): Base address of instruction memory space -- |
sysinfo_mem(4) <= ispace_base_c; -- defined in neorv32_package.vhd file |
/neorv32_top.vhd
80,6 → 80,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN : boolean := false; -- implement external memory bus interface? |
-- Processor peripherals -- |
327,7 → 328,7
end process clock_generator_edge; |
|
|
-- CPU ------------------------------------------------------------------------------------ |
-- CPU Core ------------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_cpu_inst: neorv32_cpu |
generic map ( |
412,10 → 413,11
-- ------------------------------------------------------------------------------------------- |
neorv32_icache_inst_true: |
if (ICACHE_EN = true) generate |
neorv32_icache_inst: neorv32_cache |
neorv32_icache_inst: neorv32_icache |
generic map ( |
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2 |
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2 |
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS => ICACHE_ASSOCIATIVITY -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
) |
port map ( |
-- global control -- |
462,7 → 464,7
end generate; |
|
|
-- CPU Crossbar Switch -------------------------------------------------------------------- |
-- CPU Bus Switch ------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_busswitch_inst: neorv32_busswitch |
generic map ( |
533,7 → 535,7
IMEM_BASE => imem_base_c, -- memory base address |
IMEM_SIZE => MEM_INT_IMEM_SIZE, -- processor-internal instruction memory size in bytes |
IMEM_AS_ROM => MEM_INT_IMEM_ROM, -- implement IMEM as read-only memory? |
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader? |
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader? |
) |
port map ( |
clk_i => clk_i, -- global clock line |
613,13 → 615,13
if (MEM_EXT_EN = true) generate |
neorv32_wishbone_inst: neorv32_wishbone |
generic map ( |
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode |
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode |
-- Internal instruction memory -- |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
-- Internal data memory -- |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes |
) |
port map ( |
-- global control -- |
1006,34 → 1008,34
neorv32_sysinfo_inst: neorv32_sysinfo |
generic map ( |
-- General -- |
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz |
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader? |
USER_CODE => USER_CODE, -- custom user code |
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz |
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader? |
USER_CODE => USER_CODE, -- custom user code |
-- internal Instruction memory -- |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM |
-- Internal Data memory -- |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes |
-- Internal Cache memory -- |
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => 1, -- i-cache: associativity (min 1), has to be a power 2 |
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => ICACHE_ASSOCIATIVITY, -- i-cache: associativity (min 1), has to be a power 2 |
-- External memory interface -- |
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface? |
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface? |
-- Processor peripherals -- |
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)? |
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)? |
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)? |
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)? |
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)? |
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)? |
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)? |
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)? |
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)? |
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)? |
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)? |
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)? |
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)? |
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)? |
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)? |
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)? |
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)? |
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)? |
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)? |
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)? |
) |
port map ( |
-- host access -- |