URL
https://opencores.org/ocsvn/neorv32/neorv32/trunk
Subversion Repositories neorv32
Compare Revisions
- This comparison shows the changes necessary to convert path
/neorv32/trunk
- from Rev 44 to Rev 45
- ↔ Reverse comparison
Rev 44 → Rev 45
/docs/Doxyfile
38,7 → 38,7
# could be handy for archiving the generated documentation or if some version |
# control system is used. |
|
PROJECT_NUMBER = |
PROJECT_NUMBER = |
|
# Using the PROJECT_BRIEF tag one can provide an optional one line description |
# for a project that appears at the top of each page and should give viewer a |
/docs/NEORV32.pdf
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/rtl/core/neorv32_cache.vhd
File deleted
/rtl/core/neorv32_cpu.vhd
166,8 → 166,8
|
-- Instruction prefetch buffer size -- |
assert not (is_power_of_two_f(ipb_entries_c) = false) report "NEORV32 CPU CONFIG ERROR! Number of entries in instruction prefetch buffer <ipb_entries_c> has to be a power of two." severity error; |
-- A extension - only lr.w and sc.w supported yet -- |
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports >lr.w< and >sc.w< instructions yet." severity warning; |
-- A extension - only lr.w and sc.w are supported yet -- |
assert not (CPU_EXTENSION_RISCV_A = true) report "NEORV32 CPU CONFIG WARNING! Atomic operations extension (A) only supports <lr.w> and <sc.w> instructions." severity warning; |
|
-- Bit manipulation notifier -- |
assert not (CPU_EXTENSION_RISCV_B = true) report "NEORV32 CPU CONFIG WARNING! Bit manipulation extension (B) only supports 'base' instruction sub-set (Zbb) yet and is still 'unofficial' (not-ratified)." severity warning; |
253,7 → 253,7
|
-- Register File -------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_regfile_inst: neorv32_cpu_regfile |
neorv32_cpu_regfile_inst: neorv32_cpu_regfile |
generic map ( |
CPU_EXTENSION_RISCV_E => CPU_EXTENSION_RISCV_E -- implement embedded RF extension? |
) |
/rtl/core/neorv32_cpu_control.vhd
669,8 → 669,8
-- PC update -- |
if (execute_engine.pc_we = '1') then |
case execute_engine.pc_mux_sel is |
when "00" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment |
when "01" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch |
when "00" => execute_engine.pc <= alu_add_i(data_width_c-1 downto 1) & '0'; -- jump/taken_branch |
when "01" => execute_engine.pc <= execute_engine.next_pc(data_width_c-1 downto 1) & '0'; -- normal (linear) increment |
when "10" => execute_engine.pc <= csr.mtvec(data_width_c-1 downto 1) & '0'; -- trap enter |
when others => execute_engine.pc <= csr.mepc(data_width_c-1 downto 1) & '0'; -- trap exit |
end case; |
813,7 → 813,7
execute_engine.sleep_nxt <= execute_engine.sleep; |
execute_engine.if_rst_nxt <= execute_engine.if_rst; |
-- |
execute_engine.pc_mux_sel <= (others => '0'); |
execute_engine.pc_mux_sel <= (others => '0'); -- select "slowest path" as default |
execute_engine.pc_we <= '0'; |
|
-- instruction dispatch -- |
874,10 → 874,10
|
when DISPATCH => -- Get new command from instruction issue engine |
-- ------------------------------------------------------------ |
execute_engine.pc_mux_sel <= "00"; -- linear next PC |
-- IR update -- |
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction being executed |
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0); |
execute_engine.pc_mux_sel <= "01"; -- linear next PC |
execute_engine.is_ci_nxt <= cmd_issue.data(32); -- flag to indicate a de-compressed instruction being executed |
execute_engine.i_reg_nxt <= cmd_issue.data(31 downto 0); |
-- |
if (cmd_issue.valid = '1') then -- instruction available? |
-- IR update - exceptions -- |
1032,11 → 1032,15
|
when opcode_syscsr_c => -- system/csr access |
-- ------------------------------------------------------------ |
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment |
execute_engine.state_nxt <= SYS_ENV; |
else -- CSR access |
execute_engine.state_nxt <= CSR_ACCESS; |
if (CPU_EXTENSION_RISCV_Zicsr = true) then |
csr.re_nxt <= csr_acc_valid; -- always read CSR if valid access, only relevant for CSR-instructions |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_env_c) then -- system/environment |
execute_engine.state_nxt <= SYS_ENV; |
else -- CSR access |
execute_engine.state_nxt <= CSR_ACCESS; |
end if; |
else |
execute_engine.state_nxt <= SYS_WAIT; |
end if; |
|
when others => -- undefined |
1051,16 → 1055,16
execute_engine.pc_mux_sel <= "11"; -- csr.mepc (only relevant for MRET) |
case execute_engine.i_reg(instr_funct12_msb_c downto instr_funct12_lsb_c) is |
when funct12_ecall_c => -- ECALL |
trap_ctrl.env_call <= '1'; |
trap_ctrl.env_call <= '1'; |
when funct12_ebreak_c => -- EBREAK |
trap_ctrl.break_point <= '1'; |
trap_ctrl.break_point <= '1'; |
when funct12_mret_c => -- MRET |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_we <= '1'; -- update PC from MEPC |
fetch_engine.reset <= '1'; |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_we <= '1'; -- update PC from MEPC |
fetch_engine.reset <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
when funct12_wfi_c => -- WFI |
execute_engine.sleep_nxt <= '1'; -- good night |
execute_engine.sleep_nxt <= '1'; -- good night |
when others => -- undefined |
NULL; |
end case; |
1079,7 → 1083,7
csr.we_nxt <= '0'; |
end case; |
-- register file write back -- |
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input = CSR output |
ctrl_nxt(ctrl_rf_in_mux_msb_c downto ctrl_rf_in_mux_lsb_c) <= "11"; -- RF input <= CSR output |
ctrl_nxt(ctrl_rf_wb_en_c) <= '1'; -- valid RF write-back |
execute_engine.state_nxt <= DISPATCH; |
|
1109,7 → 1113,7
ctrl_nxt(ctrl_rf_in_mux_msb_c) <= '0'; -- RF input = ALU result |
ctrl_nxt(ctrl_rf_wb_en_c) <= execute_engine.i_reg(instr_opcode_lsb_c+2); -- valid RF write-back? (is jump-and-link?) |
-- destination address -- |
execute_engine.pc_mux_sel <= "01"; -- alu.add = branch/jump destination |
execute_engine.pc_mux_sel <= "00"; -- alu.add = branch/jump destination |
if (execute_engine.i_reg(instr_opcode_lsb_c+2) = '1') or (execute_engine.branch_taken = '1') then -- JAL/JALR or taken branch |
execute_engine.pc_we <= '1'; -- update PC |
fetch_engine.reset <= '1'; -- trigger new instruction fetch from modified PC |
1123,9 → 1127,9
when FENCE_OP => -- fence operations - execution |
-- ------------------------------------------------------------ |
execute_engine.state_nxt <= SYS_WAIT; |
execute_engine.pc_mux_sel <= "00"; -- linear next PC = "refetch" next instruction (only relevant for fence.i) |
execute_engine.pc_mux_sel <= "01"; -- linear next PC = "refetch" next instruction (only relevant for fence.i) |
-- FENCE.I -- |
if (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) and (CPU_EXTENSION_RISCV_Zifencei = true) then |
if (CPU_EXTENSION_RISCV_Zifencei = true) and (execute_engine.i_reg(instr_funct3_lsb_c) = funct3_fencei_c(0)) then |
execute_engine.pc_we <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
fetch_engine.reset <= '1'; |
2076,7 → 2080,7
csr.minstreth <= std_ulogic_vector(unsigned(csr.minstreth) + 1); |
end if; |
|
-- [machine] high performance counters -- |
-- [machine] hardware performance monitors (counters) -- |
for i in 0 to HPM_NUM_CNTS-1 loop |
-- [m]hpmcounter* -- |
if (csr.we = '1') and (csr.addr = std_ulogic_vector(unsigned(csr_mhpmcounter3_c) + i)) then -- write access |
2137,6 → 2141,7
cnt_event_nxt(hpmcnt_event_cir_c) <= '1' when (execute_engine.state = EXECUTE) and (execute_engine.is_ci = '1') else '0'; -- retired compressed instruction |
cnt_event_nxt(hpmcnt_event_wait_if_c) <= '1' when (fetch_engine.state = IFETCH_ISSUE) and (fetch_engine.state_prev = IFETCH_ISSUE) else '0'; -- instruction fetch memory wait cycle |
cnt_event_nxt(hpmcnt_event_wait_ii_c) <= '1' when (execute_engine.state = DISPATCH) and (execute_engine.state_prev = DISPATCH) else '0'; -- instruction issue wait cycle |
cnt_event_nxt(hpmcnt_event_wait_mc_c) <= '1' when (execute_engine.state = ALU_WAIT) and (execute_engine.state_prev = ALU_WAIT) else '0'; -- multi-cycle alu-operation wait cycle |
|
cnt_event_nxt(hpmcnt_event_load_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_rd_c) = '1') else '0'; -- load operation |
cnt_event_nxt(hpmcnt_event_store_c) <= '1' when (execute_engine.state = LOADSTORE_1) and (ctrl(ctrl_bus_wr_c) = '1') else '0'; -- store operation |
/rtl/core/neorv32_cpu_cp_bitmanip.vhd
1,8 → 1,9
-- ################################################################################################# |
-- # << NEORV32 - CPU Co-Processor: Bit manipulation unit (RISC-V "B" Extension) >> # |
-- # ********************************************************************************************* # |
-- # The bit manipulation unit is implemented as co-processor that has a processing latency of at # |
-- # least 3 cycles. Only the "base" bit manipulation subset ('Zbb') is supported yet. # |
-- # The bit manipulation unit is implemented as co-processor that has a processing latency of 1 # |
-- # cycle for logic/arithmetic operations and 3+shamt (=shift amount) cycles for shift(-related) # |
-- # operations. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
81,12 → 82,15
constant op_width_c : natural := 15; |
|
-- controller -- |
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT, S_BUSY_LOGIC); |
type ctrl_state_t is (S_IDLE, S_START_SHIFT, S_BUSY_SHIFT); |
signal ctrl_state : ctrl_state_t; |
signal cmd, cmd_buf : std_ulogic_vector(op_width_c-1 downto 0); |
signal valid : std_ulogic; |
|
-- operand buffers -- |
signal rs1_reg, rs2_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal rs1_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal rs2_reg : std_ulogic_vector(data_width_c-1 downto 0); |
signal less_ff : std_ulogic; |
|
-- shift amount (immediate or register) -- |
signal shamt : std_ulogic_vector(index_size_f(data_width_c)-1 downto 0); |
146,12 → 150,13
cmd_buf <= (others => '0'); |
rs1_reg <= (others => '0'); |
rs2_reg <= (others => '0'); |
less_ff <= '0'; |
shifter.start <= '0'; |
valid_o <= '0'; |
valid <= '0'; |
elsif rising_edge(clk_i) then |
-- defaults -- |
shifter.start <= '0'; |
valid_o <= '0'; |
valid <= '0'; |
|
-- fsm -- |
case ctrl_state is |
159,6 → 164,7
when S_IDLE => -- wait for operation trigger |
-- ------------------------------------------------------------ |
if (start_i = '1') then |
less_ff <= cmp_i(alu_cmp_less_c); |
cmd_buf <= cmd; |
rs1_reg <= rs1_i; |
rs2_reg <= rs2_i; |
166,7 → 172,8
shifter.start <= '1'; |
ctrl_state <= S_START_SHIFT; |
else |
ctrl_state <= S_BUSY_LOGIC; |
valid <= '1'; |
ctrl_state <= S_IDLE; |
end if; |
end if; |
|
177,14 → 184,10
when S_BUSY_SHIFT => -- wait for multi-cycle shift operation to finish |
-- ------------------------------------------------------------ |
if (shifter.run = '0') then |
ctrl_state <= S_BUSY_LOGIC; |
valid <= '1'; |
ctrl_state <= S_IDLE; |
end if; |
|
when S_BUSY_LOGIC => -- single-cycle logic operation (and output) |
-- ------------------------------------------------------------ |
valid_o <= '1'; |
ctrl_state <= S_IDLE; |
|
when others => -- undefined |
-- ------------------------------------------------------------ |
ctrl_state <= S_IDLE; |
262,8 → 265,8
res_int(op_cpop_c)(shifter.bcnt'left downto 0) <= shifter.bcnt; |
|
-- min/max select -- |
res_int(op_min_c) <= rs1_reg when (cmp_i(alu_cmp_less_c) = '1') else rs2_reg; |
res_int(op_max_c) <= rs2_reg when (cmp_i(alu_cmp_less_c) = '1') else rs1_reg; |
res_int(op_min_c) <= rs1_reg when (less_ff = '1') else rs2_reg; |
res_int(op_max_c) <= rs2_reg when (less_ff = '1') else rs1_reg; |
|
-- sign-extension -- |
res_int(op_sextb_c)(data_width_c-1 downto 8) <= (others => rs1_reg(7)); |
314,22 → 317,24
|
-- Output Gate ---------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
output_gate: process(clk_i) |
output_gate: process(valid, res_out) |
begin |
if rising_edge(clk_i) then |
if (valid = '1') then |
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or |
res_out(op_min_c) or res_out(op_max_c) or |
res_out(op_sextb_c) or res_out(op_sexth_c) or |
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or |
res_out(op_pack_c) or |
res_out(op_ror_c) or res_out(op_rol_c) or |
res_out(op_rev8_c) or |
res_out(op_orcb_c); |
else |
res_o <= (others => '0'); |
if (ctrl_state = S_BUSY_LOGIC) then |
res_o <= res_out(op_clz_c) or res_out(op_ctz_c) or res_out(op_cpop_c) or |
res_out(op_min_c) or res_out(op_max_c) or |
res_out(op_sextb_c) or res_out(op_sexth_c) or |
res_out(op_andn_c) or res_out(op_orn_c) or res_out(op_xnor_c) or |
res_out(op_pack_c) or |
res_out(op_ror_c) or res_out(op_rol_c) or |
res_out(op_rev8_c) or |
res_out(op_orcb_c); |
end if; |
end if; |
end process output_gate; |
|
-- valid output -- |
valid_o <= valid; |
|
|
end neorv32_cpu_cp_bitmanip_rtl; |
/rtl/core/neorv32_cpu_cp_muldiv.vhd
1,5 → 1,5
-- ################################################################################################# |
-- # << NEORV32 - CPU Co-Processor: MULDIV unit (RISC-V "M" Extension)>> # |
-- # << NEORV32 - CPU Co-Processor: Integer Multiplier/Divider Unit (RISC-V "M" Extension)>> # |
-- # ********************************************************************************************* # |
-- # Multiplier and Divider unit. Implements the RISC-V RV32-M CPU extension. # |
-- # Multiplier core (signed/unsigned) uses serial algorithm. -> 32+4 cycles latency # |
86,7 → 86,7
signal cp_op_ff : std_ulogic_vector(2 downto 0); -- operation that was executed |
signal start : std_ulogic; |
signal operation : std_ulogic; |
signal opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands |
signal rs1, opx, opy : std_ulogic_vector(data_width_c-1 downto 0); -- input operands |
signal opx_is_signed : std_ulogic; |
signal opy_is_signed : std_ulogic; |
signal opy_is_zero : std_ulogic; |
121,6 → 121,7
state <= IDLE; |
opx <= (others => '0'); |
opy <= (others => '0'); |
rs1 <= (others => '0'); |
cnt <= (others => '0'); |
start <= '0'; |
valid <= '0'; |
136,9 → 137,10
-- FSM -- |
case state is |
when IDLE => |
opx <= rs1_i; |
opy <= rs2_i; |
if (start_i = '1') then |
opx <= rs1_i; |
rs1 <= rs1_i; |
opy <= rs2_i; |
state <= DECODE; |
end if; |
|
300,7 → 302,7
|
-- Data Output ---------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1_i, remainder) |
operation_result: process(valid, cp_op_ff, mul_product, div_res, quotient, opy_is_zero, rs1, remainder) |
begin |
if (valid = '1') then |
case cp_op_ff is |
316,7 → 318,7
if (opy_is_zero = '0') then |
res_o <= div_res; |
else |
res_o <= rs1_i; |
res_o <= rs1; |
end if; |
when others => -- cp_op_remu_c |
res_o <= remainder; |
/rtl/core/neorv32_cpu_regfile.vhd
1,14 → 1,19
-- ################################################################################################# |
-- # << NEORV32 - CPU Data Register File >> # |
-- # << NEORV32 - CPU General Purpose Data Register File >> # |
-- # ********************************************************************************************* # |
-- # General purpose data register file. 32 entries for normal mode (I), 16 entries for embedded # |
-- # mode (E) when RISC-V "E" extension is enabled. Register zero (r0) is a "normal" physical reg # |
-- # that has to be initialized to zero by the CPU control system. For normal operations r0 cannot # |
-- # be written. The register file uses synchronous reads so it can be mapped to FPGA block RAM. # |
-- # General purpose data register file. 32 entries (= 1024 bit) for normal mode (RV32I), # |
-- # 16 entries (= 512 bit) for embedded mode (RV32E) when RISC-V "E" extension is enabled. # |
-- # # |
-- # Register zero (r0/x0) is a "normal" physical reg that has to be initialized to zero by the # |
-- # CPU control system. For normal operations register zero cannot be written. # |
-- # # |
-- # The register file uses synchronous read accesses and a *single* (multiplexed) address port # |
-- # for writing and reading rs1 and a single read-only port for rs2. Therefore, the whole # |
-- # register file can be mapped to a single true dual-port block RAM. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
74,46 → 79,51
signal rd_is_r0 : std_ulogic; -- writing to r0? |
signal rf_we : std_ulogic; |
signal dst_addr : std_ulogic_vector(4 downto 0); -- destination address |
signal opa_addr : std_ulogic_vector(4 downto 0); -- rs1/dst address |
signal opb_addr : std_ulogic_vector(4 downto 0); -- rs2 address |
|
begin |
|
-- Register file read/write access -------------------------------------------------------- |
-- Data Input Mux ------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i; |
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data; |
|
|
-- Register File Access ------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
rf_access: process(clk_i) |
begin |
if rising_edge(clk_i) then -- sync read and write |
if (CPU_EXTENSION_RISCV_E = false) then -- normal register file with 32 entries |
if (rf_we = '1') then |
reg_file(to_integer(unsigned(dst_addr(4 downto 0)))) <= rf_write_data; |
else |
rs1_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c)))); |
rs2_o <= reg_file(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c)))); |
reg_file(to_integer(unsigned(opa_addr(4 downto 0)))) <= rf_write_data; |
end if; |
rs1_o <= reg_file(to_integer(unsigned(opa_addr(4 downto 0)))); |
rs2_o <= reg_file(to_integer(unsigned(opb_addr(4 downto 0)))); |
else -- embedded register file with 16 entries |
if (rf_we = '1') then |
reg_file_emb(to_integer(unsigned(dst_addr(3 downto 0)))) <= rf_write_data; |
else |
rs1_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs1_adr3_c downto ctrl_rf_rs1_adr0_c)))); |
rs2_o <= reg_file_emb(to_integer(unsigned(ctrl_i(ctrl_rf_rs2_adr3_c downto ctrl_rf_rs2_adr0_c)))); |
reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0)))) <= rf_write_data; |
end if; |
rs1_o <= reg_file_emb(to_integer(unsigned(opa_addr(3 downto 0)))); |
rs2_o <= reg_file_emb(to_integer(unsigned(opb_addr(3 downto 0)))); |
end if; |
end if; |
end process rf_access; |
|
-- data input mux -- |
rf_write_data <= alu_i when (ctrl_i(ctrl_rf_in_mux_msb_c) = '0') else rf_mux_data; |
rf_mux_data <= mem_i when (ctrl_i(ctrl_rf_in_mux_lsb_c) = '0') else csr_i; |
|
-- check if we are writing to x0 -- |
rd_is_r0 <= not or_all_f(ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c)) when (CPU_EXTENSION_RISCV_E = false) else |
not or_all_f(ctrl_i(ctrl_rf_rd_adr3_c downto ctrl_rf_rd_adr0_c)); |
|
-- valid RF write access -- |
-- valid RF write access? -- |
rf_we <= (ctrl_i(ctrl_rf_wb_en_c) and (not rd_is_r0)) or ctrl_i(ctrl_rf_r0_we_c); |
|
-- destination address -- |
dst_addr <= ctrl_i(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c) when (ctrl_i(ctrl_rf_r0_we_c) = '0') else (others => '0'); -- force dst=r0? |
|
-- access addresses -- |
opa_addr <= dst_addr when (rf_we = '1') else ctrl_i(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c); -- rd/rs1 |
opb_addr <= ctrl_i(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c); -- rs2 |
|
|
end neorv32_cpu_regfile_rtl; |
/rtl/core/neorv32_icache.vhd
0,0 → 1,614
-- ################################################################################################# |
-- # << NEORV32 - Processor-Internal Instruction Cache >> # |
-- # ********************************************************************************************* # |
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). # |
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
-- # # |
-- # 1. Redistributions of source code must retain the above copyright notice, this list of # |
-- # conditions and the following disclaimer. # |
-- # # |
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of # |
-- # conditions and the following disclaimer in the documentation and/or other materials # |
-- # provided with the distribution. # |
-- # # |
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # |
-- # endorse or promote products derived from this software without specific prior written # |
-- # permission. # |
-- # # |
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # |
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # |
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # |
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # |
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # |
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # |
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # |
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # |
-- # OF THE POSSIBILITY OF SUCH DAMAGE. # |
-- # ********************************************************************************************* # |
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # |
-- ################################################################################################# |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
library neorv32; |
use neorv32.neorv32_package.all; |
|
-- Entity: neorv32_icache - processor-internal instruction cache. |
-- |
-- Configuration (generics): |
-- * CACHE_NUM_SETS = 1 selects a direct-mapped cache, CACHE_NUM_SETS = 2 a 2-way set-associative cache. |
-- * The architecture's sanity checks report a configuration error (severity error) if |
-- CACHE_NUM_BLOCKS, CACHE_BLOCK_SIZE or CACHE_NUM_SETS is not a power of two, if CACHE_NUM_BLOCKS < 1, |
-- if CACHE_BLOCK_SIZE < 4, or if CACHE_NUM_SETS is neither 1 nor 2. |
-- |
-- Interface (ports): |
-- * host_* : request side (address, write data, byte enable, we/re, cancel and lock are inputs; |
-- read data, ack and err are outputs). |
-- * bus_* : the same signal set with mirrored directions (requests are outputs; read data, ack and |
-- err are inputs). |
-- * NOTE(review): how host_wdata_i, host_ben_i, host_we_i and host_lock_i are handled is not visible in |
-- this excerpt; the cache memory component's host port is declared read-only - confirm in the architecture. |
entity neorv32_icache is |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
rstn_i : in std_ulogic; -- global reset, low-active, async |
clear_i : in std_ulogic; -- cache clear |
-- host controller interface -- |
host_addr_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus access address |
host_rdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus read data |
host_wdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus write data |
host_ben_i : in std_ulogic_vector(03 downto 0); -- byte enable |
host_we_i : in std_ulogic; -- write enable |
host_re_i : in std_ulogic; -- read enable |
host_cancel_i : in std_ulogic; -- cancel current bus transaction |
host_lock_i : in std_ulogic; -- locked/exclusive access |
host_ack_o : out std_ulogic; -- bus transfer acknowledge |
host_err_o : out std_ulogic; -- bus transfer error |
-- peripheral bus interface -- |
bus_addr_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus access address |
bus_rdata_i : in std_ulogic_vector(data_width_c-1 downto 0); -- bus read data |
bus_wdata_o : out std_ulogic_vector(data_width_c-1 downto 0); -- bus write data |
bus_ben_o : out std_ulogic_vector(03 downto 0); -- byte enable |
bus_we_o : out std_ulogic; -- write enable |
bus_re_o : out std_ulogic; -- read enable |
bus_cancel_o : out std_ulogic; -- cancel current bus transaction |
bus_lock_o : out std_ulogic; -- locked/exclusive access |
bus_ack_i : in std_ulogic; -- bus transfer acknowledge |
bus_err_i : in std_ulogic -- bus transfer error |
); |
end neorv32_icache; |
|
architecture neorv32_icache_rtl of neorv32_icache is |
|
-- cache layout -- |
constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words |
constant cache_index_size_c : natural := index_size_f(CACHE_NUM_BLOCKS); |
constant cache_tag_size_c : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset |
|
-- cache memory -- |
component neorv32_icache_memory |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity; 1=direct-mapped, 2=2-way set-associative |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
invalidate_i : in std_ulogic; -- invalidate whole cache |
-- host cache access (read-only) -- |
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
host_re_i : in std_ulogic; -- read enable |
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data |
-- access status (1 cycle delay to access) -- |
hit_o : out std_ulogic; -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i : in std_ulogic; -- control interface enable |
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
ctrl_we_i : in std_ulogic; -- write enable (full-word) |
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data |
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block |
ctrl_valid_i : in std_ulogic; -- make selected block valid |
ctrl_invalid_i : in std_ulogic -- make selected block invalid |
); |
end component; |
|
-- cache interface -- |
type cache_if_t is record |
clear : std_ulogic; -- cache clear |
-- |
host_addr : std_ulogic_vector(31 downto 0); -- cpu access address |
host_rdata : std_ulogic_vector(31 downto 0); -- cpu read data |
-- |
hit : std_ulogic; -- hit access |
-- |
ctrl_en : std_ulogic; -- control access enable |
ctrl_addr : std_ulogic_vector(31 downto 0); -- control access address |
ctrl_we : std_ulogic; -- control write enable |
ctrl_wdata : std_ulogic_vector(31 downto 0); -- control write data |
ctrl_tag_we : std_ulogic; -- control tag write enabled |
ctrl_valid_we : std_ulogic; -- control valid flag set |
ctrl_invalid_we : std_ulogic; -- control valid flag clear |
end record; |
signal cache : cache_if_t; |
|
-- control engine -- |
-- FSM states of the control engine; the FSM starts in S_CACHE_CLEAR after reset. |
type ctrl_engine_state_t is (S_IDLE, S_CACHE_CLEAR, S_CACHE_CHECK, S_CACHE_MISS, S_BUS_DOWNLOAD_REQ, S_BUS_DOWNLOAD_GET, |
S_CACHE_RESYNC_0, S_CACHE_RESYNC_1, S_BUS_ERROR, S_ERROR, S_HOST_CANCEL); |
-- Control engine registers; each <name>_nxt element is the combinational next value of register <name>. |
type ctrl_t is record |
state : ctrl_engine_state_t; -- current state |
state_nxt : ctrl_engine_state_t; -- next state |
addr_reg : std_ulogic_vector(31 downto 0); -- address register for block download (also drives bus_addr_o and the cache control address) |
addr_reg_nxt : std_ulogic_vector(31 downto 0); -- next value of addr_reg |
-- |
re_buf : std_ulogic; -- read request buffer (holds host_re_i until the request is picked up in S_IDLE) |
re_buf_nxt : std_ulogic; -- next value of re_buf |
cancel_buf : std_ulogic; -- cancel request buffer (holds host_cancel_i until a new access is started in S_IDLE) |
cancel_buf_nxt : std_ulogic; -- next value of cancel_buf |
end record; |
signal ctrl : ctrl_t; |
|
begin |
|
-- Sanity Checks -------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- Elaboration-time checks of the cache generics. Each assert states the condition that has to hold; -- |
-- the report is issued when the condition is violated. -- |
assert is_power_of_two_f(CACHE_NUM_BLOCKS) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <NUM_BLOCKS> has to be a power of 2." |
severity error; |
assert is_power_of_two_f(CACHE_BLOCK_SIZE) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <BLOCK_SIZE> has to be a power of 2." |
severity error; |
assert is_power_of_two_f(CACHE_NUM_SETS) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be a power of 2." |
severity error; |
-- lower bounds -- |
assert (CACHE_NUM_BLOCKS >= 1) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache number of blocks <NUM_BLOCKS> has to be >= 1." |
severity error; |
assert (CACHE_BLOCK_SIZE >= 4) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache block size <BLOCK_SIZE> has to be >= 4." |
severity error; |
-- only one or two sets are implemented -- |
assert (CACHE_NUM_SETS /= 0) and (CACHE_NUM_SETS <= 2) |
report "NEORV32 PROCESSOR CONFIG ERROR! Cache associativity <CACHE_NUM_SETS> has to be 1 (direct-mapped) or 2 (2-way set-associative)." |
severity error; |
|
|
-- Control Engine FSM Sync ---------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- registers that REQUIRE a specific reset state -- |
-- Asynchronous, active-low reset (rstn_i): the FSM is forced into S_CACHE_CLEAR, which asserts -- |
-- cache.clear for one cycle, so the whole cache is invalidated before the first access. -- |
ctrl_engine_fsm_sync_rst: process(rstn_i, clk_i) |
begin |
if (rstn_i = '0') then |
ctrl.state <= S_CACHE_CLEAR; |
ctrl.re_buf <= '0'; |
ctrl.cancel_buf <= '0'; |
elsif rising_edge(clk_i) then |
-- take over the next-state values computed by ctrl_engine_fsm_comb -- |
ctrl.state <= ctrl.state_nxt; |
ctrl.re_buf <= ctrl.re_buf_nxt; |
ctrl.cancel_buf <= ctrl.cancel_buf_nxt; |
end if; |
end process ctrl_engine_fsm_sync_rst; |
|
-- registers that do not require a specific reset state -- |
-- addr_reg is written in S_CACHE_MISS (block base address) before it is used for the block download -- |
ctrl_engine_fsm_sync: process(clk_i) |
begin |
if rising_edge(clk_i) then |
ctrl.addr_reg <= ctrl.addr_reg_nxt; |
end if; |
end process ctrl_engine_fsm_sync; |
|
|
-- Control Engine FSM Comb ---------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- Combinational part of the control engine: computes the next state / next register values and -- |
-- drives the cache control, host and bus interface signals from the current state and the inputs. -- |
ctrl_engine_fsm_comb: process(ctrl, cache, clear_i, host_addr_i, host_lock_i, host_re_i, host_cancel_i, bus_rdata_i, bus_ack_i, bus_err_i) |
begin |
-- control defaults -- |
ctrl.state_nxt <= ctrl.state; |
ctrl.addr_reg_nxt <= ctrl.addr_reg; |
-- keep a pending read request buffered; a host cancel drops the buffered request -- |
ctrl.re_buf_nxt <= (ctrl.re_buf or host_re_i) and (not host_cancel_i); |
-- remember a host cancel (sticky) until a new access is started in S_IDLE -- |
ctrl.cancel_buf_nxt <= ctrl.cancel_buf or host_cancel_i; |
|
-- cache defaults -- |
cache.clear <= '0'; |
cache.host_addr <= host_addr_i; |
cache.ctrl_en <= '0'; |
cache.ctrl_addr <= ctrl.addr_reg; |
cache.ctrl_we <= '0'; |
cache.ctrl_wdata <= bus_rdata_i; |
cache.ctrl_tag_we <= '0'; |
cache.ctrl_valid_we <= '0'; |
cache.ctrl_invalid_we <= '0'; |
|
-- host interface defaults -- |
host_ack_o <= '0'; |
host_err_o <= '0'; |
host_rdata_o <= cache.host_rdata; |
|
-- peripheral bus interface defaults -- |
bus_addr_o <= ctrl.addr_reg; |
bus_wdata_o <= (others => '0'); -- cache is read-only |
bus_ben_o <= (others => '0'); -- cache is read-only |
bus_we_o <= '0'; -- cache is read-only |
bus_re_o <= '0'; |
bus_cancel_o <= '0'; |
bus_lock_o <= host_lock_i; |
|
-- fsm -- |
case ctrl.state is |
|
when S_IDLE => -- wait for host access request or cache control operation |
-- ------------------------------------------------------------ |
if (clear_i = '1') then -- cache control operation? |
ctrl.state_nxt <= S_CACHE_CLEAR; |
elsif (host_re_i = '1') or (ctrl.re_buf = '1') then -- cache access |
-- a new access starts: clear the request and cancel buffers -- |
ctrl.re_buf_nxt <= '0'; |
ctrl.cancel_buf_nxt <= '0'; |
ctrl.state_nxt <= S_CACHE_CHECK; |
end if; |
|
when S_CACHE_CLEAR => -- invalidate all cache entries |
-- ------------------------------------------------------------ |
cache.clear <= '1'; |
ctrl.state_nxt <= S_IDLE; |
|
when S_CACHE_CHECK => -- finalize host access if cache hit |
-- ------------------------------------------------------------ |
if (cache.hit = '1') then -- cache HIT |
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled |
ctrl.state_nxt <= S_IDLE; |
else -- cache MISS |
ctrl.state_nxt <= S_CACHE_MISS; |
end if; |
|
when S_CACHE_MISS => -- cache miss: set up block download (or abort if the host has canceled) |
-- ------------------------------------------------------------ |
-- compute block base address -- |
-- (the two slice assignments below override the corresponding bits of the full assignment) -- |
ctrl.addr_reg_nxt <= host_addr_i; |
ctrl.addr_reg_nxt((2+cache_offset_size_c)-1 downto 2) <= (others => '0'); -- block-aligned |
ctrl.addr_reg_nxt(1 downto 0) <= "00"; -- word-aligned |
-- |
if (host_cancel_i = '1') or (ctrl.cancel_buf = '1') then -- 'early' CPU cancel (abort before bus transaction has even started) |
ctrl.state_nxt <= S_IDLE; |
else |
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ; |
end if; |
|
when S_BUS_DOWNLOAD_REQ => -- download new cache block: request new word |
-- ------------------------------------------------------------ |
bus_re_o <= '1'; -- request new read transfer |
ctrl.state_nxt <= S_BUS_DOWNLOAD_GET; |
|
when S_BUS_DOWNLOAD_GET => -- download new cache block: wait for bus response |
-- ------------------------------------------------------------ |
cache.ctrl_en <= '1'; -- we are in cache control mode |
-- |
-- priority: bus error > host cancel > bus acknowledge -- |
if (bus_err_i = '1') then -- bus error |
ctrl.state_nxt <= S_BUS_ERROR; |
elsif (ctrl.cancel_buf = '1') then -- 'late' CPU cancel (timeout?) |
ctrl.state_nxt <= S_HOST_CANCEL; |
elsif (bus_ack_i = '1') then -- ACK = write to cache and get next word |
cache.ctrl_we <= '1'; -- write to cache |
-- all word-offset bits set = last word of the block has just been received -- |
if (and_all_f(ctrl.addr_reg((2+cache_offset_size_c)-1 downto 2)) = '1') then -- block complete? |
cache.ctrl_tag_we <= '1'; -- write tag of current address |
cache.ctrl_valid_we <= '1'; -- current block is valid now |
ctrl.state_nxt <= S_CACHE_RESYNC_0; |
else -- get next word |
ctrl.addr_reg_nxt <= std_ulogic_vector(unsigned(ctrl.addr_reg) + 4); |
ctrl.state_nxt <= S_BUS_DOWNLOAD_REQ; |
end if; |
end if; |
|
when S_CACHE_RESYNC_0 => -- re-sync host/cache access: cache read-latency |
-- ------------------------------------------------------------ |
ctrl.state_nxt <= S_CACHE_RESYNC_1; |
|
when S_CACHE_RESYNC_1 => -- re-sync host/cache access: finalize CPU request |
-- ------------------------------------------------------------ |
host_ack_o <= not ctrl.cancel_buf; -- ACK if request has not been canceled |
ctrl.state_nxt <= S_IDLE; |
|
when S_BUS_ERROR => -- bus error during download |
-- ------------------------------------------------------------ |
host_err_o <= '1'; |
ctrl.state_nxt <= S_ERROR; |
|
when S_ERROR => -- wait for CPU to cancel faulting transfer |
-- ------------------------------------------------------------ |
-- NOTE(review): only host_cancel_i is evaluated here, not ctrl.cancel_buf; a cancel that arrives -- |
-- before this state is entered would not release it - confirm the CPU cancels only after host_err_o -- |
if (host_cancel_i = '1') then |
bus_cancel_o <= '1'; |
ctrl.state_nxt <= S_IDLE; |
end if; |
|
when S_HOST_CANCEL => -- host cancels transfer |
-- ------------------------------------------------------------ |
cache.ctrl_en <= '1'; -- we are in cache control mode |
cache.ctrl_invalid_we <= '1'; -- invalidate current cache block |
bus_cancel_o <= '1'; |
ctrl.state_nxt <= S_IDLE; |
|
when others => -- undefined |
-- ------------------------------------------------------------ |
ctrl.state_nxt <= S_IDLE; |
|
end case; |
end process ctrl_engine_fsm_comb; |
|
|
-- Cache Memory --------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- Tag/valid/data storage; all control-side ports are driven by the control engine via the <cache> record. -- |
neorv32_icache_memory_inst: neorv32_icache_memory |
generic map ( |
CACHE_NUM_BLOCKS => CACHE_NUM_BLOCKS, -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE => CACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS => CACHE_NUM_SETS -- associativity; 1=direct-mapped, 2=2-way set-associative |
) |
port map ( |
-- global control -- |
clk_i => clk_i, -- global clock, rising edge |
invalidate_i => cache.clear, -- invalidate whole cache |
-- host cache access (read-only) -- |
host_addr_i => cache.host_addr, -- access address |
host_re_i => host_re_i, -- read enable |
host_rdata_o => cache.host_rdata, -- read data |
-- access status (1 cycle delay to access) -- |
hit_o => cache.hit, -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i => cache.ctrl_en, -- control interface enable |
ctrl_addr_i => cache.ctrl_addr, -- access address |
ctrl_we_i => cache.ctrl_we, -- write enable (full-word) |
ctrl_wdata_i => cache.ctrl_wdata, -- write data |
ctrl_tag_we_i => cache.ctrl_tag_we, -- write tag to selected block |
ctrl_valid_i => cache.ctrl_valid_we, -- make selected block valid |
ctrl_invalid_i => cache.ctrl_invalid_we -- make selected block invalid |
); |
|
end neorv32_icache_rtl; |
|
|
-- ########################################################################################################################################### |
-- ########################################################################################################################################### |
|
|
-- ################################################################################################# |
-- # << NEORV32 - Cache Memory >> # |
-- # ********************************************************************************************* # |
-- # Direct mapped (CACHE_NUM_SETS = 1) or 2-way set-associative (CACHE_NUM_SETS = 2). # |
-- # Least recently used replacement policy (if CACHE_NUM_SETS > 1). # |
-- # Read-only for host, write-only for control. All output signals have one cycle latency. # |
-- # # |
-- # Cache sets are mapped to individual memory components - no multi-dimensional memory arrays # |
-- # are used as some synthesis tools have problems to map these to actual BRAM primitives. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
-- # # |
-- # 1. Redistributions of source code must retain the above copyright notice, this list of # |
-- # conditions and the following disclaimer. # |
-- # # |
-- # 2. Redistributions in binary form must reproduce the above copyright notice, this list of # |
-- # conditions and the following disclaimer in the documentation and/or other materials # |
-- # provided with the distribution. # |
-- # # |
-- # 3. Neither the name of the copyright holder nor the names of its contributors may be used to # |
-- # endorse or promote products derived from this software without specific prior written # |
-- # permission. # |
-- # # |
-- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS # |
-- # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF # |
-- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # |
-- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # |
-- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE # |
-- # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED # |
-- # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # |
-- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # |
-- # OF THE POSSIBILITY OF SUCH DAMAGE. # |
-- # ********************************************************************************************* # |
-- # The NEORV32 Processor - https://github.com/stnolting/neorv32 (c) Stephan Nolting # |
-- ################################################################################################# |
|
library ieee; |
use ieee.std_logic_1164.all; |
use ieee.numeric_std.all; |
|
library neorv32; |
use neorv32.neorv32_package.all; |
|
-- Cache memory entity: tag, valid-flag and data storage for one or two sets. -- |
-- The host side can only read; the control side can only write (selected via ctrl_en_i). -- |
entity neorv32_icache_memory is |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity; 1=direct-mapped, 2=2-way set-associative |
); |
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
invalidate_i : in std_ulogic; -- invalidate whole cache |
-- host cache access (read-only) -- |
host_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
host_re_i : in std_ulogic; -- read enable |
host_rdata_o : out std_ulogic_vector(31 downto 0); -- read data |
-- access status (1 cycle delay to access) -- |
hit_o : out std_ulogic; -- hit access |
-- ctrl cache access (write-only) -- |
ctrl_en_i : in std_ulogic; -- control interface enable (when set, ctrl_addr_i instead of host_addr_i addresses the memories) |
ctrl_addr_i : in std_ulogic_vector(31 downto 0); -- access address |
ctrl_we_i : in std_ulogic; -- write enable (full-word) |
ctrl_wdata_i : in std_ulogic_vector(31 downto 0); -- write data |
ctrl_tag_we_i : in std_ulogic; -- write tag to selected block |
ctrl_valid_i : in std_ulogic; -- make selected block valid |
ctrl_invalid_i : in std_ulogic -- make selected block invalid |
); |
end neorv32_icache_memory; |
|
-- Implementation of the cache memory: address decomposition, access history (set replacement), -- |
-- valid-flag memory, tag memory with hit comparator and the data memory. -- |
architecture neorv32_icache_memory_rtl of neorv32_icache_memory is |
|
-- cache layout -- |
-- address = tag & index & offset & 2-bit byte offset -- |
constant cache_offset_size_c : natural := index_size_f(CACHE_BLOCK_SIZE/4); -- offset addresses full 32-bit words |
constant cache_index_size_c : natural := index_size_f(CACHE_NUM_BLOCKS); |
constant cache_tag_size_c : natural := 32 - (cache_offset_size_c + cache_index_size_c + 2); -- 2 additional bits for byte offset |
constant cache_entries_c : natural := CACHE_NUM_BLOCKS * (CACHE_BLOCK_SIZE/4); -- number of 32-bit entries (per set) |
|
-- status flag memory -- |
-- one valid flag per block and set; storage for both sets is always declared -- |
signal valid_flag_s0 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); |
signal valid_flag_s1 : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); |
signal valid : std_ulogic_vector(1 downto 0); -- valid flag read data |
|
-- tag memory -- |
type tag_mem_t is array (0 to CACHE_NUM_BLOCKS-1) of std_ulogic_vector(cache_tag_size_c-1 downto 0); |
signal tag_mem_s0 : tag_mem_t; |
signal tag_mem_s1 : tag_mem_t; |
type tag_rd_t is array (0 to 1) of std_ulogic_vector(cache_tag_size_c-1 downto 0); |
signal tag : tag_rd_t; -- tag read data |
|
-- access status -- |
signal hit : std_ulogic_vector(1 downto 0); |
|
-- access address decomposition -- |
type acc_addr_t is record |
tag : std_ulogic_vector(cache_tag_size_c-1 downto 0); |
index : std_ulogic_vector(cache_index_size_c-1 downto 0); |
offset : std_ulogic_vector(cache_offset_size_c-1 downto 0); |
end record; |
signal host_acc_addr, ctrl_acc_addr : acc_addr_t; |
|
-- cache data memory -- |
type cache_mem_t is array (0 to cache_entries_c-1) of std_ulogic_vector(31 downto 0); |
signal cache_data_memory_s0 : cache_mem_t; -- set 0 |
signal cache_data_memory_s1 : cache_mem_t; -- set 1 |
|
-- cache data memory access -- |
type cache_rdata_t is array (0 to 1) of std_ulogic_vector(31 downto 0); |
signal cache_rdata : cache_rdata_t; |
signal cache_index : std_ulogic_vector(cache_index_size_c-1 downto 0); |
signal cache_offset : std_ulogic_vector(cache_offset_size_c-1 downto 0); |
signal cache_addr : std_ulogic_vector((cache_index_size_c+cache_offset_size_c)-1 downto 0); -- index & offset |
signal cache_we : std_ulogic; -- write enable (full-word) |
signal set_select : std_ulogic; |
|
-- access history -- |
type history_t is record |
re_ff : std_ulogic; |
last_used_set : std_ulogic_vector(CACHE_NUM_BLOCKS-1 downto 0); |
to_be_replaced : std_ulogic; |
end record; |
signal history : history_t; |
|
begin |
|
-- Access Address Decomposition ----------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- split host and control addresses into tag (upper bits), block index and word offset -- |
host_acc_addr.tag <= host_addr_i(31 downto 31-(cache_tag_size_c-1)); |
host_acc_addr.index <= host_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); |
host_acc_addr.offset <= host_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset |
|
ctrl_acc_addr.tag <= ctrl_addr_i(31 downto 31-(cache_tag_size_c-1)); |
ctrl_acc_addr.index <= ctrl_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); |
ctrl_acc_addr.offset <= ctrl_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset |
|
|
-- Cache Access History ------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- Tracks, per block index, which set was hit last (last_used_set: '0' = set 0, '1' = set 1). -- |
access_history: process(clk_i) |
begin |
if rising_edge(clk_i) then |
history.re_ff <= host_re_i; -- host read enable delayed by one cycle |
if (invalidate_i = '1') then -- invalidate whole cache |
history.last_used_set <= (others => '1'); |
elsif (history.re_ff = '1') and (or_all_f(hit) = '1') then -- store last accessed set that caused a hit |
-- or_all_f is presumably the OR-reduction of the per-set hit flags (defined outside this file) -- |
history.last_used_set(to_integer(unsigned(cache_index))) <= not hit(0); |
end if; |
-- registered read of the history entry of the currently addressed block -- |
history.to_be_replaced <= history.last_used_set(to_integer(unsigned(cache_index))); |
end if; |
end process access_history; |
|
-- which set is going to be replaced? -> opposite of last used set = least recently used set -- |
-- (always set 0 for the direct-mapped configuration) -- |
set_select <= '0' when (CACHE_NUM_SETS = 1) else (not history.to_be_replaced); |
|
|
-- Status flag memory --------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- write priority: global invalidate > block invalidate > block validate -- |
status_memory: process(clk_i) |
begin |
if rising_edge(clk_i) then |
-- write access -- |
if (invalidate_i = '1') then -- invalidate whole cache |
valid_flag_s0 <= (others => '0'); |
valid_flag_s1 <= (others => '0'); |
elsif (ctrl_en_i = '1') then |
if (ctrl_invalid_i = '1') then -- make current block invalid |
if (set_select = '0') then |
valid_flag_s0(to_integer(unsigned(cache_index))) <= '0'; |
else |
valid_flag_s1(to_integer(unsigned(cache_index))) <= '0'; |
end if; |
elsif (ctrl_valid_i = '1') then -- make current block valid |
if (set_select = '0') then |
valid_flag_s0(to_integer(unsigned(cache_index))) <= '1'; |
else |
valid_flag_s1(to_integer(unsigned(cache_index))) <= '1'; |
end if; |
end if; |
end if; |
-- read access (sync) -- |
valid(0) <= valid_flag_s0(to_integer(unsigned(cache_index))); |
valid(1) <= valid_flag_s1(to_integer(unsigned(cache_index))); |
end if; |
end process status_memory; |
|
|
-- Tag memory ----------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- synchronous tag memory: either write the tag of the selected set or read the tags of both sets -- |
tag_memory: process(clk_i) |
begin |
if rising_edge(clk_i) then |
if (ctrl_en_i = '1') and (ctrl_tag_we_i = '1') then -- write access |
if (set_select = '0') then |
tag_mem_s0(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag; |
else |
tag_mem_s1(to_integer(unsigned(cache_index))) <= ctrl_acc_addr.tag; |
end if; |
else -- read access |
tag(0) <= tag_mem_s0(to_integer(unsigned(cache_index))); |
tag(1) <= tag_mem_s1(to_integer(unsigned(cache_index))); |
end if; |
end if; |
end process tag_memory; |
|
-- comparator -- |
-- a set hits if its stored tag equals the host tag and its block is valid; only implemented sets are checked -- |
comparator: process(host_acc_addr, tag, valid) |
begin |
hit <= (others => '0'); |
for i in 0 to CACHE_NUM_SETS-1 loop |
if (host_acc_addr.tag = tag(i)) and (valid(i) = '1') then |
hit(i) <= '1'; |
end if; |
end loop; -- i |
end process comparator; |
|
-- global hit -- |
hit_o <= or_all_f(hit); |
|
|
-- Cache Data Memory ---------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
-- synchronous data memory: either write one word to the selected set or read one word from both sets -- |
cache_mem_access: process(clk_i) |
begin |
if rising_edge(clk_i) then |
if (cache_we = '1') then -- write access from control (full-word) |
if (set_select = '0') then |
cache_data_memory_s0(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i; |
else |
cache_data_memory_s1(to_integer(unsigned(cache_addr))) <= ctrl_wdata_i; |
end if; |
else -- read access from host (full-word) |
cache_rdata(0) <= cache_data_memory_s0(to_integer(unsigned(cache_addr))); |
cache_rdata(1) <= cache_data_memory_s1(to_integer(unsigned(cache_addr))); |
end if; |
end if; |
end process cache_mem_access; |
|
-- data output -- |
-- set 0 data on a set 0 hit or in the direct-mapped configuration, set 1 data otherwise -- |
host_rdata_o <= cache_rdata(0) when (hit(0) = '1') or (CACHE_NUM_SETS = 1) else cache_rdata(1); |
|
-- cache block ram access address -- |
cache_addr <= cache_index & cache_offset; |
|
-- cache access select -- |
-- the control interface takes over the memory address and write enable while ctrl_en_i is set -- |
cache_index <= host_acc_addr.index when (ctrl_en_i = '0') else ctrl_acc_addr.index; |
cache_offset <= host_acc_addr.offset when (ctrl_en_i = '0') else ctrl_acc_addr.offset; |
cache_we <= '0' when (ctrl_en_i = '0') else ctrl_we_i; |
|
|
end neorv32_icache_memory_rtl; |
/rtl/core/neorv32_package.vhd
55,7 → 55,7
-- Architecture Constants (do not modify!)= ----------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
constant data_width_c : natural := 32; -- data width - do not change! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050000"; -- no touchy! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01050003"; -- no touchy! |
constant pmp_max_r_c : natural := 8; -- max PMP regions - FIXED! |
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off! |
constant rf_r0_is_reg_c : boolean := true; -- reg_file.r0 is a *physical register* that has to be initialized to zero by the CPU HW |
708,16 → 708,17
constant hpmcnt_event_cir_c : natural := 3; -- Retired compressed instruction |
constant hpmcnt_event_wait_if_c : natural := 4; -- Instruction fetch memory wait cycle |
constant hpmcnt_event_wait_ii_c : natural := 5; -- Instruction issue wait cycle |
constant hpmcnt_event_load_c : natural := 6; -- Load operation |
constant hpmcnt_event_store_c : natural := 7; -- Store operation |
constant hpmcnt_event_wait_ls_c : natural := 8; -- Load/store memory wait cycle |
constant hpmcnt_event_jump_c : natural := 9; -- Unconditional jump |
constant hpmcnt_event_branch_c : natural := 10; -- Conditional branch (taken or not taken) |
constant hpmcnt_event_tbranch_c : natural := 11; -- Conditional taken branch |
constant hpmcnt_event_trap_c : natural := 12; -- Entered trap |
constant hpmcnt_event_illegal_c : natural := 13; -- Illegal instruction exception |
constant hpmcnt_event_wait_mc_c : natural := 6; -- Multi-cycle ALU-operation wait cycle |
constant hpmcnt_event_load_c : natural := 7; -- Load operation |
constant hpmcnt_event_store_c : natural := 8; -- Store operation |
constant hpmcnt_event_wait_ls_c : natural := 9; -- Load/store memory wait cycle |
constant hpmcnt_event_jump_c : natural := 10; -- Unconditional jump |
constant hpmcnt_event_branch_c : natural := 11; -- Conditional branch (taken or not taken) |
constant hpmcnt_event_tbranch_c : natural := 12; -- Conditional taken branch |
constant hpmcnt_event_trap_c : natural := 13; -- Entered trap |
constant hpmcnt_event_illegal_c : natural := 14; -- Illegal instruction exception |
-- |
constant hpmcnt_event_size_c : natural := 14; -- length of this list |
constant hpmcnt_event_size_c : natural := 15; -- length of this list |
|
-- Clock Generator ------------------------------------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
767,6 → 768,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN : boolean := false; -- implement external memory bus interface? |
-- Processor peripherals -- |
1118,12 → 1120,13
); |
end component; |
|
-- Component: CPU Cache ------------------------------------------------------------------- |
-- Component: CPU Instruction Cache ------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
component neorv32_cache |
component neorv32_icache |
generic ( |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16 -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_BLOCKS : natural := 4; -- number of blocks (min 1), has to be a power of 2 |
CACHE_BLOCK_SIZE : natural := 16; -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS : natural := 1 -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
); |
port ( |
-- global control -- |
/rtl/core/neorv32_sysinfo.vhd
145,15 → 145,15
sysinfo_mem(2)(31 downto 26) <= (others => '0'); -- reserved |
|
-- SYSINFO(3): Cache configuration -- |
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes) |
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block) |
sysinfo_mem(3)(03 downto 00) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_BLOCK_SIZE), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(block_size_in_bytes) |
sysinfo_mem(3)(07 downto 04) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_NUM_BLOCKS), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(number_of_block) |
sysinfo_mem(3)(11 downto 08) <= std_ulogic_vector(to_unsigned(index_size_f(ICACHE_ASSOCIATIVITY), 4)) when (ICACHE_EN = true) else (others => '0'); -- i-cache: log2(associativity) |
sysinfo_mem(3)(15 downto 12) <= (others => '0'); -- replacement strategy (irrelevant since i-cache is read-only) |
sysinfo_mem(3)(15 downto 12) <= "0001" when (ICACHE_ASSOCIATIVITY > 1) and (ICACHE_EN = true) else (others => '0'); -- i-cache: replacement strategy (LRU only (yet)) |
-- |
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved (for d-cache.block_size) |
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved (for d-cache.num_blocks) |
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved (for d-cache.associativity) |
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved (for d-cache.replacement_Strategy) |
sysinfo_mem(3)(19 downto 16) <= (others => '0'); -- reserved - d-cache: log2(block_size) |
sysinfo_mem(3)(23 downto 20) <= (others => '0'); -- reserved - d-cache: log2(num_blocks) |
sysinfo_mem(3)(27 downto 24) <= (others => '0'); -- reserved - d-cache: log2(associativity) |
sysinfo_mem(3)(31 downto 28) <= (others => '0'); -- reserved - d-cache: replacement strategy |
|
-- SYSINFO(4): Base address of instruction memory space -- |
sysinfo_mem(4) <= ispace_base_c; -- defined in neorv32_package.vhd file |
/rtl/core/neorv32_top.vhd
80,6 → 80,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN : boolean := false; -- implement external memory bus interface? |
-- Processor peripherals -- |
327,7 → 328,7
end process clock_generator_edge; |
|
|
-- CPU ------------------------------------------------------------------------------------ |
-- CPU Core ------------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_cpu_inst: neorv32_cpu |
generic map ( |
412,10 → 413,11
-- ------------------------------------------------------------------------------------------- |
neorv32_icache_inst_true: |
if (ICACHE_EN = true) generate |
neorv32_icache_inst: neorv32_cache |
neorv32_icache_inst: neorv32_icache |
generic map ( |
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2 |
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- number of blocks (min 2), has to be a power of 2 |
CACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- block size in bytes (min 4), has to be a power of 2 |
CACHE_NUM_SETS => ICACHE_ASSOCIATIVITY -- associativity / number of sets (1=direct_mapped), has to be a power of 2 |
) |
port map ( |
-- global control -- |
462,7 → 464,7
end generate; |
|
|
-- CPU Crossbar Switch -------------------------------------------------------------------- |
-- CPU Bus Switch ------------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
neorv32_busswitch_inst: neorv32_busswitch |
generic map ( |
533,7 → 535,7
IMEM_BASE => imem_base_c, -- memory base address |
IMEM_SIZE => MEM_INT_IMEM_SIZE, -- processor-internal instruction memory size in bytes |
IMEM_AS_ROM => MEM_INT_IMEM_ROM, -- implement IMEM as read-only memory? |
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader? |
BOOTLOADER_EN => BOOTLOADER_EN -- implement and use bootloader? |
) |
port map ( |
clk_i => clk_i, -- global clock line |
613,13 → 615,13
if (MEM_EXT_EN = true) generate |
neorv32_wishbone_inst: neorv32_wishbone |
generic map ( |
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode |
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode |
-- Internal instruction memory -- |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
-- Internal data memory -- |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE -- size of processor-internal data memory in bytes |
) |
port map ( |
-- global control -- |
1006,34 → 1008,34
neorv32_sysinfo_inst: neorv32_sysinfo |
generic map ( |
-- General -- |
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz |
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader? |
USER_CODE => USER_CODE, -- custom user code |
CLOCK_FREQUENCY => CLOCK_FREQUENCY, -- clock frequency of clk_i in Hz |
BOOTLOADER_EN => BOOTLOADER_EN, -- implement processor-internal bootloader? |
USER_CODE => USER_CODE, -- custom user code |
-- internal Instruction memory -- |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM |
MEM_INT_IMEM_EN => MEM_INT_IMEM_EN, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_ROM => MEM_INT_IMEM_ROM, -- implement processor-internal instruction memory as ROM |
-- Internal Data memory -- |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes |
MEM_INT_DMEM_EN => MEM_INT_DMEM_EN, -- implement processor-internal data memory |
MEM_INT_DMEM_SIZE => MEM_INT_DMEM_SIZE, -- size of processor-internal data memory in bytes |
-- Internal Cache memory -- |
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => 1, -- i-cache: associativity (min 1), has to be a power 2 |
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 2), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => ICACHE_ASSOCIATIVITY, -- i-cache: associativity (min 1), has to be a power 2 |
-- External memory interface -- |
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface? |
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface? |
-- Processor peripherals -- |
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)? |
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)? |
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)? |
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)? |
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)? |
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)? |
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)? |
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)? |
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)? |
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)? |
IO_GPIO_EN => IO_GPIO_EN, -- implement general purpose input/output port unit (GPIO)? |
IO_MTIME_EN => IO_MTIME_EN, -- implement machine system timer (MTIME)? |
IO_UART_EN => IO_UART_EN, -- implement universal asynchronous receiver/transmitter (UART)? |
IO_SPI_EN => IO_SPI_EN, -- implement serial peripheral interface (SPI)? |
IO_TWI_EN => IO_TWI_EN, -- implement two-wire interface (TWI)? |
IO_PWM_EN => IO_PWM_EN, -- implement pulse-width modulation unit (PWM)? |
IO_WDT_EN => IO_WDT_EN, -- implement watch dog timer (WDT)? |
IO_TRNG_EN => IO_TRNG_EN, -- implement true random number generator (TRNG)? |
IO_CFU0_EN => IO_CFU0_EN, -- implement custom functions unit 0 (CFU0)? |
IO_CFU1_EN => IO_CFU1_EN -- implement custom functions unit 1 (CFU1)? |
) |
port map ( |
-- host access -- |
/rtl/top_templates/neorv32_test_setup.vhd
101,6 → 101,7
ICACHE_EN => false, -- implement instruction cache |
ICACHE_NUM_BLOCKS => 4, -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE => 64, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => 1, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN => false, -- implement external memory bus interface? |
-- Processor peripherals -- |
/rtl/top_templates/neorv32_top_axi4lite.vhd
76,6 → 76,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- Processor peripherals -- |
IO_GPIO_EN : boolean := true; -- implement general purpose input/output port unit (GPIO)? |
IO_MTIME_EN : boolean := true; -- implement machine system timer (MTIME)? |
238,6 → 239,7
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => ICACHE_ASSOCIATIVITY, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN => true, -- implement external memory bus interface? |
-- Processor peripherals -- |
/rtl/top_templates/neorv32_top_stdlogic.vhd
74,6 → 74,7
ICACHE_EN : boolean := false; -- implement instruction cache |
ICACHE_NUM_BLOCKS : natural := 4; -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE : natural := 64; -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY : natural := 1; -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN : boolean := false; -- implement external memory bus interface? |
-- Processor peripherals -- |
214,6 → 215,7
ICACHE_EN => ICACHE_EN, -- implement instruction cache |
ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, -- i-cache: number of blocks (min 1), has to be a power of 2 |
ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => ICACHE_ASSOCIATIVITY, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN => MEM_EXT_EN, -- implement external memory bus interface? |
-- Processor peripherals -- |
/sim/ghdl/ghdl_sim.sh
41,7 → 41,7
# |
ghdl -a --work=neorv32 $srcdir_core/neorv32_boot_rom.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_busswitch.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_cache.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_icache.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_cfu0.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_cfu1.vhd |
ghdl -a --work=neorv32 $srcdir_core/neorv32_cpu.vhd |
/sim/rtl_modules/neorv32_imem.vhd
6,7 → 6,7
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
-- # Copyright (c) 2020, Stephan Nolting. All rights reserved. # |
-- # Copyright (c) 2021, Stephan Nolting. All rights reserved. # |
-- # # |
-- # Redistribution and use in source and binary forms, with or without modification, are # |
-- # permitted provided that the following conditions are met: # |
48,7 → 48,7
IMEM_BASE : std_ulogic_vector(31 downto 0) := x"00000000"; -- memory base address |
IMEM_SIZE : natural := 4*1024; -- processor-internal instruction memory size in bytes |
IMEM_AS_ROM : boolean := false; -- implement IMEM as read-only memory? |
BOOTLOADER_USE : boolean := true -- implement and use bootloader? |
BOOTLOADER_EN : boolean := true -- implement and use bootloader? |
); |
port ( |
clk_i : in std_ulogic; -- global clock line |
/sim/neorv32_tb.vhd
1,7 → 1,8
-- ################################################################################################# |
-- # << NEORV32 - Default Testbench >> # |
-- # ********************************************************************************************* # |
-- # Use the "User Configuration" section to configure the testbench according to your need. # |
-- # The processor is configured to use a maximum of functional units (for testing purpose). # |
-- # Use the "User Configuration" section to configure the testbench according to your needs. # |
-- # See NEORV32 data sheet (docs/NEORV32.pdf) for more information. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
204,6 → 205,7
ICACHE_EN => icache_en_c, -- implement instruction cache |
ICACHE_NUM_BLOCKS => 8, -- i-cache: number of blocks (min 2), has to be a power of 2 |
ICACHE_BLOCK_SIZE => 64, -- i-cache: block size in bytes (min 4), has to be a power of 2 |
ICACHE_ASSOCIATIVITY => 2, -- i-cache: associativity / number of sets (1=direct_mapped), has to be a power of 2 |
-- External memory interface -- |
MEM_EXT_EN => true, -- implement external memory bus interface? |
-- Processor peripherals -- |
498,7 → 500,7
|
-- Wishbone IRQ Triggers ------------------------------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
ext_irq_trigger: process(clk_gen) |
irq_trigger: process(clk_gen) |
begin |
if rising_edge(clk_gen) then |
-- default -- |
523,7 → 525,7
wb_mei.ack <= '1'; |
end if; |
end if; |
end process ext_irq_trigger; |
end process irq_trigger; |
|
|
end neorv32_tb_rtl; |
/sw/example/coremark/core_portme.c
173,14 → 173,15
neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_LOAD); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_STORE); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_WAIT_LS); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_JUMP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_BRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TBRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TRAP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_ILLEGAL); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL); |
|
neorv32_uart_printf("NEORV32: Processor running at %u Hz\n", (uint32_t)SYSINFO_CLK); |
neorv32_uart_printf("NEORV32: Executing coremark (%u iterations). This may take some time...\n\n", (uint32_t)ITERATIONS); |
230,7 → 231,7
exe_time.uint64 = (uint64_t)get_time(); |
exe_instructions.uint64 = neorv32_cpu_get_instret(); |
|
neorv32_uart_printf("\nNEORV32: All reported numbers only show the integer part of the results.\n\n"); |
neorv32_uart_printf("\nNEORV32: All reported numbers only show the integer part.\n\n"); |
|
neorv32_uart_printf("NEORV32: HPM results\n"); |
if (num_hpm_cnts_global == 0) {neorv32_uart_printf("no HPMs available\n"); } |
237,14 → 238,15
if (num_hpm_cnts_global > 0) {neorv32_uart_printf("# Retired compr. instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } |
if (num_hpm_cnts_global > 1) {neorv32_uart_printf("# I-fetch wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } |
if (num_hpm_cnts_global > 2) {neorv32_uart_printf("# I-issue wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } |
if (num_hpm_cnts_global > 3) {neorv32_uart_printf("# Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } |
if (num_hpm_cnts_global > 4) {neorv32_uart_printf("# Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } |
if (num_hpm_cnts_global > 5) {neorv32_uart_printf("# Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } |
if (num_hpm_cnts_global > 6) {neorv32_uart_printf("# Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } |
if (num_hpm_cnts_global > 7) {neorv32_uart_printf("# Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } |
if (num_hpm_cnts_global > 8) {neorv32_uart_printf("# Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } |
if (num_hpm_cnts_global > 9) {neorv32_uart_printf("# Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); } |
if (num_hpm_cnts_global > 10) {neorv32_uart_printf("# Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); } |
if (num_hpm_cnts_global > 3) {neorv32_uart_printf("# Multi-cycle ALU wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } |
if (num_hpm_cnts_global > 4) {neorv32_uart_printf("# Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } |
if (num_hpm_cnts_global > 5) {neorv32_uart_printf("# Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } |
if (num_hpm_cnts_global > 6) {neorv32_uart_printf("# Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } |
if (num_hpm_cnts_global > 7) {neorv32_uart_printf("# Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } |
if (num_hpm_cnts_global > 8) {neorv32_uart_printf("# Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } |
if (num_hpm_cnts_global > 9) {neorv32_uart_printf("# Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); } |
if (num_hpm_cnts_global > 10) {neorv32_uart_printf("# Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); } |
if (num_hpm_cnts_global > 11) {neorv32_uart_printf("# Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER14)); } |
neorv32_uart_printf("\n"); |
|
neorv32_uart_printf("NEORV32: Executed instructions 0x%x_%x\n", (uint32_t)exe_instructions.uint32[1], (uint32_t)exe_instructions.uint32[0]); |
/sw/example/cpu_test/main.c
144,7 → 144,7
neorv32_uart_printf("build: "__DATE__" "__TIME__"\n"); |
|
// check if we came from hardware reset |
neorv32_uart_printf("Coming from hardware reset? "); |
neorv32_uart_printf("\nComing from HW reset? "); |
if (neorv32_cpu_csr_read(CSR_MCAUSE) == TRAP_CODE_RESET) { |
neorv32_uart_printf("yes\n"); |
} |
347,7 → 347,6
test_ok(); |
} |
else { |
neorv32_uart_printf("SECURITY VIOLATION! "); |
test_fail(); |
} |
} |
375,15 → 374,16
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_LOAD); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_STORE); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_WAIT_LS); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_JUMP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_BRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_TBRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TRAP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_ILLEGAL); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP); |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL); |
|
neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, 0); // enable all counters |
|
467,11 → 467,36
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
|
|
// ---------------------------------------------------------- |
// Test FENCE.I instruction (clear & reload i-cache) |
// ---------------------------------------------------------- |
neorv32_cpu_csr_write(CSR_MCAUSE, 0); |
neorv32_uart_printf("[%i] Testing FENCE.I operation: ", cnt_test); |
|
// check if implemented |
if (neorv32_cpu_csr_read(CSR_MZEXT) & (1 << CSR_MZEXT_ZIFENCEI)) { |
cnt_test++; |
|
asm volatile ("fence.i"); |
|
// make sure there was no exception (and that the cpu did not crash...) |
if (neorv32_cpu_csr_read(CSR_MCAUSE) == 0) { |
test_ok(); |
} |
else { |
test_fail(); |
} |
} |
else { |
neorv32_uart_printf("skipped (not implemented)\n"); |
} |
|
|
// ---------------------------------------------------------- |
// Illegal CSR access (CSR not implemented) |
// ---------------------------------------------------------- |
neorv32_cpu_csr_write(CSR_MCAUSE, 0); |
908,7 → 933,7
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
|
|
936,7 → 961,7
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
|
|
1015,7 → 1040,7
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
|
|
1193,7 → 1218,6
test_ok(); |
} |
else { |
neorv32_uart_printf("SECURITY VIOLATION! "); |
test_fail(); |
} |
} |
1412,7 → 1436,7
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
#else |
neorv32_uart_printf("skipped (not implemented)\n"); |
1448,7 → 1472,7
} |
} |
else { |
neorv32_uart_printf("skipped (on real hardware)\n"); |
neorv32_uart_printf("skipped (on real HW)\n"); |
} |
#else |
neorv32_uart_printf("skipped (not implemented)\n"); |
1491,30 → 1515,28
// HPM reports |
// ---------------------------------------------------------- |
neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, -1); // stop all counters |
neorv32_uart_printf("\n\n-- HPM results --\n"); |
if (num_hpm_cnts_global == 0) {neorv32_uart_printf("no HPMs available\n"); } |
if (num_hpm_cnts_global > 0) {neorv32_uart_printf("# Retired compr. instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } |
if (num_hpm_cnts_global > 1) {neorv32_uart_printf("# I-fetch wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } |
if (num_hpm_cnts_global > 2) {neorv32_uart_printf("# I-issue wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } |
if (num_hpm_cnts_global > 3) {neorv32_uart_printf("# Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } |
if (num_hpm_cnts_global > 4) {neorv32_uart_printf("# Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } |
if (num_hpm_cnts_global > 5) {neorv32_uart_printf("# Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } |
if (num_hpm_cnts_global > 6) {neorv32_uart_printf("# Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } |
if (num_hpm_cnts_global > 7) {neorv32_uart_printf("# Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } |
if (num_hpm_cnts_global > 8) {neorv32_uart_printf("# Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } |
if (num_hpm_cnts_global > 9) {neorv32_uart_printf("# Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); } |
if (num_hpm_cnts_global > 10) {neorv32_uart_printf("# Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); } |
neorv32_uart_printf("\n"); |
neorv32_uart_printf("\n\n-- HPM reports (%u HPMs available) --\n", num_hpm_cnts_global); |
neorv32_uart_printf("#IR - Total number of instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_INSTRET)); // = HPM_0 |
neorv32_uart_printf("#CY - Total number of clock cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_CYCLE)); // = HPM_2 |
neorv32_uart_printf("#03 - Retired compr. instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); |
neorv32_uart_printf("#04 - I-fetch wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); |
neorv32_uart_printf("#05 - I-issue wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); |
neorv32_uart_printf("#06 - Multi-cycle ALU wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); |
neorv32_uart_printf("#07 - Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); |
neorv32_uart_printf("#08 - Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); |
neorv32_uart_printf("#09 - Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); |
neorv32_uart_printf("#10 - Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); |
neorv32_uart_printf("#11 - Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); |
neorv32_uart_printf("#12 - Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); |
neorv32_uart_printf("#13 - Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); |
neorv32_uart_printf("#14 - Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER14)); |
|
|
// ---------------------------------------------------------- |
// Final test reports |
// ---------------------------------------------------------- |
neorv32_uart_printf("\n# Instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_INSTRET)); |
neorv32_uart_printf( "# Clock cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_CYCLE)); |
neorv32_uart_printf("\n\nTest results:\nOK: %i/%i\nFAILED: %i/%i\n\n", cnt_ok, cnt_test, cnt_fail, cnt_test); |
|
neorv32_uart_printf("\nTest results:\nOK: %i/%i\nFAILED: %i/%i\n\n", cnt_ok, cnt_test, cnt_fail, cnt_test); |
|
// final result |
if (cnt_fail == 0) { |
neorv32_uart_printf("%c[1m[CPU TEST COMPLETED SUCCESSFULLY!]%c[0m\n", 27, 27); |
/sw/lib/include/neorv32.h
428,16 → 428,17
HPMCNT_EVENT_CIR = 3, /**< CPU mhpmevent CSR (3): Retired compressed instruction */ |
HPMCNT_EVENT_WAIT_IF = 4, /**< CPU mhpmevent CSR (4): Instruction fetch memory wait cycle */ |
HPMCNT_EVENT_WAIT_II = 5, /**< CPU mhpmevent CSR (5): Instruction issue wait cycle */ |
HPMCNT_EVENT_LOAD = 6, /**< CPU mhpmevent CSR (6): Load operation */ |
HPMCNT_EVENT_STORE = 7, /**< CPU mhpmevent CSR (7): Store operation */ |
HPMCNT_EVENT_WAIT_LS = 8, /**< CPU mhpmevent CSR (8): Load/store memory wait cycle */ |
HPMCNT_EVENT_WAIT_MC = 6, /**< CPU mhpmevent CSR (6): Multi-cycle ALU-operation wait cycle */ |
HPMCNT_EVENT_LOAD = 7, /**< CPU mhpmevent CSR (7): Load operation */ |
HPMCNT_EVENT_STORE = 8, /**< CPU mhpmevent CSR (8): Store operation */ |
HPMCNT_EVENT_WAIT_LS = 9, /**< CPU mhpmevent CSR (9): Load/store memory wait cycle */ |
|
HPMCNT_EVENT_JUMP = 9, /**< CPU mhpmevent CSR (9): Unconditional jump */ |
HPMCNT_EVENT_BRANCH = 10, /**< CPU mhpmevent CSR (10): Conditional branch (taken or not taken) */ |
HPMCNT_EVENT_TBRANCH = 11, /**< CPU mhpmevent CSR (11): Conditional taken branch */ |
HPMCNT_EVENT_JUMP = 10, /**< CPU mhpmevent CSR (10): Unconditional jump */ |
HPMCNT_EVENT_BRANCH = 11, /**< CPU mhpmevent CSR (11): Conditional branch (taken or not taken) */ |
HPMCNT_EVENT_TBRANCH = 12, /**< CPU mhpmevent CSR (12): Conditional taken branch */ |
|
HPMCNT_EVENT_TRAP = 12, /**< CPU mhpmevent CSR (12): Entered trap */ |
HPMCNT_EVENT_ILLEGAL = 13 /**< CPU mhpmevent CSR (13): Illegal instruction exception */ |
HPMCNT_EVENT_TRAP = 13, /**< CPU mhpmevent CSR (13): Entered trap */ |
HPMCNT_EVENT_ILLEGAL = 14 /**< CPU mhpmevent CSR (14): Illegal instruction exception */ |
}; |
|
|
840,10 → 841,15
SYSINFO_CACHE_IC_NUM_BLOCKS_2 = 6, /**< SYSINFO_CACHE (6) (r/-): i-cache: log2(Number of cache blocks/pages/lines), bit 2 (via ICACHE_NUM_BLOCKS generic) */ |
SYSINFO_CACHE_IC_NUM_BLOCKS_3 = 7, /**< SYSINFO_CACHE (7) (r/-): i-cache: log2(Number of cache blocks/pages/lines), bit 3 (via ICACHE_NUM_BLOCKS generic) */ |
|
SYSINFO_CACHE_IC_ASSOCIATIVITY_0 = 8, /**< SYSINFO_CACHE (10) (r/-): i-cache: log2(associativity), bit 0 (always 0 -> direct mapped) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_1 = 9, /**< SYSINFO_CACHE (11) (r/-): i-cache: log2(associativity), bit 1 (always 0 -> direct mapped) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_2 = 10, /**< SYSINFO_CACHE (12) (r/-): i-cache: log2(associativity), bit 2 (always 0 -> direct mapped) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_3 = 11, /**< SYSINFO_CACHE (13) (r/-): i-cache: log2(associativity), bit 3 (always 0 -> direct mapped) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_0 = 8, /**< SYSINFO_CACHE (8) (r/-): i-cache: log2(associativity), bit 0 (via ICACHE_ASSOCIATIVITY generic) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_1 = 9, /**< SYSINFO_CACHE (9) (r/-): i-cache: log2(associativity), bit 1 (via ICACHE_ASSOCIATIVITY generic) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_2 = 10, /**< SYSINFO_CACHE (10) (r/-): i-cache: log2(associativity), bit 2 (via ICACHE_ASSOCIATIVITY generic) */ |
SYSINFO_CACHE_IC_ASSOCIATIVITY_3 = 11, /**< SYSINFO_CACHE (11) (r/-): i-cache: log2(associativity), bit 3 (via ICACHE_ASSOCIATIVITY generic) */ |
|
SYSINFO_CACHE_IC_REPLACEMENT_0 = 12, /**< SYSINFO_CACHE (12) (r/-): i-cache: replacement policy (0001 = LRU if associativity > 0) bit 0 */ |
SYSINFO_CACHE_IC_REPLACEMENT_1 = 13, /**< SYSINFO_CACHE (13) (r/-): i-cache: replacement policy (0001 = LRU if associativity > 0) bit 1 */ |
SYSINFO_CACHE_IC_REPLACEMENT_2 = 14, /**< SYSINFO_CACHE (14) (r/-): i-cache: replacement policy (0001 = LRU if associativity > 0) bit 2 */ |
SYSINFO_CACHE_IC_REPLACEMENT_3 = 15, /**< SYSINFO_CACHE (15) (r/-): i-cache: replacement policy (0001 = LRU if associativity > 0) bit 3 */ |
}; |
|
|
/sw/lib/source/neorv32_cpu.c
43,7 → 43,15
#include "neorv32_cpu.h" |
|
|
|
/**********************************************************************//** |
* >Private< helper functions. |
**************************************************************************/ |
static uint32_t __neorv32_cpu_pmp_cfg_read(uint32_t index); |
static void __neorv32_cpu_pmp_cfg_write(uint32_t index, uint32_t data); |
|
|
/**********************************************************************//** |
* Enable specific CPU interrupt. |
* |
* @note Interrupts have to be globally enabled via neorv32_cpu_eint(void), too. |
292,7 → 300,7
|
return 0; |
#else |
return 1; // A extension not implemented -Y always fail |
return 1; // A extension not implemented - function always fails |
#endif |
} |
|
307,23 → 315,13
**************************************************************************/ |
uint32_t neorv32_cpu_pmp_get_num_regions(void) { |
|
uint32_t i = 0; |
|
// try setting R bit in all PMPCFG CSRs |
neorv32_cpu_csr_write(CSR_PMPCFG0, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG1, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG2, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG3, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG4, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG5, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG6, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG7, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG8, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG9, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG10, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG11, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG12, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG13, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG14, 0x01010101); |
neorv32_cpu_csr_write(CSR_PMPCFG15, 0x01010101); |
const uint32_t tmp = 0x01010101; |
for (i=0; i<16; i++) { |
__neorv32_cpu_pmp_cfg_write(i, tmp); |
} |
|
// sum up all written ones (only available PMPCFG* CSRs/entries will return != 0) |
union { |
332,22 → 330,9
} cnt; |
|
cnt.uint32 = 0; |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG0); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG1); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG2); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG3); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG4); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG5); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG6); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG7); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG8); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG9); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG10); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG11); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG12); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG13); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG14); |
cnt.uint32 += neorv32_cpu_csr_read(CSR_PMPCFG15); |
for (i=0; i<16; i++) { |
cnt.uint32 += __neorv32_cpu_pmp_cfg_read(i); |
} |
|
// sum up bytes |
uint32_t num_regions = 0; |
414,6 → 399,9
return 1; // region size is not a power of two |
} |
|
// pmpcfg register index |
uint32_t pmpcfg_index = index >> 4; // 4 entries per pmpcfg csr |
|
// setup configuration |
uint32_t tmp; |
uint32_t config_int = ((uint32_t)config) << ((index%4)*8); |
421,26 → 409,9
config_mask = ~config_mask; |
|
// clear old configuration |
switch(index & 15) { |
case 0: neorv32_cpu_csr_write(CSR_PMPCFG0, neorv32_cpu_csr_read(CSR_PMPCFG0) & config_mask); break; |
case 1: neorv32_cpu_csr_write(CSR_PMPCFG1, neorv32_cpu_csr_read(CSR_PMPCFG1) & config_mask); break; |
case 2: neorv32_cpu_csr_write(CSR_PMPCFG2, neorv32_cpu_csr_read(CSR_PMPCFG2) & config_mask); break; |
case 3: neorv32_cpu_csr_write(CSR_PMPCFG3, neorv32_cpu_csr_read(CSR_PMPCFG3) & config_mask); break; |
case 4: neorv32_cpu_csr_write(CSR_PMPCFG4, neorv32_cpu_csr_read(CSR_PMPCFG4) & config_mask); break; |
case 5: neorv32_cpu_csr_write(CSR_PMPCFG5, neorv32_cpu_csr_read(CSR_PMPCFG5) & config_mask); break; |
case 6: neorv32_cpu_csr_write(CSR_PMPCFG6, neorv32_cpu_csr_read(CSR_PMPCFG6) & config_mask); break; |
case 7: neorv32_cpu_csr_write(CSR_PMPCFG7, neorv32_cpu_csr_read(CSR_PMPCFG7) & config_mask); break; |
case 8: neorv32_cpu_csr_write(CSR_PMPCFG8, neorv32_cpu_csr_read(CSR_PMPCFG8) & config_mask); break; |
case 9: neorv32_cpu_csr_write(CSR_PMPCFG9, neorv32_cpu_csr_read(CSR_PMPCFG9) & config_mask); break; |
case 10: neorv32_cpu_csr_write(CSR_PMPCFG10, neorv32_cpu_csr_read(CSR_PMPCFG10) & config_mask); break; |
case 11: neorv32_cpu_csr_write(CSR_PMPCFG11, neorv32_cpu_csr_read(CSR_PMPCFG11) & config_mask); break; |
case 12: neorv32_cpu_csr_write(CSR_PMPCFG12, neorv32_cpu_csr_read(CSR_PMPCFG12) & config_mask); break; |
case 13: neorv32_cpu_csr_write(CSR_PMPCFG13, neorv32_cpu_csr_read(CSR_PMPCFG13) & config_mask); break; |
case 14: neorv32_cpu_csr_write(CSR_PMPCFG14, neorv32_cpu_csr_read(CSR_PMPCFG14) & config_mask); break; |
case 15: neorv32_cpu_csr_write(CSR_PMPCFG15, neorv32_cpu_csr_read(CSR_PMPCFG15) & config_mask); break; |
default: break; |
} |
__neorv32_cpu_pmp_cfg_write(pmpcfg_index, __neorv32_cpu_pmp_cfg_read(pmpcfg_index) & config_mask); |
|
|
// set base address and region size |
uint32_t addr_mask = ~((size - 1) >> 2); |
uint32_t size_mask = (size - 1) >> 3; |
522,31 → 493,80
} |
|
// set new configuration |
__neorv32_cpu_pmp_cfg_write(pmpcfg_index, __neorv32_cpu_pmp_cfg_read(pmpcfg_index) | config_int); |
|
return 0; |
} |
|
|
/**********************************************************************//** |
* Internal helper function: Read PMP configuration register 0..15 |
* |
* @warning This function requires the PMP CPU extension. |
* |
* @param[in] index PMP CFG configuration register ID (0..15). |
* @return PMP CFG read data. |
**************************************************************************/ |
static uint32_t __neorv32_cpu_pmp_cfg_read(uint32_t index) { |
|
uint32_t tmp = 0; |
switch(index & 15) { |
case 0: neorv32_cpu_csr_write(CSR_PMPCFG0, neorv32_cpu_csr_read(CSR_PMPCFG0) | config_int); break; |
case 1: neorv32_cpu_csr_write(CSR_PMPCFG1, neorv32_cpu_csr_read(CSR_PMPCFG1) | config_int); break; |
case 2: neorv32_cpu_csr_write(CSR_PMPCFG2, neorv32_cpu_csr_read(CSR_PMPCFG2) | config_int); break; |
case 3: neorv32_cpu_csr_write(CSR_PMPCFG3, neorv32_cpu_csr_read(CSR_PMPCFG3) | config_int); break; |
case 4: neorv32_cpu_csr_write(CSR_PMPCFG4, neorv32_cpu_csr_read(CSR_PMPCFG4) | config_int); break; |
case 5: neorv32_cpu_csr_write(CSR_PMPCFG5, neorv32_cpu_csr_read(CSR_PMPCFG5) | config_int); break; |
case 6: neorv32_cpu_csr_write(CSR_PMPCFG6, neorv32_cpu_csr_read(CSR_PMPCFG6) | config_int); break; |
case 7: neorv32_cpu_csr_write(CSR_PMPCFG7, neorv32_cpu_csr_read(CSR_PMPCFG7) | config_int); break; |
case 8: neorv32_cpu_csr_write(CSR_PMPCFG8, neorv32_cpu_csr_read(CSR_PMPCFG8) | config_int); break; |
case 9: neorv32_cpu_csr_write(CSR_PMPCFG9, neorv32_cpu_csr_read(CSR_PMPCFG9) | config_int); break; |
case 10: neorv32_cpu_csr_write(CSR_PMPCFG10, neorv32_cpu_csr_read(CSR_PMPCFG10) | config_int); break; |
case 11: neorv32_cpu_csr_write(CSR_PMPCFG11, neorv32_cpu_csr_read(CSR_PMPCFG11) | config_int); break; |
case 12: neorv32_cpu_csr_write(CSR_PMPCFG12, neorv32_cpu_csr_read(CSR_PMPCFG12) | config_int); break; |
case 13: neorv32_cpu_csr_write(CSR_PMPCFG13, neorv32_cpu_csr_read(CSR_PMPCFG13) | config_int); break; |
case 14: neorv32_cpu_csr_write(CSR_PMPCFG14, neorv32_cpu_csr_read(CSR_PMPCFG14) | config_int); break; |
case 15: neorv32_cpu_csr_write(CSR_PMPCFG15, neorv32_cpu_csr_read(CSR_PMPCFG15) | config_int); break; |
case 0: tmp = neorv32_cpu_csr_read(CSR_PMPCFG0); break; |
case 1: tmp = neorv32_cpu_csr_read(CSR_PMPCFG1); break; |
case 2: tmp = neorv32_cpu_csr_read(CSR_PMPCFG2); break; |
case 3: tmp = neorv32_cpu_csr_read(CSR_PMPCFG3); break; |
case 4: tmp = neorv32_cpu_csr_read(CSR_PMPCFG4); break; |
case 5: tmp = neorv32_cpu_csr_read(CSR_PMPCFG5); break; |
case 6: tmp = neorv32_cpu_csr_read(CSR_PMPCFG6); break; |
case 7: tmp = neorv32_cpu_csr_read(CSR_PMPCFG7); break; |
case 8: tmp = neorv32_cpu_csr_read(CSR_PMPCFG8); break; |
case 9: tmp = neorv32_cpu_csr_read(CSR_PMPCFG9); break; |
case 10: tmp = neorv32_cpu_csr_read(CSR_PMPCFG10); break; |
case 11: tmp = neorv32_cpu_csr_read(CSR_PMPCFG11); break; |
case 12: tmp = neorv32_cpu_csr_read(CSR_PMPCFG12); break; |
case 13: tmp = neorv32_cpu_csr_read(CSR_PMPCFG13); break; |
case 14: tmp = neorv32_cpu_csr_read(CSR_PMPCFG14); break; |
case 15: tmp = neorv32_cpu_csr_read(CSR_PMPCFG15); break; |
default: break; |
} |
|
return 0; |
return tmp; |
} |
|
|
/**********************************************************************//** |
* Internal helper function: Write PMP configuration register 0..15 |
* |
* Writes <data> to the PMP configuration CSR (CSR_PMPCFG0 .. CSR_PMPCFG15) selected by <index>. |
* Only the lowest 4 bits of <index> are evaluated (index & 15), so values above 15 wrap around. |
* |
* @warning This function requires the PMP CPU extension. |
* |
* @param[in] index PMP CFG configuration register ID (0..15). |
* @param[in] data PMP CFG write data. |
**************************************************************************/ |
static void __neorv32_cpu_pmp_cfg_write(uint32_t index, uint32_t data) { |
|
// one explicit case per pmpcfg CSR, each naming its CSR_PMPCFG* constant directly |
// NOTE(review): presumably neorv32_cpu_csr_write needs a compile-time constant CSR address, hence no loop/table - confirm |
switch(index & 15) { |
case 0: neorv32_cpu_csr_write(CSR_PMPCFG0, data); break; |
case 1: neorv32_cpu_csr_write(CSR_PMPCFG1, data); break; |
case 2: neorv32_cpu_csr_write(CSR_PMPCFG2, data); break; |
case 3: neorv32_cpu_csr_write(CSR_PMPCFG3, data); break; |
case 4: neorv32_cpu_csr_write(CSR_PMPCFG4, data); break; |
case 5: neorv32_cpu_csr_write(CSR_PMPCFG5, data); break; |
case 6: neorv32_cpu_csr_write(CSR_PMPCFG6, data); break; |
case 7: neorv32_cpu_csr_write(CSR_PMPCFG7, data); break; |
case 8: neorv32_cpu_csr_write(CSR_PMPCFG8, data); break; |
case 9: neorv32_cpu_csr_write(CSR_PMPCFG9, data); break; |
case 10: neorv32_cpu_csr_write(CSR_PMPCFG10, data); break; |
case 11: neorv32_cpu_csr_write(CSR_PMPCFG11, data); break; |
case 12: neorv32_cpu_csr_write(CSR_PMPCFG12, data); break; |
case 13: neorv32_cpu_csr_write(CSR_PMPCFG13, data); break; |
case 14: neorv32_cpu_csr_write(CSR_PMPCFG14, data); break; |
case 15: neorv32_cpu_csr_write(CSR_PMPCFG15, data); break; |
// not reachable: (index & 15) is always within 0..15 |
default: break; |
} |
} |
|
|
/**********************************************************************//** |
* Hardware performance monitors (HPM): Get number of available HPM counters. |
* |
* @warning This function overrides all available mhpmcounter* CSRs. |
/sw/lib/source/neorv32_rte.c
377,15 → 377,15
uint32_t ic_associativity = (SYSINFO_CACHE >> SYSINFO_CACHE_IC_ASSOCIATIVITY_0) & 0x0F; |
ic_associativity = 1 << ic_associativity; |
|
neorv32_uart_printf("%u bytes (%u set(s), %u block(s) per set, %u bytes per block), ", ic_associativity*ic_num_blocks*ic_block_size, ic_associativity, ic_num_blocks, ic_block_size); |
if (ic_associativity == 0) { |
neorv32_uart_printf("direct-mapped\n"); |
neorv32_uart_printf("%u bytes: %u set(s), %u block(s) per set, %u bytes per block", ic_associativity*ic_num_blocks*ic_block_size, ic_associativity, ic_num_blocks, ic_block_size); |
if (ic_associativity == 1) { |
neorv32_uart_printf(" (direct-mapped)\n"); |
} |
else if (ic_associativity == ic_num_blocks) { |
neorv32_uart_printf("%u-way set-associative\n", ic_associativity); |
else if (((SYSINFO_CACHE >> SYSINFO_CACHE_IC_REPLACEMENT_0) & 0x0F) == 1) { |
neorv32_uart_printf(" (LRU replacement policy)\n"); |
} |
else { |
neorv32_uart_printf("fully-associative\n"); |
neorv32_uart_printf("\n"); |
} |
} |
|
/CHANGELOG.md
15,7 → 15,10
|
| Date (*dd.mm.yyyy*) | Version | Comment | |
|:----------:|:-------:|:--------| |
| 10.01.2021 | 1.5.0.0 | Renamed configuration generics: `*_USE` -> `*_EN` | |
| 17.01.2021 | 1.5.0.3 | CPU data register file can now be mapped to a **single** "true dual-port" block RAM by the synthesizer (requiring only 1024 memory bits instead of 2048); :bug: fixed typo error in `sim/rtl_modules/neorv32_imem.vhd`; modified M co-processor (due to register file read access modification), reduced switching activity when co-processor is idle; logic/arithmetic operations of `B` extension only require 3 cycles now, reduced switching activity when co-processor is idle | |
| 15.01.2021 | 1.5.0.2 | added instruction cache associativity configuration (number of sets); new configuration generic: `ICACHE_ASSOCIATIVITY` -> number of sets (1 = direct mapped, 2 = 2-way set-associative), has to be a power of two; if associativity is > 1 the used replacement policy is *least recently used (LRU)*; :bug: fixed bug in `sw/lib/source/neorv32_cpu.c` PMP.CFG configuration function | |
| 14.01.2021 | 1.5.0.1 | added new HPM trigger event: multi-cycle ALU operation wait cycle (`HPMCNT_EVENT_WAIT_MC`); renamed `neorv32_cache.vhd` -> `neorv32_icache.vhd` | |
| 10.01.2021 | [**:rocket:1.5.0.0**](https://github.com/stnolting/neorv32/releases/tag/v1.5.0.0) | Renamed configuration generics: `*_USE` -> `*_EN` | |
| 10.01.2021 | 1.4.9.10 | :sparkles: Added support for [**bit manipulation extension (`B`)**](https://github.com/riscv/riscv-bitmanip) - base subset `Zbb` only (:warning: RISC-V `B` (sub-)extensions are not officially ratified yet; compatible to version "0.94-draft"); enabled via new configuration constant `CPU_EXTENSION_RISCV_B` (default = false); supported `Zbb` instructions: `CLZ` `CTZ` `CPOP` `SEXT.B` `SEXT.H` `MIN[U]` `MAX[U]` `ANDN` `ORN` `XNOR` `ROL` `ROR` `RORI` `zext`(*pseudo-instruction* for `PACK rd, rs, zero`) `rev8`(*pseudo-instruction* for `GREVI rd, rs, -8`) `orc.b`(*pseudo-instruction* for `GORCI rd, rs, 7`); added `B` flag to `misa` CSR; added `Zbb` flag to `mzext` CSR | |
| 03.01.2021 | 1.4.9.8 | Added HPM trigger for instruction issue wait cycle (caused by pipeline flush); all HPM counters do not increment if CPU is sleep mode; fixed CoreMark timer overflow issues; `rtl/core/neorv32_busswitch.vhd`: removed wait states, less load/store wait cycles -> faster execution; updated CoreMark results | |
| 02.01.2021 | 1.4.9.7 | :sparkles: added RISC-V hardware performance monitors (`HPM`); new CSRs: `mhpmevent*`(3..31), `[m]hpmcounter*[h]`(3..31), amount configurable via top's generic `HPM_NUM_CNTS`; supported counter events: active cycle, retired instruction, retired compressed instruction, instruction fetch memory wait cycle, load operation, store operation, load/store memory wait cycle, unconditional jump, conditional branch (all), conditional taken branch, entered trap, illegal instruction exception; PMP can now have up to 64 regions; number of regions configured via top's `PMP_NUM_REGIONS` generic; removed obsolete top's `PMP_USE` generic; removed PMP flag from `mzext` CSR; minimal region granularity (in bytes) configured via top's `PMP_MIN_GRANULARITY` generic, has to be a power of two and >= 8 bytes; :bug: fixed bug in sleep (`wfi`) instruction | |
/README.md
27,6 → 27,8
|
The project’s change log is available in the [CHANGELOG.md](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md) file in the root directory of this repository. |
To see the changes between releases visit the project's [release page](https://github.com/stnolting/neorv32/releases). |
|
The documentation of the software framework is available online on [GitHub-pages](https://stnolting.github.io/neorv32/files.html). |
For more detailed information take a look at the [:page_facing_up: NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf) (pdf). |
|
|
35,7 → 37,7
* RISC-V-[compliant](#Status) 32-bit `rv32i` [**NEORV32 CPU**](#NEORV32-CPU-Features), compliant to |
* Subset of the *Unprivileged ISA Specification* [(Version 2.2)](https://github.com/stnolting/neorv32/blob/master/docs/riscv-spec.pdf) |
* Subset of the *Privileged Architecture Specification* [(Version 1.12-draft)](https://github.com/stnolting/neorv32/blob/master/docs/riscv-privileged.pdf) |
* Configurable RISC-V CPU extensions |
* Configurable RISC-V-compliant CPU extensions |
* `A` - atomic memory access instructions (optional) |
* `B` - Bit manipulation instructions (optional) |
* `C` - compressed instructions (16-bit) (optional) |
58,6 → 60,7
* core libraries for high-level usage of the provided functions and peripherals |
* application compilation based on [GNU makefiles](https://github.com/stnolting/neorv32/blob/master/sw/example/blink_led/makefile) |
* GCC-based toolchain ([pre-compiled toolchains available](https://github.com/stnolting/riscv_gcc_prebuilt)) |
* bootloader with UART interface console |
* runtime environment |
* several example programs |
* [doxygen-based](https://github.com/stnolting/neorv32/blob/master/docs/doxygen_makefile_sw) documentation: available on [GitHub pages](https://stnolting.github.io/neorv32/files.html) |
91,7 → 94,7
|:----------------- |:----------| |
| [NEORV32 processor](https://github.com/stnolting/neorv32) | [![Processor Check](https://github.com/stnolting/neorv32/workflows/Processor%20Check/badge.svg)](https://github.com/stnolting/neorv32/actions?query=workflow%3A%22Processor+Check%22) | |
| [SW Framework Documentation (online)](https://stnolting.github.io/neorv32/files.html) | [![Doc@GitHub-pages](https://github.com/stnolting/neorv32/workflows/Deploy%20SW%20Framework%20Documentation%20to%20GitHub-Pages/badge.svg)](https://stnolting.github.io/neorv32/files.html) | |
| [Pre-built toolchain](https://github.com/stnolting/riscv_gcc_prebuilt) | [![Test Toolchains](https://github.com/stnolting/riscv_gcc_prebuilt/workflows/Test%20Toolchains/badge.svg)](https://github.com/stnolting/riscv_gcc_prebuilt/actions?query=workflow%3A%22Test+Toolchains%22) | |
| [Pre-built toolchains](https://github.com/stnolting/riscv_gcc_prebuilt) | [![Test Toolchains](https://github.com/stnolting/riscv_gcc_prebuilt/workflows/Test%20Toolchains/badge.svg)](https://github.com/stnolting/riscv_gcc_prebuilt/actions?query=workflow%3A%22Test+Toolchains%22) | |
| [RISC-V compliance test](https://github.com/stnolting/neorv32/blob/master/riscv-compliance/README.md) | [![RISC-V Compliance](https://github.com/stnolting/neorv32/workflows/RISC-V%20Compliance/badge.svg)](https://github.com/stnolting/neorv32/actions?query=workflow%3A%22RISC-V+Compliance%22) | |
|
|
100,17 → 103,16
|
* Use LaTeX for data sheet |
* Further size and performance optimization |
* Add associativity configuration for instruction cache |
* Add *data* cache |
* Further expand associativity configuration of instruction cache (4x/8x set-associativity) |
* Add data cache |
* Burst mode for the external memory/bus interface |
* RISC-V `F` (using `[Zfinx](https://github.com/riscv/riscv-zfinx/blob/master/Zfinx_spec.adoc)`?) CPU extension (single-precision floating point) |
* RISC-V `F` (using [`Zfinx`](https://github.com/riscv/riscv-zfinx/blob/master/Zfinx_spec.adoc)?) CPU extension (single-precision floating point) |
* Add template (HW module + intrinsics skeleton) for custom instructions? |
* Synthesis results (+ wrappers?) for more/specific platforms |
* Implement further RISC-V (or custom?) CPU extensions |
* More support for FreeRTOS (like *all* traps) |
* Port additional RTOSs (like [Zephyr](https://github.com/zephyrproject-rtos/zephyr) or [RIOT](https://www.riot-os.org)) |
* Implement further RISC-V (or custom?) CPU extensions |
* Maybe port [CircuitPython](https://circuitpython.org/) (just for fun) |
* Add debugger ([RISC-V debug spec](https://github.com/riscv/riscv-debug-spec)) |
* Add memory-mapped trigger to testbench to quit simulation (maybe using VHDL2008's `use std.env.finish`?) |
* ... |
* [Ideas?](#ContributeFeedbackQuestions) |
|
267,16 → 269,16
of the CPU's generics is assumed (e.g. no physical memory protection, no hardware performance monitors). |
No constraints were used at all. The `u` and `Zifencei` extensions have a negligible impact on the hardware requirements. |
|
Results generated for hardware version [`1.4.9.10`](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md). |
Results generated for hardware version [`1.5.0.3`](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md). |
|
| CPU Configuration | LEs | FFs | Memory bits | DSPs | f_max | |
|:-----------------------------------------|:----------:|:--------:|:-----------:|:----:|:-------:| |
| `rv32i` | 1190 | 512 | 2048 | 0 | 120 MHz | |
| `rv32i` + `u` + `Zicsr` + `Zifencei` | 1927 | 903 | 2048 | 0 | 123 MHz | |
| `rv32im` + `u` + `Zicsr` + `Zifencei` | 2471 | 1148 | 2048 | 0 | 120 MHz | |
| `rv32imc` + `u` + `Zicsr` + `Zifencei` | 2716 | 1165 | 2048 | 0 | 120 MHz | |
| `rv32imac` + `u` + `Zicsr` + `Zifencei` | 2736 | 1168 | 2048 | 0 | 120 MHz | |
| `rv32imacb` + `u` + `Zicsr` + `Zifencei` | 3045 | 1260 | 2048 | 0 | 114 MHz | |
| `rv32i` | 1190 | 512 | 1024 | 0 | 120 MHz | |
| `rv32i` + `u` + `Zicsr` + `Zifencei` | 1927 | 903 | 1024 | 0 | 123 MHz | |
| `rv32im` + `u` + `Zicsr` + `Zifencei` | 2471 | 1148 | 1024 | 0 | 120 MHz | |
| `rv32imc` + `u` + `Zicsr` + `Zifencei` | 2716 | 1165 | 1024 | 0 | 120 MHz | |
| `rv32imac` + `u` + `Zicsr` + `Zifencei` | 2736 | 1168 | 1024 | 0 | 120 MHz | |
| `rv32imacb` + `u` + `Zicsr` + `Zifencei` | 3045 | 1260 | 1024 | 0 | 114 MHz | |
|
Setups with enabled "embedded CPU extension" `E` show the same LUT and FF utilization and identical f_max. However, the size of the register file is cut in half. |
|
283,13 → 285,13
|
### NEORV32 Processor-Internal Peripherals and Memories |
|
Results generated for hardware version [`1.4.9.10`](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md). |
Results generated for hardware version [`1.5.0.3`](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md). |
|
| Module | Description | LEs | FFs | Memory bits | DSPs | |
|:----------|:-----------------------------------------------------|----:|----:|------------:|-----:| |
| BOOT ROM | Bootloader ROM (default 4kB) | 3 | 1 | 32 768 | 0 | |
| BUSSWITCH | Mux for CPU I & D interfaces | 65 | 8 | 0 | 0 | |
| iCACHE | Proc.-int. nstruction cache (default 1x4x54 bytes) | 234 | 156 | 8 192 | 0 | |
| i-CACHE | Proc.-int. instruction cache (default 1x4x64 bytes) | 234 | 156 | 8 192 | 0 | |
| CFU0 | Custom functions unit 0 | - | - | - | - | |
| CFU1 | Custom functions unit 1 | - | - | - | - | |
| DMEM | Processor-internal data memory (default 8kB) | 6 | 2 | 65 536 | 0 | |
342,7 → 344,7
|
~~~ |
**Configuration** |
Hardware: 32kB IMEM, 16kB DMEM, no caches(!), 100MHz clock |
Hardware: 32kB IMEM, 16kB DMEM, no caches, 100MHz clock |
CoreMark: 2000 iterations, MEM_METHOD is MEM_STACK |
Compiler: RISCV32-GCC 10.1.0 (rv32i toolchain) |
Compiler flags: default, see makefile |
459,9 → 461,6
At first you need the **RISC-V GCC toolchain**. You can either [download the sources](https://github.com/riscv/riscv-gnu-toolchain) |
and build the toolchain by yourself, or you can download a prebuilt one and install it. |
|
:warning: Keep in mind that – for instance – a `rv32imc` toolchain only provides library code compiled with compressed and |
`mul`/`div` instructions! Hence, this code cannot be executed (without emulation) on an architecture without these extensions! |
|
To build the toolchain by yourself, follow the official [build instructions](https://github.com/riscv/riscv-gnu-toolchain). |
Make sure to use the `ilp32` or `ilp32e` ABI. |
|
469,7 → 468,13
were compiled on a 64-bit x86 Ubuntu 20.04 LTS (Ubuntu on Windows, actually). Download the toolchain of choice: |
[:octocat: github.com/stnolting/riscv_gcc_prebuilt](https://github.com/stnolting/riscv_gcc_prebuilt) |
|
You can also use the toolchains provided by [SiFive](https://github.com/sifive/freedom-tools/releases). These are 64-bit toolchains that can also emit 32-bit |
RISC-V code. They were compiled for more sophisticated machines (`imac`), so the corresponding hardware extensions are *mandatory*. |
|
:warning: Keep in mind that – for instance – a `rv32imc` toolchain only provides library code compiled with compressed and |
`mul`/`div` instructions! Hence, this code cannot be executed (without emulation) on an architecture without these extensions! |
|
|
### Dowload the NEORV32 Project |
|
Get the sources of the NEORV32 Processor project. The simplest way is using `git clone` (suggested for easy project updates via `git pull`): |
653,7 → 658,7
|
"Artix" and "Vivado" are trademarks of Xilinx Inc. |
|
"Cyclone", "Quartus Prime Lite" and "Avalon Bus" are trademarks of Intel Corporation. |
"Cyclone" and "Quartus Prime Lite" are trademarks of Intel Corporation. |
|
"iCE40", "UltraPlus" and "Radiant" are trademarks of Lattice Semiconductor Corporation. |
|
669,7 → 674,6
|
Continuous integration provided by [:octocat: GitHub Actions](https://github.com/features/actions) and powered by [GHDL](https://github.com/ghdl/ghdl). |
|
|
![Open Source Hardware Logo https://www.oshwa.org](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/oshw_logo.png) |
|
This project is not affiliated with or endorsed by the Open Source Initiative (https://www.oshwa.org / https://opensource.org). |