OpenCores
URL https://opencores.org/ocsvn/neorv32/neorv32/trunk

Subversion Repositories neorv32

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /neorv32/trunk
    from Rev 30 to Rev 31
    Reverse comparison

Rev 30 → Rev 31

/docs/figures/neorv32_cpu.png Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
/docs/NEORV32.pdf Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream
/rtl/core/neorv32_cpu.vhd
329,7 → 329,7
ma_instr_o => ma_instr, -- misaligned instruction address
be_instr_o => be_instr, -- bus error on instruction access
-- cpu data access interface --
addr_i => alu_res, -- ALU result -> access address
addr_i => alu_res, -- ALU result -> access address
wdata_i => rs2, -- write data
rdata_o => rdata, -- read data
mar_o => mar, -- current memory address register
/rtl/core/neorv32_cpu_bus.vhd
340,7 → 340,7
be_instr_o <= i_arbiter.err_bus;
 
-- instruction bus (read-only) --
i_bus_addr_o <= fetch_pc_i;
i_bus_addr_o <= fetch_pc_i(data_width_c-1 downto 2) & "00"; -- instruction access is always 4-byte aligned (even for compressed instructions)
i_bus_wdata_o <= (others => '0');
i_bus_ben_o <= (others => '0');
i_bus_we_o <= '0';
/rtl/core/neorv32_cpu_control.vhd
1,9 → 1,10
-- #################################################################################################
-- # << NEORV32 - CPU Control >> #
-- # ********************************************************************************************* #
-- # CPU operation is split into a fetch engine (responsible for fetching an decompressing instr- #
-- # uctions), an execute engine (responsible for actually executing the instructions), an inter- #
-- # rupt and exception handling controller and the RISC-V status and control registers (CSRs). #
-- # CPU operation is split into a fetch engine (responsible for fetching instruction data), an #
-- # issue engine (for recoding compressed instructions and for constructing 32-bit instruction #
-- # words) and an execute engine (responsible for actually executing the instructions), a trap #
-- # handling controller and the RISC-V status and control register set (CSRs). #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
105,49 → 106,68
architecture neorv32_cpu_control_rtl of neorv32_cpu_control is
 
-- instruction fetch engine --
type fetch_engine_state_t is (IFETCH_RESET, IFETCH_0, IFETCH_1, IFETCH_2);
type fetch_engine_state_t is (IFETCH_RESET, IFETCH_REQUEST, IFETCH_ISSUE);
type fetch_engine_t is record
state : fetch_engine_state_t;
state_nxt : fetch_engine_state_t;
i_buf : std_ulogic_vector(33 downto 0);
i_buf_nxt : std_ulogic_vector(33 downto 0);
i_buf2 : std_ulogic_vector(33 downto 0);
i_buf2_nxt : std_ulogic_vector(33 downto 0);
ci_input : std_ulogic_vector(15 downto 0); -- input to compressed instr. decoder
i_buf_state : std_ulogic_vector(01 downto 0);
i_buf_state_nxt : std_ulogic_vector(01 downto 0);
pc : std_ulogic_vector(data_width_c-1 downto 0);
pc_add : std_ulogic_vector(data_width_c-1 downto 0);
reset : std_ulogic;
bus_err_ack : std_ulogic;
state : fetch_engine_state_t;
state_nxt : fetch_engine_state_t;
pc : std_ulogic_vector(data_width_c-1 downto 0);
pc_nxt : std_ulogic_vector(data_width_c-1 downto 0);
reset : std_ulogic;
bus_err_ack : std_ulogic;
end record;
signal fetch_engine : fetch_engine_t;
 
-- pre-decoder --
signal ci_instr32 : std_ulogic_vector(31 downto 0);
signal ci_illegal : std_ulogic;
 
-- instruction prefetch buffer (IPB) --
type ipb_dbuf_t is array (0 to ipb_entries_c-1) of std_ulogic_vector(35 downto 0);
type ipb_data_fifo_t is array (0 to ipb_entries_c-1) of std_ulogic_vector(2+31 downto 0);
type ipb_t is record
wdata : std_ulogic_vector(35 downto 0); -- data (+ status) to be written
we : std_ulogic; -- trigger write
free : std_ulogic; -- free entry available?
wdata : std_ulogic_vector(2+31 downto 0); -- write status (bus_error, align_error) + 32-bit instruction data
we : std_ulogic; -- trigger write
free : std_ulogic; -- free entry available?
clear : std_ulogic; -- clear all entries
--
rdata : std_ulogic_vector(35 downto 0); -- read data (+ status)
re : std_ulogic; -- trigger read
avail : std_ulogic; -- data available?
rdata : std_ulogic_vector(2+31 downto 0); -- read data: status (bus_error, align_error) + 32-bit instruction data
re : std_ulogic; -- read enable
avail : std_ulogic; -- data available?
--
clear : std_ulogic; -- clear all entries
w_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- write pointer
r_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- read pointer
empty : std_ulogic;
full : std_ulogic;
--
data : ipb_dbuf_t; -- the data fifo
w_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- write pointer
r_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- read pointer
empty : std_ulogic;
full : std_ulogic;
data : ipb_data_fifo_t; -- fifo memory
end record;
signal ipb : ipb_t;
 
-- pre-decoder --
signal ci_instr16 : std_ulogic_vector(15 downto 0);
signal ci_instr32 : std_ulogic_vector(31 downto 0);
signal ci_illegal : std_ulogic;
 
-- instruction issue engine --
type issue_engine_state_t is (ISSUE_ACTIVE, ISSUE_REALIGN);
type issue_engine_t is record
state : issue_engine_state_t;
state_nxt : issue_engine_state_t;
align : std_ulogic;
align_nxt : std_ulogic;
buf : std_ulogic_vector(2+15 downto 0);
buf_nxt : std_ulogic_vector(2+15 downto 0);
end record;
signal issue_engine : issue_engine_t;
 
-- instruction buffer --
type i_buf_t is record
wdata : std_ulogic_vector(35 downto 0); -- 4-bit status + 32-bit instruction
rdata : std_ulogic_vector(35 downto 0); -- 4-bit status + 32-bit instruction
status : std_ulogic;
clear : std_ulogic;
we : std_ulogic;
re : std_ulogic;
free : std_ulogic;
avail : std_ulogic;
end record;
signal i_buf : i_buf_t;
 
-- instruction execution engine --
type execute_engine_state_t is (SYS_WAIT, DISPATCH, TRAP, EXECUTE, ALU_WAIT, BRANCH, LOADSTORE_0, LOADSTORE_1, LOADSTORE_2, CSR_ACCESS);
type execute_engine_t is record
257,16 → 277,16
begin
 
-- ****************************************************************************************************************************
-- Instruction Fetch
-- Instruction Fetch (always fetches aligned 32-bit chunks of data)
-- ****************************************************************************************************************************
 
-- Fetch Engine FSM Sync ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- registers that require a specific reset state --
fetch_engine_fsm_sync_rst: process(rstn_i, clk_i)
fetch_engine_fsm_sync: process(rstn_i, clk_i)
begin
if (rstn_i = '0') then
fetch_engine.state <= IFETCH_RESET;
fetch_engine.pc <= (others => '0');
elsif rising_edge(clk_i) then
if (fetch_engine.reset = '1') then
fetch_engine.state <= IFETCH_RESET;
273,114 → 293,55
else
fetch_engine.state <= fetch_engine.state_nxt;
end if;
fetch_engine.pc <= fetch_engine.pc_nxt;
end if;
end process fetch_engine_fsm_sync_rst;
 
 
-- registers that DO NOT require a specific reset state --
fetch_engine_fsm_sync: process(clk_i)
begin
if rising_edge(clk_i) then
if (fetch_engine.state = IFETCH_RESET) then
fetch_engine.pc <= execute_engine.pc(data_width_c-1 downto 1) & '0'; -- initialize with "real" application PC
else
fetch_engine.pc <= std_ulogic_vector(unsigned(fetch_engine.pc(data_width_c-1 downto 1) & '0') + unsigned(fetch_engine.pc_add(data_width_c-1 downto 1) & '0'));
end if;
--
fetch_engine.i_buf <= fetch_engine.i_buf_nxt;
fetch_engine.i_buf2 <= fetch_engine.i_buf2_nxt;
fetch_engine.i_buf_state <= fetch_engine.i_buf_state_nxt;
end if;
end process fetch_engine_fsm_sync;
 
-- PC output --
fetch_pc_o <= fetch_engine.pc(data_width_c-1 downto 1) & '0';
fetch_pc_o <= fetch_engine.pc(data_width_c-1 downto 1) & '0'; -- half-word aligned
 
 
-- Fetch Engine FSM Comb ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
fetch_engine_fsm_comb: process(fetch_engine, csr, ipb, instr_i, bus_i_wait_i, ci_instr32, ci_illegal, be_instr_i, ma_instr_i)
fetch_engine_fsm_comb: process(fetch_engine, execute_engine, ipb, instr_i, bus_i_wait_i, be_instr_i, ma_instr_i)
begin
-- arbiter defaults --
bus_fast_ir <= '0';
fetch_engine.state_nxt <= fetch_engine.state;
fetch_engine.pc_add <= (others => '0');
fetch_engine.i_buf_nxt <= fetch_engine.i_buf;
fetch_engine.i_buf2_nxt <= fetch_engine.i_buf2;
fetch_engine.i_buf_state_nxt <= fetch_engine.i_buf_state;
fetch_engine.ci_input <= fetch_engine.i_buf2(15 downto 00);
fetch_engine.bus_err_ack <= '0';
bus_fast_ir <= '0';
fetch_engine.state_nxt <= fetch_engine.state;
fetch_engine.pc_nxt <= fetch_engine.pc;
fetch_engine.bus_err_ack <= '0';
 
-- instruction prefetch buffer interface --
ipb.we <= '0';
ipb.wdata <= be_instr_i & ma_instr_i & instr_i(31 downto 0); -- store exception info and instruction word
ipb.clear <= '0';
ipb.wdata <= (others => '0');
 
-- state machine --
case fetch_engine.state is
 
when IFETCH_RESET => -- reset engine, prefetch buffer, get appilcation PC
when IFETCH_RESET => -- reset engine and prefetch buffer, get appilcation PC
-- ------------------------------------------------------------
fetch_engine.i_buf_state_nxt <= (others => '0');
ipb.clear <= '1'; -- clear instruction prefetch buffer
fetch_engine.state_nxt <= IFETCH_0;
fetch_engine.bus_err_ack <= '1'; -- acknowledge any instruction bus errors, the execute engine has to take care of them / terminate current transfer
fetch_engine.bus_err_ack <= '1'; -- acknowledge any instruction bus errors, the execute engine has to take care of them / terminate current transfer
fetch_engine.pc_nxt <= execute_engine.pc(data_width_c-1 downto 1) & '0'; -- initialize with "real" application PC
ipb.clear <= '1'; -- clear prefetch buffer
fetch_engine.state_nxt <= IFETCH_REQUEST;
 
when IFETCH_0 => -- output current PC to bus system, request 32-bit word
when IFETCH_REQUEST => -- output current PC to bus system and request 32-bit (aligned!) instruction data
-- ------------------------------------------------------------
bus_fast_ir <= '1'; -- fast instruction fetch request
fetch_engine.state_nxt <= IFETCH_1;
if (ipb.free = '1') then -- free entry in buffer?
bus_fast_ir <= '1'; -- fast instruction fetch request
fetch_engine.state_nxt <= IFETCH_ISSUE;
end if;
 
when IFETCH_1 => -- store data from memory to buffer(s)
when IFETCH_ISSUE => -- store instruction data to prefetch buffer
-- ------------------------------------------------------------
if (bus_i_wait_i = '0') or (be_instr_i = '1') or (ma_instr_i = '1') then -- wait for bus response
fetch_engine.i_buf_nxt <= be_instr_i & ma_instr_i & instr_i(31 downto 0); -- store data word and exception info
fetch_engine.i_buf2_nxt <= fetch_engine.i_buf;
fetch_engine.i_buf_state_nxt <= fetch_engine.i_buf_state(0) & '1';
if (fetch_engine.i_buf_state(0) = '1') then -- buffer filled?
fetch_engine.state_nxt <= IFETCH_2;
else
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c));
fetch_engine.state_nxt <= IFETCH_0; -- get another instruction word
end if;
fetch_engine.bus_err_ack <= '1'; -- acknowledge any instruction bus errors, the execute engine has to take care of them / terminate current transfer
fetch_engine.pc_nxt <= std_ulogic_vector(unsigned(fetch_engine.pc) + 4);
ipb.we <= '1';
fetch_engine.state_nxt <= IFETCH_REQUEST;
end if;
 
when IFETCH_2 => -- construct instruction word and issue
-- ------------------------------------------------------------
fetch_engine.bus_err_ack <= '1'; -- acknowledge any instruction bus errors, the execute engine has to take care of them / terminate current transfer
if (fetch_engine.pc(1) = '0') or (CPU_EXTENSION_RISCV_C = false) then -- 32-bit aligned
fetch_engine.ci_input <= fetch_engine.i_buf2(15 downto 00);
 
if (ipb.free = '1') then -- free entry in buffer?
ipb.we <= '1';
if (fetch_engine.i_buf2(01 downto 00) = "11") or (CPU_EXTENSION_RISCV_C = false) then -- uncompressed
ipb.wdata <= '0' & fetch_engine.i_buf2(33 downto 32) & '0' & fetch_engine.i_buf2(31 downto 0);
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c));
fetch_engine.state_nxt <= IFETCH_0;
else -- compressed
ipb.wdata <= ci_illegal & fetch_engine.i_buf2(33 downto 32) & '1' & ci_instr32;
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(2, data_width_c));
fetch_engine.state_nxt <= IFETCH_2; -- try to get another 16-bit instruction word in next round
end if;
end if;
 
else -- 16-bit aligned
fetch_engine.ci_input <= fetch_engine.i_buf2(31 downto 16);
 
if (ipb.free = '1') then -- free entry in buffer?
ipb.we <= '1';
if (fetch_engine.i_buf2(17 downto 16) = "11") then -- uncompressed and "unaligned"
ipb.wdata <= '0' & fetch_engine.i_buf(33 downto 32) & '0' & fetch_engine.i_buf(15 downto 00) & fetch_engine.i_buf2(31 downto 16);
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c));
fetch_engine.state_nxt <= IFETCH_0;
else -- compressed
ipb.wdata <= ci_illegal & fetch_engine.i_buf(33 downto 32) & '1' & ci_instr32;
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(2, data_width_c));
fetch_engine.state_nxt <= IFETCH_0;
end if;
end if;
end if;
 
when others => -- undefined
-- ------------------------------------------------------------
fetch_engine.state_nxt <= IFETCH_RESET;
389,27 → 350,6
end process fetch_engine_fsm_comb;
 
 
-- Compressed Instructions Recoding -------------------------------------------------------
-- -------------------------------------------------------------------------------------------
neorv32_cpu_decompressor_inst_true:
if (CPU_EXTENSION_RISCV_C = true) generate
neorv32_cpu_decompressor_inst: neorv32_cpu_decompressor
port map (
-- instruction input --
ci_instr16_i => fetch_engine.ci_input, -- compressed instruction input
-- instruction output --
ci_illegal_o => ci_illegal, -- is an illegal compressed instruction
ci_instr32_o => ci_instr32 -- 32-bit decompressed instruction
);
end generate;
 
neorv32_cpu_decompressor_inst_false:
if (CPU_EXTENSION_RISCV_C = false) generate
ci_instr32 <= (others => '0');
ci_illegal <= '0';
end generate;
 
 
-- ****************************************************************************************************************************
-- Instruction Prefetch Buffer
-- ****************************************************************************************************************************
429,7 → 369,7
elsif (ipb.we = '1') then
ipb.w_pnt <= std_ulogic_vector(unsigned(ipb.w_pnt) + 1);
end if;
-- read port --
-- read ports --
if (ipb.clear = '1') then
ipb.r_pnt <= (others => '0');
elsif (ipb.re = '1') then
448,17 → 388,182
end process instr_prefetch_buffer_data;
 
-- async read --
ipb.rdata <= ipb.data(to_integer(unsigned(ipb.r_pnt(ipb.w_pnt'left-1 downto 0))));
ipb.rdata <= ipb.data(to_integer(unsigned(ipb.r_pnt(ipb.r_pnt'left-1 downto 0))));
 
-- status --
ipb.full <= '1' when (ipb.r_pnt(ipb.r_pnt'left) /= ipb.w_pnt(ipb.w_pnt'left)) and (ipb.r_pnt(ipb.r_pnt'left-1 downto 0) = ipb.w_pnt(ipb.w_pnt'left-1 downto 0)) else '0';
ipb.empty <= '1' when (ipb.r_pnt(ipb.r_pnt'left) = ipb.w_pnt(ipb.w_pnt'left)) and (ipb.r_pnt(ipb.r_pnt'left-1 downto 0) = ipb.w_pnt(ipb.w_pnt'left-1 downto 0)) else '0';
 
ipb.free <= not ipb.full;
ipb.avail <= not ipb.empty;
 
 
-- ****************************************************************************************************************************
-- Instruction Issue (recoding of compressed instructions and 32-bit instruction word construction)
-- ****************************************************************************************************************************
 
 
-- Issue Engine FSM Sync ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Registers of the issue engine (FSM state, half-word alignment flag and the
-- half-word hold buffer). Asynchronous, low-active reset via rstn_i; all other
-- updates happen on the rising edge of clk_i.
issue_engine_fsm_sync: process(rstn_i, clk_i)
begin
  if (rstn_i = '0') then
    issue_engine.buf   <= (others => '0');
    issue_engine.align <= CPU_BOOT_ADDR(1);
    issue_engine.state <= ISSUE_ACTIVE;
  elsif rising_edge(clk_i) then
    -- regular operation: take over the values computed by the combinational FSM --
    issue_engine.buf   <= issue_engine.buf_nxt;
    issue_engine.state <= issue_engine.state_nxt;
    issue_engine.align <= issue_engine.align_nxt;
    -- prefetch buffer is being cleared: re-initialize the alignment tracking --
    -- (later assignments below override the defaults above)
    if (ipb.clear = '1') then
      issue_engine.align <= '0'; -- start on a 32-bit boundary unless overridden below
      if (CPU_EXTENSION_RISCV_C = true) then
        if (execute_engine.pc(1) = '1') then -- new PC sits on a 16-bit boundary
          issue_engine.state <= ISSUE_REALIGN;
          issue_engine.align <= '1';
        end if;
      end if;
    end if;
  end if;
end process issue_engine_fsm_sync;
 
 
-- Issue Engine FSM Comb ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Combinational part of the issue engine.
-- Reads 32-bit words (plus 2 status bits) from the instruction prefetch buffer (ipb)
-- and writes one 36-bit entry per instruction into the instruction buffer (i_buf).
-- i_buf.wdata layout as assembled below:
--   bit 35      : ci_illegal for compressed instructions, '0' otherwise
--   bits 34..33 : the two status bits taken from ipb.rdata(33 downto 32) or issue_engine.buf(17 downto 16)
--   bit 32      : '1' if the entry is a de-compressed 16-bit instruction, '0' otherwise
--   bits 31..0  : 32-bit instruction word
-- issue_engine.align selects the half-word of the current ipb word where decoding starts
-- ('0' = low half-word, '1' = high half-word, which is kept in issue_engine.buf).
issue_engine_fsm_comb: process(issue_engine, ipb, i_buf, execute_engine, ci_illegal, ci_instr32)
begin
-- arbiter defaults: hold all registers --
issue_engine.state_nxt <= issue_engine.state;
issue_engine.align_nxt <= issue_engine.align;
issue_engine.buf_nxt <= issue_engine.buf;

-- instruction prefetch buffer interface defaults: no read --
ipb.re <= '0';

-- instruction buffer interface defaults: no write, pass-through of the aligned 32-bit word --
i_buf.we <= '0';
i_buf.wdata <= '0' & ipb.rdata(33 downto 32) & '0' & ipb.rdata(31 downto 0);

-- state machine --
case issue_engine.state is

when ISSUE_ACTIVE => -- issue instruction if available
-- ------------------------------------------------------------
if (ipb.avail = '1') then -- instructions available?

if (issue_engine.align = '0') or (CPU_EXTENSION_RISCV_C = false) then -- begin check in LOW instruction half-word
if (i_buf.free = '1') then -- only issue if the instruction buffer can take a new entry
issue_engine.buf_nxt <= ipb.rdata(33 downto 32) & ipb.rdata(31 downto 16); -- store high half-word (and status bits) - we might need it for an unaligned uncompressed instruction
if (ipb.rdata(1 downto 0) = "11") or (CPU_EXTENSION_RISCV_C = false) then -- uncompressed and "aligned"
ipb.re <= '1'; -- whole word consumed
i_buf.wdata <= '0' & ipb.rdata(33 downto 32) & '0' & ipb.rdata(31 downto 0);
i_buf.we <= '1';
else -- compressed
ipb.re <= '1'; -- word is popped; its high half-word stays available via issue_engine.buf
i_buf.wdata <= ci_illegal & ipb.rdata(33 downto 32) & '1' & ci_instr32;
i_buf.we <= '1';
issue_engine.align_nxt <= '1'; -- next instruction starts in the (buffered) high half-word
end if;
end if;

else -- begin check in HIGH instruction half-word
if (i_buf.free = '1') then -- only issue if the instruction buffer can take a new entry
issue_engine.buf_nxt <= ipb.rdata(33 downto 32) & ipb.rdata(31 downto 16); -- store high half-word (and status bits) - we might need it for an unaligned uncompressed instruction
if (issue_engine.buf(1 downto 0) = "11") then -- uncompressed and "unaligned"
ipb.re <= '1'; -- low half-word of the current ipb word is consumed as the upper instruction half
i_buf.wdata <= '0' & issue_engine.buf(17 downto 16) & '0' & (ipb.rdata(15 downto 0) & issue_engine.buf(15 downto 0));
i_buf.we <= '1';
else -- compressed
-- intentionally no ipb read here: the current ipb word has not been used yet
--ipb.re <= '1';
-- NOTE(review): the instruction issued here comes from issue_engine.buf, yet the status bits
-- are taken from ipb.rdata(33 downto 32) and not from issue_engine.buf(17 downto 16) as in
-- the branch above - confirm this is intended.
i_buf.wdata <= ci_illegal & ipb.rdata(33 downto 32) & '1' & ci_instr32;
i_buf.we <= '1';
issue_engine.align_nxt <= '0'; -- next instruction starts in the low half-word of the current ipb word
end if;
end if;
end if;
end if;

when ISSUE_REALIGN => -- re-align input fifos after a branch to an unaligned address
-- ------------------------------------------------------------
issue_engine.buf_nxt <= ipb.rdata(33 downto 32) & ipb.rdata(31 downto 16); -- keep high half-word (and status bits) of the current ipb word
if (ipb.avail = '1') then -- instructions available?
ipb.re <= '1'; -- drop the word from the prefetch buffer without issuing anything
issue_engine.state_nxt <= ISSUE_ACTIVE;
end if;

when others => -- undefined
-- ------------------------------------------------------------
issue_engine.state_nxt <= ISSUE_ACTIVE;

end case;
end process issue_engine_fsm_comb;
 
-- 16-bit instruction: half-word select --
-- half-word fed to the compressed-instruction decoder: the buffered half-word
-- unless the issue engine is currently aligned to the low half-word of the ipb word
ci_instr16 <= issue_engine.buf(15 downto 0) when (issue_engine.align /= '0') else ipb.rdata(15 downto 0);
 
 
-- Compressed Instructions Recoding -------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- C extension enabled: instantiate the 16-bit -> 32-bit instruction decoder --
neorv32_cpu_decompressor_inst_true:
if CPU_EXTENSION_RISCV_C generate
  neorv32_cpu_decompressor_inst: neorv32_cpu_decompressor
  port map (
    ci_instr16_i => ci_instr16, -- 16-bit instruction in
    ci_instr32_o => ci_instr32, -- 32-bit instruction out
    ci_illegal_o => ci_illegal  -- illegal compressed instruction flag
  );
end generate;

-- C extension disabled: tie the decoder outputs to constants --
neorv32_cpu_decompressor_inst_false:
if not CPU_EXTENSION_RISCV_C generate
  ci_illegal <= '0';
  ci_instr32 <= (others => '0');
end generate;
 
 
-- Instruction Buffer ---------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Fill-status flag of the single-entry instruction buffer.
-- Priority (highest first): clear, write, read.
instruction_buffer_ctrl: process(rstn_i, clk_i)
begin
  if (rstn_i = '0') then
    i_buf.status <= '0';
  elsif rising_edge(clk_i) then
    -- ordered from lowest to highest priority; the last matching assignment wins --
    if (i_buf.re = '1') then -- entry consumed
      i_buf.status <= '0';
    end if;
    if (i_buf.we = '1') then -- entry written
      i_buf.status <= '1';
    end if;
    if (i_buf.clear = '1') then -- buffer flushed
      i_buf.status <= '0';
    end if;
  end if;
end process instruction_buffer_ctrl;
 
-- Data register of the instruction buffer (no reset).
-- A write is only taken over while the prefetch buffer is not being cleared.
instruction_buffer_data: process(clk_i)
begin
  if rising_edge(clk_i) then
    if (ipb.clear = '0') then
      if (i_buf.we = '1') then
        i_buf.rdata <= i_buf.wdata;
      end if;
    end if;
  end if;
end process instruction_buffer_data;
 
-- status --
-- i_buf.status = '1' means the buffer holds an entry: "free" and "avail" are its two views --
i_buf.free <= not i_buf.status;
i_buf.avail <= i_buf.status;

-- clear i_buf when clearing ipb (both buffers are always flushed together) --
i_buf.clear <= ipb.clear;
 
 
-- ****************************************************************************************************************************
-- Instruction Execution
-- ****************************************************************************************************************************
 
584,7 → 689,7
 
-- Execute Engine FSM Comb ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
execute_engine_fsm_comb: process(execute_engine, fetch_engine, ipb, trap_ctrl, csr, ctrl, csr_acc_valid,
execute_engine_fsm_comb: process(execute_engine, fetch_engine, i_buf, trap_ctrl, csr, ctrl, csr_acc_valid,
alu_res_i, alu_wait_i, bus_d_wait_i, ma_load_i, be_load_i, ma_store_i, be_store_i)
variable alu_immediate_v : std_ulogic;
variable rs1_is_r0_v : std_ulogic;
602,7 → 707,7
 
-- instruction dispatch --
fetch_engine.reset <= '0';
ipb.re <= '0';
i_buf.re <= '0';
 
-- trap environment control --
trap_ctrl.env_start_ack <= '0';
658,28 → 763,28
--
execute_engine.state_nxt <= DISPATCH;
 
when DISPATCH => -- Get new command from instruction prefetch buffer (IPB)
when DISPATCH => -- Get new command from instruction buffer (I_BUF)
-- ------------------------------------------------------------
ctrl_nxt(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c) <= ipb.rdata(instr_rd_msb_c downto instr_rd_lsb_c); -- rd addr
ctrl_nxt(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c) <= ipb.rdata(instr_rs1_msb_c downto instr_rs1_lsb_c); -- rs1 addr
ctrl_nxt(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c) <= ipb.rdata(instr_rs2_msb_c downto instr_rs2_lsb_c); -- rs2 addr
ctrl_nxt(ctrl_rf_rd_adr4_c downto ctrl_rf_rd_adr0_c) <= i_buf.rdata(instr_rd_msb_c downto instr_rd_lsb_c); -- rd addr
ctrl_nxt(ctrl_rf_rs1_adr4_c downto ctrl_rf_rs1_adr0_c) <= i_buf.rdata(instr_rs1_msb_c downto instr_rs1_lsb_c); -- rs1 addr
ctrl_nxt(ctrl_rf_rs2_adr4_c downto ctrl_rf_rs2_adr0_c) <= i_buf.rdata(instr_rs2_msb_c downto instr_rs2_lsb_c); -- rs2 addr
--
if (ipb.avail = '1') then -- instruction available?
ipb.re <= '1';
if (i_buf.avail = '1') then -- instruction available?
i_buf.re <= '1';
--
execute_engine.is_ci_nxt <= ipb.rdata(32); -- flag to indicate this is a de-compressed instruction beeing executed
execute_engine.i_reg_nxt <= ipb.rdata(31 downto 0);
execute_engine.is_ci_nxt <= i_buf.rdata(32); -- flag to indicate this is a de-compressed instruction beeing executed
execute_engine.i_reg_nxt <= i_buf.rdata(31 downto 0);
execute_engine.if_rst_nxt <= '0';
--
trap_ctrl.instr_ma <= ipb.rdata(33); -- misaligned instruction fetch address
trap_ctrl.instr_be <= ipb.rdata(34); -- bus access fault during instrucion fetch
illegal_compressed <= ipb.rdata(35); -- invalid decompressed instruction
trap_ctrl.instr_ma <= i_buf.rdata(33); -- misaligned instruction fetch address
trap_ctrl.instr_be <= i_buf.rdata(34); -- bus access fault during instrucion fetch
illegal_compressed <= i_buf.rdata(35); -- invalid decompressed instruction
--
if (execute_engine.if_rst = '0') then -- if there was NO non-linear PC modification
execute_engine.pc_nxt <= execute_engine.next_pc;
end if;
--
if (execute_engine.sleep = '1') or (trap_ctrl.env_start = '1') or ((ipb.rdata(33) or ipb.rdata(34)) = '1') then
if (execute_engine.sleep = '1') or (trap_ctrl.env_start = '1') or ((i_buf.rdata(33) or i_buf.rdata(34)) = '1') then
execute_engine.state_nxt <= TRAP;
else
execute_engine.state_nxt <= EXECUTE;
/rtl/core/neorv32_package.vhd
41,7 → 41,7
-- Architecture Constants -----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant data_width_c : natural := 32; -- data width - do not change!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01040405"; -- no touchy!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01040408"; -- no touchy!
constant pmp_max_r_c : natural := 8; -- max PMP regions - FIXED!
 
-- Architecture Configuration -------------------------------------------------------------
49,6 → 49,7
constant ispace_base_c : std_ulogic_vector(data_width_c-1 downto 0) := x"00000000"; -- default instruction memory address space base address
constant dspace_base_c : std_ulogic_vector(data_width_c-1 downto 0) := x"80000000"; -- default data memory address space base address
constant bus_timeout_c : natural := 127; -- cycles after which a valid bus access will timeout and triggers an access exception
constant wb_pipe_mode_c : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode (better timing)
constant ipb_entries_c : natural := 2; -- entries in instruction prefetch buffer, must be a power of 2, default=2
constant rf_r0_is_reg_c : boolean := true; -- reg_file.r0 is a physical register that has to be initialized to zero
 
1057,6 → 1058,7
component neorv32_wishbone
generic (
INTERFACE_REG_STAGES : natural := 2; -- number of interface register stages (0,1,2)
WB_PIPELINED_MODE : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode
-- Internal instruction memory --
MEM_INT_IMEM_USE : boolean := true; -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE : natural := 8*1024; -- size of processor-internal instruction memory in bytes
/rtl/core/neorv32_top.vhd
511,6 → 511,7
neorv32_wishbone_inst: neorv32_wishbone
generic map (
INTERFACE_REG_STAGES => MEM_EXT_REG_STAGES, -- number of interface register stages (0,1,2)
WB_PIPELINED_MODE => wb_pipe_mode_c, -- false: classic/standard wishbone mode, true: pipelined wishbone mode
-- Internal instruction memory --
MEM_INT_IMEM_USE => MEM_INT_IMEM_USE, -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE => MEM_INT_IMEM_SIZE, -- size of processor-internal instruction memory in bytes
/rtl/core/neorv32_wishbone.vhd
3,7 → 3,9
-- # ********************************************************************************************* #
-- # The interface is either unregistered (INTERFACE_REG_STAGES = 0), only outgoing signals are #
-- # registered (INTERFACE_REG_STAGES = 1) or incoming and outgoing signals are registered #
-- # (INTERFACE_REG_STAGES = 2). #
-- # (INTERFACE_REG_STAGES = 2). This interface supports classic/standard Wishbone transactions #
-- # (WB_PIPELINED_MODE = false) and also pipelined transactions for improved timing #
-- # (WB_PIPELINED_MODE = true). #
-- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
-- # All bus accesses from the CPU, which do not target the internal IO region, the internal boot- #
-- # loader or the internal instruction or data memories (if implemented), are delegated via this #
50,6 → 52,7
entity neorv32_wishbone is
generic (
INTERFACE_REG_STAGES : natural := 2; -- number of interface register stages (0,1,2)
WB_PIPELINED_MODE : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode
-- Internal instruction memory --
MEM_INT_IMEM_USE : boolean := true; -- implement processor-internal instruction memory
MEM_INT_IMEM_SIZE : natural := 8*1024; -- size of processor-internal instruction memory in bytes
95,6 → 98,7
signal rb_en : std_ulogic;
 
-- bus arbiter --
signal wb_we_ff : std_ulogic;
signal wb_stb_ff0 : std_ulogic;
signal wb_stb_ff1 : std_ulogic;
signal wb_cyc_ff : std_ulogic;
101,6 → 105,10
signal wb_ack_ff : std_ulogic;
signal wb_err_ff : std_ulogic;
 
-- wishbone mode: standard / pipelined --
signal stb_int_std : std_ulogic;
signal stb_int_pipe : std_ulogic;
 
-- data read-back --
signal wb_rdata : std_ulogic_vector(31 downto 0);
 
115,11 → 123,14
-- Access Control -------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- access to internal IMEM or DMEM? --
int_imem_acc <= '1' when (addr_i >= imem_base_c) and (addr_i < std_ulogic_vector(unsigned(imem_base_c) + MEM_INT_IMEM_SIZE)) else '0';
int_dmem_acc <= '1' when (addr_i >= dmem_base_c) and (addr_i < std_ulogic_vector(unsigned(dmem_base_c) + MEM_INT_DMEM_SIZE)) else '0';
int_imem_acc <= '1' when (addr_i(31 downto index_size_f(MEM_INT_IMEM_SIZE)) = imem_base_c(31 downto index_size_f(MEM_INT_IMEM_SIZE))) else '0';
int_dmem_acc <= '1' when (addr_i(31 downto index_size_f(MEM_INT_DMEM_SIZE)) = dmem_base_c(31 downto index_size_f(MEM_INT_DMEM_SIZE))) else '0';
int_imem_acc_real <= int_imem_acc when (MEM_INT_IMEM_USE = true) else '0';
int_dmem_acc_real <= int_dmem_acc when (MEM_INT_DMEM_USE = true) else '0';
 
-- access to internal BOOTROM or IO devices? --
int_boot_acc <= '1' when (addr_i >= boot_rom_base_c) else '0'; -- this also covers access to the IO space
--int_boot_acc <= '1' when (addr_i(31 downto index_size_f(2*boot_rom_max_size_c)) = boot_rom_base_c(31 downto index_size_f(2*boot_rom_max_size_c))) else '0'; -- this also covers access to the IO space
--int_io_acc <= '1' when (addr_i >= io_base_c) else '0';
 
-- actual external bus access? --
131,6 → 142,7
bus_arbiter: process(rstn_i, clk_i)
begin
if (rstn_i = '0') then
wb_we_ff <= '0';
wb_cyc_ff <= '0';
wb_stb_ff1 <= '0';
wb_stb_ff0 <= '0';
139,6 → 151,8
wb_access_ff <= '0';
wb_access_ff_ff <= '0';
elsif rising_edge(clk_i) then
-- read/write --
wb_we_ff <= (wb_we_ff or wren_i) and wb_access and (not wb_ack_i) and (not wb_err_i) and (not cancel_i);
-- bus cycle --
if (INTERFACE_REG_STAGES = 0) then
wb_cyc_ff <= '0'; -- unused
162,11 → 176,14
end if;
end process bus_arbiter;
 
-- bus cycle --
-- valid bus cycle --
wb_cyc_o <= wb_access when (INTERFACE_REG_STAGES = 0) else wb_cyc_ff;
 
-- bus_strobe: rising edge detector --
wb_stb_o <= (wb_access and (not wb_stb_ff0)) when (INTERFACE_REG_STAGES = 0) else (wb_stb_ff0 and (not wb_stb_ff1));
-- bus strobe --
stb_int_std <= wb_access when (INTERFACE_REG_STAGES = 0) else wb_cyc_ff; -- same as wb_cyc
stb_int_pipe <= (wb_access and (not wb_stb_ff0)) when (INTERFACE_REG_STAGES = 0) else (wb_stb_ff0 and (not wb_stb_ff1)); -- wb_access rising edge detector
--
wb_stb_o <= stb_int_std when (WB_PIPELINED_MODE = false) else stb_int_pipe; -- standard or pipelined mode
 
-- cpu ack --
ack_o <= wb_ack_ff when (INTERFACE_REG_STAGES = 2) else wb_ack_i;
187,7 → 204,7
wb_adr_o <= addr_i;
wb_dat_o <= data_i;
wb_sel_o <= ben_i;
wb_we_o <= wren_i;
wb_we_o <= wren_i or wb_we_ff;
end generate;
 
interface_reg_level_one:
199,7 → 216,7
wb_adr_o <= addr_i;
wb_dat_o <= data_i;
wb_sel_o <= ben_i;
wb_we_o <= wren_i;
wb_we_o <= wren_i or wb_we_ff;
end if;
end if;
end process buffer_stages_one;
215,7 → 232,7
wb_adr_o <= addr_i;
wb_dat_o <= data_i;
wb_sel_o <= ben_i;
wb_we_o <= wren_i;
wb_we_o <= wren_i or wb_we_ff;
end if;
if (wb_ack_i = '1') then
wb_rdata <= wb_dat_i;
/sim/ghdl/ghdl_sim.sh
31,7 → 31,7
 
# Just a hint
echo ""
echo "Tip: Compile application with USER_FLAGS+=-UART_SIM_MODE to have UART/console via direct simulation output."
echo "Tip: Compile application with USER_FLAGS+=-DUART_SIM_MODE to auto-enable UART's SIM MODE."
echo ""
 
# Analyse sources; libs and images at first!
/sim/vivado/neorv32_tb_behav.wcfg
12,15 → 12,15
</db_ref>
</db_ref_list>
<zoom_setting>
<ZoomStartTime time="820966fs"></ZoomStartTime>
<ZoomEndTime time="923467fs"></ZoomEndTime>
<Cursor1Time time="866866fs"></Cursor1Time>
<ZoomStartTime time="953250fs"></ZoomStartTime>
<ZoomEndTime time="1057351fs"></ZoomEndTime>
<Cursor1Time time="997350fs"></Cursor1Time>
</zoom_setting>
<column_width_setting>
<NameColumnWidth column_width="203"></NameColumnWidth>
<ValueColumnWidth column_width="94"></ValueColumnWidth>
<ValueColumnWidth column_width="78"></ValueColumnWidth>
</column_width_setting>
<WVObjectSize size="112" />
<WVObjectSize size="111" />
<wvobject type="divider" fp_name="divider273">
<obj_property name="label">CPU: Control.FETCH</obj_property>
<obj_property name="DisplayName">label</obj_property>
70,11 → 70,121
<obj_property name="ElementShortName">ipb</obj_property>
<obj_property name="ObjectShortName">ipb</obj_property>
<obj_property name="isExpanded"></obj_property>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.wdata" type="array">
<obj_property name="ElementShortName">.wdata[33:0]</obj_property>
<obj_property name="ObjectShortName">.wdata[33:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.we" type="logic">
<obj_property name="ElementShortName">.we</obj_property>
<obj_property name="ObjectShortName">.we</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.free" type="logic">
<obj_property name="ElementShortName">.free</obj_property>
<obj_property name="ObjectShortName">.free</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.clear" type="logic">
<obj_property name="ElementShortName">.clear</obj_property>
<obj_property name="ObjectShortName">.clear</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.rdata" type="array">
<obj_property name="ElementShortName">.rdata[33:0]</obj_property>
<obj_property name="ObjectShortName">.rdata[33:0]</obj_property>
<obj_property name="CustomSignalColor">#FFFFFF</obj_property>
<obj_property name="UseCustomSignalColor">true</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.re" type="logic">
<obj_property name="ElementShortName">.re</obj_property>
<obj_property name="ObjectShortName">.re</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.avail" type="logic">
<obj_property name="ElementShortName">.avail</obj_property>
<obj_property name="ObjectShortName">.avail</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.w_pnt" type="array">
<obj_property name="ElementShortName">.w_pnt[1:0]</obj_property>
<obj_property name="ObjectShortName">.w_pnt[1:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.r_pnt" type="array">
<obj_property name="ElementShortName">.r_pnt[1:0]</obj_property>
<obj_property name="ObjectShortName">.r_pnt[1:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.empty" type="logic">
<obj_property name="ElementShortName">.empty</obj_property>
<obj_property name="ObjectShortName">.empty</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.full" type="logic">
<obj_property name="ElementShortName">.full</obj_property>
<obj_property name="ObjectShortName">.full</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.data" type="array">
<obj_property name="ElementShortName">.data[0:1][33:0]</obj_property>
<obj_property name="ObjectShortName">.data[0:1][33:0]</obj_property>
</wvobject>
</wvobject>
<wvobject type="divider" fp_name="divider273">
<obj_property name="label">CPU: Control.EXE</obj_property>
<obj_property name="label">CPU: Control.ISSUE</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/issue_engine" type="array">
<obj_property name="ElementShortName">issue_engine</obj_property>
<obj_property name="ObjectShortName">issue_engine</obj_property>
<obj_property name="isExpanded"></obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ci_instr16" type="array">
<obj_property name="ElementShortName">ci_instr16[15:0]</obj_property>
<obj_property name="ObjectShortName">ci_instr16[15:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ci_instr32" type="array">
<obj_property name="ElementShortName">ci_instr32[31:0]</obj_property>
<obj_property name="ObjectShortName">ci_instr32[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ci_illegal" type="logic">
<obj_property name="ElementShortName">ci_illegal</obj_property>
<obj_property name="ObjectShortName">ci_illegal</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf" type="array">
<obj_property name="ElementShortName">i_buf</obj_property>
<obj_property name="ObjectShortName">i_buf</obj_property>
<obj_property name="isExpanded"></obj_property>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.wdata" type="array">
<obj_property name="ElementShortName">.wdata[35:0]</obj_property>
<obj_property name="ObjectShortName">.wdata[35:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.rdata" type="array">
<obj_property name="ElementShortName">.rdata[35:0]</obj_property>
<obj_property name="ObjectShortName">.rdata[35:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.status" type="logic">
<obj_property name="ElementShortName">.status</obj_property>
<obj_property name="ObjectShortName">.status</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.clear" type="logic">
<obj_property name="ElementShortName">.clear</obj_property>
<obj_property name="ObjectShortName">.clear</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.we" type="logic">
<obj_property name="ElementShortName">.we</obj_property>
<obj_property name="ObjectShortName">.we</obj_property>
<obj_property name="CustomSignalColor">#FFFFFF</obj_property>
<obj_property name="UseCustomSignalColor">true</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.re" type="logic">
<obj_property name="ElementShortName">.re</obj_property>
<obj_property name="ObjectShortName">.re</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.free" type="logic">
<obj_property name="ElementShortName">.free</obj_property>
<obj_property name="ObjectShortName">.free</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.avail" type="logic">
<obj_property name="ElementShortName">.avail</obj_property>
<obj_property name="ObjectShortName">.avail</obj_property>
</wvobject>
</wvobject>
<wvobject type="divider" fp_name="divider273">
<obj_property name="label">CPU: Control.EXECUTE</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/alu_wait_i" type="logic">
<obj_property name="ElementShortName">alu_wait_i</obj_property>
<obj_property name="ObjectShortName">alu_wait_i</obj_property>
96,8 → 206,8
<obj_property name="ObjectShortName">be_store_i</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ctrl_o" type="array">
<obj_property name="ElementShortName">ctrl_o[49:0]</obj_property>
<obj_property name="ObjectShortName">ctrl_o[49:0]</obj_property>
<obj_property name="ElementShortName">ctrl_o[45:0]</obj_property>
<obj_property name="ObjectShortName">ctrl_o[45:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ci_instr32" type="array">
<obj_property name="ElementShortName">ci_instr32[31:0]</obj_property>
127,6 → 237,96
<obj_property name="ElementShortName">execute_engine</obj_property>
<obj_property name="ObjectShortName">execute_engine</obj_property>
<obj_property name="isExpanded"></obj_property>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state" type="other">
<obj_property name="ElementShortName">.state</obj_property>
<obj_property name="ObjectShortName">.state</obj_property>
<obj_property name="CustomSignalColor">#FFFFFF</obj_property>
<obj_property name="UseCustomSignalColor">true</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state_prev" type="other">
<obj_property name="ElementShortName">.state_prev</obj_property>
<obj_property name="ObjectShortName">.state_prev</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state_nxt" type="other">
<obj_property name="ElementShortName">.state_nxt</obj_property>
<obj_property name="ObjectShortName">.state_nxt</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.i_reg" type="array">
<obj_property name="ElementShortName">.i_reg[31:0]</obj_property>
<obj_property name="ObjectShortName">.i_reg[31:0]</obj_property>
<obj_property name="CustomSignalColor">#FFFFFF</obj_property>
<obj_property name="UseCustomSignalColor">true</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.i_reg_nxt" type="array">
<obj_property name="ElementShortName">.i_reg_nxt[31:0]</obj_property>
<obj_property name="ObjectShortName">.i_reg_nxt[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_ci" type="logic">
<obj_property name="ElementShortName">.is_ci</obj_property>
<obj_property name="ObjectShortName">.is_ci</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_ci_nxt" type="logic">
<obj_property name="ElementShortName">.is_ci_nxt</obj_property>
<obj_property name="ObjectShortName">.is_ci_nxt</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_jump" type="logic">
<obj_property name="ElementShortName">.is_jump</obj_property>
<obj_property name="ObjectShortName">.is_jump</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_jump_nxt" type="logic">
<obj_property name="ElementShortName">.is_jump_nxt</obj_property>
<obj_property name="ObjectShortName">.is_jump_nxt</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_cp_op" type="logic">
<obj_property name="ElementShortName">.is_cp_op</obj_property>
<obj_property name="ObjectShortName">.is_cp_op</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_cp_op_nxt" type="logic">
<obj_property name="ElementShortName">.is_cp_op_nxt</obj_property>
<obj_property name="ObjectShortName">.is_cp_op_nxt</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.branch_taken" type="logic">
<obj_property name="ElementShortName">.branch_taken</obj_property>
<obj_property name="ObjectShortName">.branch_taken</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc" type="array">
<obj_property name="ElementShortName">.pc[31:0]</obj_property>
<obj_property name="ObjectShortName">.pc[31:0]</obj_property>
<obj_property name="CustomSignalColor">#FFFFFF</obj_property>
<obj_property name="UseCustomSignalColor">true</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc_nxt" type="array">
<obj_property name="ElementShortName">.pc_nxt[31:0]</obj_property>
<obj_property name="ObjectShortName">.pc_nxt[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.next_pc" type="array">
<obj_property name="ElementShortName">.next_pc[31:0]</obj_property>
<obj_property name="ObjectShortName">.next_pc[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.last_pc" type="array">
<obj_property name="ElementShortName">.last_pc[31:0]</obj_property>
<obj_property name="ObjectShortName">.last_pc[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.last_pc_nxt" type="array">
<obj_property name="ElementShortName">.last_pc_nxt[31:0]</obj_property>
<obj_property name="ObjectShortName">.last_pc_nxt[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.sleep" type="logic">
<obj_property name="ElementShortName">.sleep</obj_property>
<obj_property name="ObjectShortName">.sleep</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.sleep_nxt" type="logic">
<obj_property name="ElementShortName">.sleep_nxt</obj_property>
<obj_property name="ObjectShortName">.sleep_nxt</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.if_rst" type="logic">
<obj_property name="ElementShortName">.if_rst</obj_property>
<obj_property name="ObjectShortName">.if_rst</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.if_rst_nxt" type="logic">
<obj_property name="ElementShortName">.if_rst_nxt</obj_property>
<obj_property name="ObjectShortName">.if_rst_nxt</obj_property>
</wvobject>
</wvobject>
<wvobject type="divider" fp_name="divider139">
<obj_property name="label">CPU: Control.TRAP</obj_property>
174,18 → 374,10
<obj_property name="ElementShortName">imm_i[31:0]</obj_property>
<obj_property name="ObjectShortName">imm_i[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/csr_i" type="array">
<obj_property name="ElementShortName">csr_i[31:0]</obj_property>
<obj_property name="ObjectShortName">csr_i[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/cmp_o" type="array">
<obj_property name="ElementShortName">cmp_o[1:0]</obj_property>
<obj_property name="ObjectShortName">cmp_o[1:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/add_o" type="array">
<obj_property name="ElementShortName">add_o[31:0]</obj_property>
<obj_property name="ObjectShortName">add_o[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/res_o" type="array">
<obj_property name="ElementShortName">res_o[31:0]</obj_property>
<obj_property name="ObjectShortName">res_o[31:0]</obj_property>
202,10 → 394,6
<obj_property name="ElementShortName">opb[31:0]</obj_property>
<obj_property name="ObjectShortName">opb[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/opc" type="array">
<obj_property name="ElementShortName">opc[31:0]</obj_property>
<obj_property name="ObjectShortName">opc[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_alu_inst/shifter" type="array">
<obj_property name="ElementShortName">shifter</obj_property>
<obj_property name="ObjectShortName">shifter</obj_property>
281,10 → 469,12
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/cpu_i" type="array">
<obj_property name="ElementShortName">cpu_i</obj_property>
<obj_property name="ObjectShortName">cpu_i</obj_property>
<obj_property name="isExpanded"></obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/cpu_d" type="array">
<obj_property name="ElementShortName">cpu_d</obj_property>
<obj_property name="ObjectShortName">cpu_d</obj_property>
<obj_property name="isExpanded"></obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/PMP_USE" type="other">
<obj_property name="ElementShortName">PMP_USE</obj_property>
391,22 → 581,6
<obj_property name="ObjectShortName">mtime_hi[31:0]</obj_property>
</wvobject>
<wvobject type="divider" fp_name="divider238">
<obj_property name="label">IO: DEVNULL</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_devnull_inst_true/neorv32_devnull_inst/wren_i" type="logic">
<obj_property name="ElementShortName">wren_i</obj_property>
<obj_property name="ObjectShortName">wren_i</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_devnull_inst_true/neorv32_devnull_inst/data_i" type="array">
<obj_property name="ElementShortName">data_i[31:0]</obj_property>
<obj_property name="ObjectShortName">data_i[31:0]</obj_property>
</wvobject>
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_devnull_inst_true/neorv32_devnull_inst/acc_en" type="logic">
<obj_property name="ElementShortName">acc_en</obj_property>
<obj_property name="ObjectShortName">acc_en</obj_property>
</wvobject>
<wvobject type="divider" fp_name="divider238">
<obj_property name="label">EXT_MEM Interface</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
/sim/neorv32_tb.vhd
126,7 → 126,7
-- How to simulate a boot from an external memory --
-- ---------------------------------------------- --
-- The simulated Wishbone memory can be initialized with the compiled application init.
-- 1. Uncomment the init_wbmen function below
-- 1. Uncomment the init_wbmen function below; this will initialize the simulated wishbone memory with the neorv32_application_image.vhd image
-- 2. Increase the wb_mem_size_c constant above to (at least) the size of the application image (like 16kB)
-- 3. Disable the processor-internal IMEM in the processor instantiation below (MEM_INT_IMEM_USE => false)
-- 4. Set the Wishbone memory base address wb_mem_base_addr_c (above) to zero (constant wb_mem_base_addr_c : std_ulogic_vector(31 downto 0) := x"00000000";)
320,7 → 320,7
 
-- output to cpu --
wb_cpu.rdata <= wb_mem.rdata(wb_mem_latency_c-1) when (wb_mem.rb_en(wb_mem_latency_c-1) = '1') else (others=> '0'); -- data output gate
wb_cpu.ack <= wb_mem.ack(wb_mem_latency_c-1);
wb_cpu.ack <= wb_mem.ack(wb_mem_latency_c-1) and wb_cpu.cyc; -- another AND for classic/standard wishbone transactions
wb_cpu.err <= '0';
 
 
/sw/common/neorv32.ld
55,7 → 55,7
/* "rom" section: first value of ORIGIN/LENGTH => bootloader ROM; second value of ORIGIN/LENGTH => instruction memory */
 
rom (rx) : ORIGIN = DEFINED(make_bootloader) ? 0xFFFF0000 : 0x00000000, LENGTH = DEFINED(make_bootloader) ? 4*1024 : 16*1024
ram (rwx) : ORIGIN = 0x80000000, LENGTH = 8*1024
ram (rwx) : ORIGIN = 0x80000000, LENGTH = 8*1024
}
/* ************************************************************************* */
 
/sw/example/cpu_test/main.c
1050,7 → 1050,7
// ---------------------------------------------
neorv32_uart_printf("Creating protected page (NAPOT, 64kB) @ 0xFFFFA000, [!x, !w, r]...\n");
neorv32_cpu_csr_write(CSR_PMPADDR0, 0xffffdfff); // 64k area @ 0xFFFFA000
neorv32_cpu_csr_write(CSR_PMPCFG0, 0b00011001); // NAPOT, read permission, NO write and execute permissions
neorv32_cpu_csr_write(CSR_PMPCFG0, 0b00011001); // NAPOT, read permission, NO write and NO execute permissions
 
 
// ------ LOAD: should work ------
1103,10 → 1103,29
test_fail();
}
#endif
 
 
// ------ Lock test ------
neorv32_uart_printf("Locking pmpcfg0 [mode=off]: ");
cnt_test++;
exception_handler_answer = 0xFFFFFFFF;
 
neorv32_cpu_csr_write(CSR_PMPCFG0, 0b10000001); // locked but entry is deactivated (mode = off)
 
// make sure a locked cfg cannot be written
tmp_a = neorv32_cpu_csr_read(CSR_PMPCFG0);
neorv32_cpu_csr_write(CSR_PMPCFG0, 0b00011001); // try to re-write CFG content
 
if ((tmp_a != neorv32_cpu_csr_read(CSR_PMPCFG0)) || (exception_handler_answer != 0xFFFFFFFF)) {
test_fail();
}
else {
test_ok();
}
 
}
 
 
 
// ----------------------------------------------------------
// Final test reports
// ----------------------------------------------------------
/sw/lib/include/neorv32.h
158,17 → 158,17
* Trap codes from mcause CSR.
**************************************************************************/
enum NEORV32_EXCEPTION_CODES_enum {
TRAP_CODE_I_MISALIGNED = 0x00000000, /**< 0.0: Instruction address misaligned */
TRAP_CODE_I_ACCESS = 0x00000001, /**< 0.1: Instruction (bus) access fault */
TRAP_CODE_I_ILLEGAL = 0x00000002, /**< 0.2: Illegal instruction */
TRAP_CODE_BREAKPOINT = 0x00000003, /**< 0.3: Breakpoint (EBREAK instruction) */
TRAP_CODE_L_MISALIGNED = 0x00000004, /**< 0.4: Load address misaligned */
TRAP_CODE_L_ACCESS = 0x00000005, /**< 0.5: Load (bus) access fault */
TRAP_CODE_S_MISALIGNED = 0x00000006, /**< 0.6: Store address misaligned */
TRAP_CODE_S_ACCESS = 0x00000007, /**< 0.7: Store (bus) access fault */
TRAP_CODE_I_MISALIGNED = 0x00000000, /**< 0.0: Instruction address misaligned */
TRAP_CODE_I_ACCESS = 0x00000001, /**< 0.1: Instruction (bus) access fault */
TRAP_CODE_I_ILLEGAL = 0x00000002, /**< 0.2: Illegal instruction */
TRAP_CODE_BREAKPOINT = 0x00000003, /**< 0.3: Breakpoint (EBREAK instruction) */
TRAP_CODE_L_MISALIGNED = 0x00000004, /**< 0.4: Load address misaligned */
TRAP_CODE_L_ACCESS = 0x00000005, /**< 0.5: Load (bus) access fault */
TRAP_CODE_S_MISALIGNED = 0x00000006, /**< 0.6: Store address misaligned */
TRAP_CODE_S_ACCESS = 0x00000007, /**< 0.7: Store (bus) access fault */
TRAP_CODE_MENV_CALL = 0x0000000b, /**< 0.11: Environment call from machine mode (ECALL instruction) */
TRAP_CODE_MSI = 0x80000003, /**< 1.3: Machine software interrupt */
TRAP_CODE_MTI = 0x80000007, /**< 1.7: Machine timer interrupt */
TRAP_CODE_MSI = 0x80000003, /**< 1.3: Machine software interrupt */
TRAP_CODE_MTI = 0x80000007, /**< 1.7: Machine timer interrupt */
TRAP_CODE_MEI = 0x8000000b, /**< 1.11: Machine external interrupt */
TRAP_CODE_FIRQ_0 = 0x80000010, /**< 1.16: Fast interrupt channel 0 */
TRAP_CODE_FIRQ_1 = 0x80000011, /**< 1.17: Fast interrupt channel 1 */
/CHANGELOG.md
0,0 → 1,52
## Project Change Log
 
The most recent **NEORV32** project version can be found on top of this list.
"Officially released" versions are linked and highlighted (:rocket:).
The latest release is [![release](https://img.shields.io/github/v/release/stnolting/neorv32)](https://github.com/stnolting/neorv32/releases).
A list of all releases can be found [here](https://github.com/stnolting/neorv32/releases). The most recent version of the *NEORV32 data sheet*
can be found [here](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf) (pdf).
 
The processor can determine its version from the `mimpid` CSR (at CSR address 0xf13). A 2x4-bit decimal coded representation is used.
Example: `CSR(mimpid) = 0x01040309 -> 01.04.03.09 -> Version 1.4.3.9 = v1.4.3.9`
 
For the HDL sources the version number is globally defined by the `hw_version_c` constant in the main VHDL package file
[`rtl/core/neorv32_package.vhd`](https://github.com/stnolting/neorv32/blob/master/rtl/core/neorv32_package.vhd).
 
| Date (*dd.mm.yyyy*) | Version | Comment |
|:----------:|:-------:|:--------|
| 11.10.2020 | 1.4.4.8 | Reworked pipeline frontend: Optimized fetch engine, added issue engine, faster instruction fetch after taken branches + reduced hardware requirements; updated synthesis and performance results |
| 11.10.2020 | 1.4.4.6 | Added option to configure external memory interface (Wishbone) to either use *standard/classic protocol* (default) or *pipelined protocol* (for better timing): via `wb_pipe_mode_c` constant in VHDL package file (`rtl/core/neorv32_package.vhd`); added help text to NEORV32.pdf section "3.4.4. Processor-External Memory Interface (WISHBONE)" |
| 08.10.2020 | 1.4.4.5 | Removed CPU’s `BUS_TIMEOUT` and processor’s `MEM_EXT_TIMEOUT` generics; instead, a global configuration `bus_timeout_c` in the VHDL package file is used now |
| 08.10.2020 | 1.4.4.4 | Removed `DEVNULL` device; all simulation output options from this device are now available as `SIM_MODE` in the `UART`; `mcause` CSR can now also be written; FIXED: trying to write a read-only CSR will cause an illegal instruction exception; for compatibility reasons any write access to the misa CSR will be ignored and will NOT cause an exception |
| 07.10.2020 | 1.4.4.2 | Simplified ALU's set of core operations; removed co-processor data mux right after ALU -> shorter critical path; CPU control VHDL code clean-up and CSR write logic optimization; optimized IMEM/DMEM access logic; added note regarding alignment of IMEM/DMEM |
| 05.10.2020 | [**:rocket:1.4.4.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.4.0) | Fixed bug in external memory interface: Executing code from external memory was causing an instruction fetch stall |
| 02.10.2020 | 1.4.3.9 | `[m]cycleh` and `[m]instreth` CSRs are now 32-bit wide |
| 01.10.2020 | 1.4.3.8 | Added CPU top entity wrapper with resolved port signals `rtl/top_templetes/neorv32_cpu_stdlogic.vhd`; optimized ALU core functions – shorter critical path, less control overhead, reduced HW footprint |
| 27.09.2020 | 1.4.3.3 | Further improved ALU and control logic; CSR access instructions require one additional cycle now (to let side effects kick in); updated synthesis results; added CFU hardware driver dummy |
| 26.09.2020 | 1.4.3.2 | Fixed bug in `CSRRWI` instruction (introduced with version 1.4.3.1); further ALU operand logic optimizations; updated CPU data path figure |
| 25.09.2020 | 1.4.3.1 | Register file’s `x0` is now a physical register; this register is initialized by the hardware and locked afterwards; removed "set to zero" stage -> smaller hardware footprint and shorter critical path; added processor top entity wrapper with resolved signals `rtl/top_templetes/neorv32_top_stdlogic.vhd` |
| 16.09.2020 | [**:rocket:1.4.3.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.3.0) | Simplified memory configuration: removed processor top’s memory space configuration generics (`MEM_ISPACE_BASE`, `MEM_ISPACE_SIZE`, `MEM_DSPACE_BASE`, `MEM_DSPACE_SIZE`); data/instruction space sizes are irrelevant for hardware; instruction/data space base addresses are fixed (but can be modified in NEORV32 VHDL package file); modified SYSINFO registers; adapted bootloader, crt0 start-up code and linker script; stack configuration is now done via linker script; reworked chapter "address space"; added CFU interrupt -> fast interrupt channel 1 (shared with GPIO) |
| 14.09.2020 | 1.4.2.0 | Removed option to disable CSR counters (via `CSR_COUNTERS_USE` generic) since these counters are mandatory according to the RISC-V specs; added new IO/peripheral device: custom functions unit (`CFU`) for tightly-coupled custom co-processors; improved timing of processor-internal clock generator; fixed wrong labels in address space figure and removed dedicated exception vectors box; added mask register to GPIO unit to specify which input pins can trigger a pin-change interrupt |
| 11.09.2020 | 1.4.0.4 | Reworked `TRNG` architecture and interface; added text regarding fast interrupt channels usage for the NEORV32 processor |
| 02.09.2020 | 1.4.0.2 | Fixed bugs in external memory interface; added option to define latency of simulated external memory in testbench; hardware configuration sanity checks will now only appear once in console; added more details to data sheet section 3.3. Address Space; fixed typos in MEM_*_BASE and MEM_*_SIZE generic names |
| 01.09.2020 | 1.4.0.1 | Using registers above `x15` when the `E` extension is enabled will now correctly cause an illegal instruction exception |
| 29.08.2020 | [**:rocket:1.4.0.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.0.0) | Rearranged and reworked this document; added FreeRTOS port, demo & short referencing chapter; removed bootloader-specific linker scripts – main linker script is used for both applications and bootloader; bootloader can now have `.data` and `.bss` sections; improved IMEM and BOOTROM memory initialization – faster synthesis; image generator now constrains init array size to actual executable size; peripheral/IO devices can only be written in full word mode (= 32-bit); GPIO ports are now 32-bit wide |
| 23.08.2020 | 1.3.7.3 | Added custom `mzext` CSR to check for available Z* CPU extensions; multiplier’s FAST_MUL mode is one cycle faster now; updated performance data |
| 20.08.2020 | 1.3.7.2 | Removed bootloader-specific crt0 – bootloader now uses std crt0; makefiles now also support asm and cpp files; made linker scripts more general; renamed makefile "compile" (which is still available for compatibility) target into "exe" |
| 14.08.2020 | [**:rocket:1.3.7.0**](https://github.com/stnolting/neorv32/releases/tag/v1.3.7.0) | Simplified CPU fetch engine; added configurable CPU instruction prefetch buffer (ipb) FIFO; optimized CPU execute engine; updated performance data |
| 06.08.2020 | 1.3.6.5 | Added `FAST_MUL_EN` generic to enable mapping of the multiplier core to DSP blocks; ALU.shifter is no more triggered when executing MULDIV operations; added benchmark results for DSP-based multiplier configurations; updated implementation and performance results; simplified makefiles – using implicit libc definition; crt0 only initializes lowest 16 registers |
| 03.08.2020 | [**:rocket:1.3.6.0**](https://github.com/stnolting/neorv32/releases/tag/v1.3.6.0) | Relocated `DEVNULL` (changed base address); minor edits, optimization and clean-ups |
| 30.07.2020 | 1.3.5.2 | Added register stage to PMP mask generation to shorten critical path; removed automatic IRQ enable/disable from RTE install/uninstall functions |
| 30.07.2020 | 1.3.5.1 | Fixed bug(s) in PMP mask generation; `misa.Z` flag is not yet defined by the RISC-V specs., hence it is read-only and read as zero |
| 29.07.2020 | 1.3.5.0 | Added user privilege level, enabled via new `CPU_EXTENSION_RISCV_U` generic; fixed error in `mstatus(mpie)` logic; implemented RISC-V spec.-compliant Physical Memory Protection (PMP); allows up to 8 regions but only NAPOT mode is supported yet |
| 25.07.2020 | 1.3.0.0 | `mcause` CSR is read-only now!; removed `CLIC`, added 4 fast IRQ channels to CPU with according flags in `mie` and `mip` and trap IDs; updated core libraries; updated NEORV32 RTE; highly reworked data sheet; updated synthesis and performance results |
| 21.07.2020 | 1.2.0.6 | Added doc section regarding the CPU’s data and instruction interfaces; optimized CPU fetch engine; updated iCE40 synthesis results |
| 20.07.2020 | 1.2.0.5 | Less penalty for taken branches and jumps (2 cycles faster) |
| 19.07.2020 | 1.2.0.0 | CPU bus unit now has independent busses for instruction fetch and data access – merged into single processor bus via new bus switch unit; doubled speed of ALU shifter unit again; all bits of `mcause` CSR can now be modified by application program (full RISC-V-compliant); performance counters CSRs `[m]cycleh` and `[m]instreth` are only 20-bit wide; removed NEORV32-specific custom CSRs – all processor-related information can be obtained from the new `SYSINFO` IO module (CPU is now more independent from processor configuration); changed IO address of `DEVNULL`; fixed bug in bootloader’s trap handler; added `USER_CODE` generic to assign a custom user code that can be read by software (from `SYSINFO`) |
| 14.07.2020 | 1.1.0.0 | Added `fence_o` and `fencei_o` signals to top entity to show if a `fence` or `fencei` instruction is executed; added `mvendorid` and `marchid` CSRs (both are always zero); ALU shift unit is faster now; two lowest bits of `mtvec` are always zero; fixed wrong instruction exception priority; removed `HART_ID` generic – `mhartid` CSR is always read as zero; performance counters (`[m]cycle[h]`, `[m]instret[h]` and `time[h]`) are also available in embedded mode – but can be explicitly disabled via the `CSR_COUNTERS_USE` generic; mcause CSR only allows write access to bit 31 and bits 3:0; updated synthesis reports |
| 10.07.2020 | 1.0.6.0 | Non-taken branches are now 1 cycle faster; the `time[h]` CSR now correctly reflects the system time from the MTIME unit; fixed WFI instruction permanently stalling the CPU; `[m]cycle[h]` counters now stop counting when CPU is in sleep mode; `minstret[h]` and `mcycle[h]` now also allow write-access |
| 09.07.2020 | 1.0.5.0 | `X` flag of `misa` CSR is zero now; the default SPI flash boot address of the bootloader is now `0x0080000`; new exemplary FPGA utilization results for Intel, Lattice and Xilinx; `misa` CSR is read-only again, switching compressed extension on/off is pretty bad for the fetch engine; `mtval` and `mcause` CSRs now allow write accesses and are finally RISC-V-compliant; time low and high registers of `MTIME` peripheral can now also be written by user; `MTIME` registers only allow full-word write accesses |
| 06.07.2020 | 1.0.1.0 | Added missing `fence` instruction; added new generic to enable optional Zifencei CPU extension for instruction stream synchronization |
| 05.07.2020 | 1.0.0.0 | New CPU architecture: Fetch and execute engines; increased CPI; timer and counter CSRs are now all 64-bit wide; fixed CSR access errors; fixed `C.LW` decompression logic; `misa` flags `C` and `M` are now r/w – compressed mode and multiplier/divider support can be switched on/off during runtime; PC(0) is now always zero; fixed bug in multiplier/divider co-processor; renamed SPI signals; added RISC-V compliance check information – processor now passes the official RISC-V compliance tests |
| 25.06.2020 | 0.0.2.5 | Added `DEVNULL` device; added chapter regarding processor simulation; fixed/added links; fixed typos; added FPGA implementation results for iCE40 UP |
| 23.06.2020 | [**:rocket:0.0.2.3**](https://github.com/stnolting/neorv32/releases/tag/v1.2.0.5) | Publication |
/README.md
61,12 → 61,9
with the NEORV32. If you do not want to [compile the GCC toolchains](https://github.com/riscv/riscv-gnu-toolchain) by yourself, you can also
download [pre-compiled toolchains](https://github.com/stnolting/riscv_gcc_prebuilt) for Linux.
 
For more information take a look at the [![NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
For more information take a look at the [![NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
 
This project is hosted on [GitHub](https://github.com/stnolting/neorv32) and [opencores.org](https://opencores.org/projects/neorv32).
A not-so-complete project log can be found on [hackaday.io](https://hackaday.io/project/174167-the-neorv32-risc-v-processor).
 
 
### Key Features
 
- RISC-V-compliant `rv32i` CPU with optional `C`, `E`, `M`, `U`, `Zicsr`, `Zifencei` and `PMP` (physical memory protection) extensions
73,7 → 70,7
- GCC-based toolchain ([pre-compiled rv32i and rv32e toolchains available](https://github.com/stnolting/riscv_gcc_prebuilt))
- Application compilation based on [GNU makefiles](https://github.com/stnolting/neorv32/blob/master/sw/example/blink_led/makefile)
- [Doxygen-based](https://github.com/stnolting/neorv32/blob/master/docs/doxygen_makefile_sw) documentation of the software framework: available on [GitHub pages](https://stnolting.github.io/neorv32/files.html)
- Detailed [datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf) (pdf)
- [**Detailed data sheet**](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf) (pdf)
- Completely described in behavioral, platform-independent VHDL – no primitives, macros, etc.
- Fully synchronous design, no latches, no gated clocks
- Small hardware footprint and high operating frequency
93,11 → 90,13
 
### Status
 
The processor is [synthesizable](#NEORV32-Processor-Exemplary-FPGA-Setups) (tested with *real hardware* using Intel Quartus Prime, Xilinx Vivado and Lattice Radiant/Synplify Pro) and can successfully execute
The processor is [synthesizable](#FPGA-Implementation-Results) (tested on *real hardware* using Intel Quartus Prime, Xilinx Vivado and Lattice Radiant/Synplify Pro) and can successfully execute
all the [provided example programs](https://github.com/stnolting/neorv32/tree/master/sw/example) including the [CoreMark benchmark](#CoreMark-Benchmark).
 
The processor passes the official `rv32i`, `rv32im`, `rv32imc`, `rv32Zicsr` and `rv32Zifencei` [RISC-V compliance tests](https://github.com/riscv/riscv-compliance).
The processor passes the official `rv32i`, `rv32im`, `rv32imc`, `rv32Zicsr` and `rv32Zifencei` [RISC-V compliance tests](https://github.com/riscv/riscv-compliance).
 
The project’s change log is available in the [CHANGELOG.md](https://github.com/stnolting/neorv32/blob/master/CHANGELOG.md) file in the root directory of this repository.
 
| Project component | CI status | Note |
|:--------------------------------------------------------------------------------|:----------|:---------|
| [NEORV32 processor](https://github.com/stnolting/neorv32) | [![Build Status](https://travis-ci.com/stnolting/neorv32.svg?branch=master)](https://travis-ci.com/stnolting/neorv32) | [![sw doc](https://img.shields.io/badge/SW%20documentation-gh--pages-blue)](https://stnolting.github.io/neorv32/files.html) |
119,6 → 118,9
 
## Features
 
The full-blown data sheet of the NEORV32 Processor/CPU is available as pdf file:
[![NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
 
### Processor Features
 
![neorv32 Overview](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/neorv32_processor.png)
133,7 → 135,7
- Optional 8/16/24/32-bit serial peripheral interface controller (**SPI**) with 8 dedicated chip select lines
- Optional two wire serial interface controller (**TWI**), compatible to the I²C standard
- Optional general purpose parallel IO port (**GPIO**), 32xOut & 32xIn, with pin-change interrupt
- Optional 32-bit external bus interface, Wishbone b4 compliant (**WISHBONE**)
- Optional 32-bit external bus interface, Wishbone b4 compliant (**WISHBONE**), *standard* or *pipelined* handshake/transactions mode
- Optional watchdog timer (**WDT**)
- Optional PWM controller with 4 channels and 8-bit duty cycle resolution (**PWM**)
- Optional GARO-based true random number generator (**TRNG**)
149,7 → 151,7
[RISC-V privileged architecture specifications (1.12-draft)](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/riscv-spec.pdf).
 
More information regarding the CPU including a detailed list of the instruction set and the available CSRs can be found in
the [![NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
the [NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
 
 
**General**:
237,53 → 239,54
information is derived from the Timing Analyzer / Slow 1200mV 0C Model. If not otherwise specified, the default configuration
of the CPU's generics is assumed (for example no PMP). No constraints were used at all.
 
Results generated for hardware version: `1.4.3.3`
Results generated for hardware version: `1.4.4.8`
 
| CPU Configuration | LEs | FFs | Memory bits | DSPs | f_max |
|:---------------------------------------|:----------:|:--------:|:-----------:|:----:|:-------:|
| `rv32i` | 1033 | 567 | 2048 | 0 | 120 MHz |
| `rv32i` + `u` + `Zicsr` + `Zifencei` | 1778 | 806 | 2048 | 0 | 103 MHz |
| `rv32im` + `u` + `Zicsr` + `Zifencei` | 2389 | 1052 | 2048 | 0 | 102 MHz |
| `rv32imc` + `u` + `Zicsr` + `Zifencei` | 2644 | 1053 | 2048 | 0 | 106 MHz |
| `rv32emc` + `u` + `Zicsr` + `Zifencei` | 2646 | 1050 | 1024 | 0 | 103 MHz |
| CPU Configuration | LEs | FFs | Memory bits | DSPs | f_max |
|:---------------------------------------|:----------:|:--------:|:-----------:|:----:|:--------:|
| `rv32i` | 983 | 438 | 2048 | 0 | ~120 MHz |
| `rv32i` + `u` + `Zicsr` + `Zifencei` | 1877 | 802 | 2048 | 0 | ~112 MHz |
| `rv32im` + `u` + `Zicsr` + `Zifencei` | 2374 | 1048 | 2048 | 0 | ~110 MHz |
| `rv32imc` + `u` + `Zicsr` + `Zifencei` | 2650 | 1064 | 2048 | 0 | ~110 MHz |
| `rv32emc` + `u` + `Zicsr` + `Zifencei` | 2680 | 1061 | 1024 | 0 | ~110 MHz |
 
 
### NEORV32 Processor-Internal Peripherals and Memories
 
Results generated for hardware version: `1.4.3.3`
Results generated for hardware version: `1.4.4.8`
 
| Module | Description | LEs | FFs | Memory bits | DSPs |
|:----------|:-----------------------------------------------------|:---:|:---:|:-----------:|:----:|
| BOOT ROM | Bootloader ROM (default 4kB) | 3 | 1 | 32 768 | 0 |
| BUSSWITCH | Mux for CPU I & D interfaces | 59 | 8 | 0 | 0 |
|:----------|:-----------------------------------------------------|----:|----:|------------:|-----:|
| BOOT ROM | Bootloader ROM (default 4kB) | 4 | 1 | 32 768 | 0 |
| BUSSWITCH | Mux for CPU I & D interfaces | 62 | 8 | 0 | 0 |
| CFU | Custom functions unit | - | - | - | - |
| DMEM | Processor-internal data memory (default 8kB) | 13 | 2 | 65 536 | 0 |
| GPIO | General purpose input/output ports | 69 | 65 | 0 | 0 |
| IMEM | Processor-internal instruction memory (default 16kb) | 9 | 2 | 131 072 | 0 |
| MTIME | Machine system timer | 281 | 166 | 0 | 0 |
| GPIO | General purpose input/output ports | 66 | 65 | 0 | 0 |
| IMEM | Processor-internal instruction memory (default 16kB) | 7 | 2 | 131 072 | 0 |
| MTIME | Machine system timer | 268 | 166 | 0 | 0 |
| PWM | Pulse-width modulation controller | 72 | 69 | 0 | 0 |
| SPI | Serial peripheral interface | 189 | 125 | 0 | 0 |
| SYSINFO | System configuration information memory | 10 | 9 | 0 | 0 |
| TRNG | True random number generator | 175 | 132 | 0 | 0 |
| TWI | Two-wire interface | 72 | 44 | 0 | 0 |
| SPI | Serial peripheral interface | 184 | 125 | 0 | 0 |
| SYSINFO | System configuration information memory | 11 | 9 | 0 | 0 |
| TRNG | True random number generator | 132 | 105 | 0 | 0 |
| TWI | Two-wire interface | 74 | 44 | 0 | 0 |
| UART | Universal asynchronous receiver/transmitter | 175 | 132 | 0 | 0 |
| WDT | Watchdog timer | 60 | 45 | 0 | 0 |
| WDT | Watchdog timer | 58 | 45 | 0 | 0 |
| WISHBONE | External memory interface (`MEM_EXT_REG_STAGES` = 2) | 106 | 104 | 0 | 0 |
 
 
### NEORV32 Processor - Exemplary FPGA Setups
 
Exemplary processor implementation results for different FPGA platforms. The processor setup uses *all provided peripherals* (but not the _CFU_),
Exemplary processor implementation results for different FPGA platforms. The processor setup uses *the default peripheral configuration* (like no _CFU_ and no _TRNG_),
no external memory interface and only internal instruction and data memories. IMEM uses 16kB and DMEM uses 8kB memory space. The setup's top entity connects most of the
processor's [top entity](https://github.com/stnolting/neorv32/blob/master/rtl/core/neorv32_top.vhd) signals
to FPGA pins - except for the Wishbone bus and the interrupt signals.
 
Results generated for hardware version: `1.4.3.3`
Results generated for hardware version: `1.4.4.8`
 
| Vendor | FPGA | Board | Toolchain | Strategy | CPU Configuration | LUT / LE | FF / REG | DSP | Memory Bits | BRAM / EBR | SPRAM | Frequency |
|:--------|:----------------------------------|:-----------------|:---------------------------|:-------- |:-----------------------------------------------|:-----------|:-----------|:-------|:-------------|:-----------|:---------|--------------:|
| Intel | Cyclone IV `EP4CE22F17C6N` | Terasic DE0-Nano | Quartus Prime Lite 19.1 | balanced | `rv32imc` + `u` + `Zicsr` + `Zifencei` + `PMP` | 4120 (18%) | 1944 (9%) | 0 (0%) | 231424 (38%) | - | - | 103 MHz |
| Lattice | iCE40 UltraPlus `iCE40UP5K-SG48I` | Upduino v2.0 | Radiant 2.1 (Synplify Pro) | default | `rv32ic` + `u` + `Zicsr` + `Zifencei` | 4288 (81%) | 1693 (32%) | 0 (0%) | - | 12 (40%) | 4 (100%) | *c* 22.5 MHz |
| Xilinx | Artix-7 `XC7A35TICSG324-1L` | Arty A7-35T | Vivado 2019.2 | default | `rv32imc` + `u` + `Zicsr` + `Zifencei` + `PMP` | 2385 (11%) | 2008 (5%) | 0 (0%) | - | 8 (16%) | - | *c* 100 MHz |
| Intel | Cyclone IV `EP4CE22F17C6N` | Terasic DE0-Nano | Quartus Prime Lite 19.1 | balanced | `rv32imc` + `u` + `Zicsr` + `Zifencei` + `PMP` | 4008 (18%) | 1849 (9%) | 0 (0%) | 231424 (38%) | - | - | 105 MHz |
| Lattice | iCE40 UltraPlus `iCE40UP5K-SG48I` | Upduino v2.0 | Radiant 2.1 (Synplify Pro) | default | `rv32ic` + `u` + `Zicsr` + `Zifencei` | 4296 (81%) | 1611 (30%) | 0 (0%) | - | 12 (40%) | 4 (100%) | *c* 22.5 MHz |
| Xilinx | Artix-7 `XC7A35TICSG324-1L` | Arty A7-35T | Vivado 2019.2 | default | `rv32imc` + `u` + `Zicsr` + `Zifencei` + `PMP` | 2390 (11%) | 1888 (5%) | 0 (0%) | - | 8 (16%) | - | *c* 100 MHz |
 
**_Notes_**
* The Lattice iCE40 UltraPlus setup uses the FPGA's SPRAM memory primitives for the internal IMEM and DMEM (each 64kB).
303,7 → 306,7
[sw/example/coremark](https://github.com/stnolting/neorv32/blob/master/sw/example/coremark) project folder. This benchmark
tests the capabilities of a CPU itself rather than the functions provided by the whole system / SoC.
 
Results generated for hardware version: `1.3.7.3`
Results generated for hardware version: `1.4.4.8`
 
~~~
**Configuration**
313,15 → 316,17
Peripherals: UART for printing the results
~~~
 
| CPU | Executable Size | Optimization | CoreMark Score | CoreMarks/MHz |
|:-----------------------|:---------------:|:------------:|:--------------:|:-------------:|
| `rv32i` | 26 748 bytes | `-O3` | 28.98 | 0.2898 |
| `rv32im` | 25 580 bytes | `-O3` | 60.60 | 0.6060 |
| `rv32imc` | 19 636 bytes | `-O3` | 62.50 | 0.6250 |
| `rv32imc` + _FAST_MUL_ | 19 636 bytes | `-O3` | 76.92 | 0.7692 |
| CPU | Executable Size | Optimization | CoreMark Score | CoreMarks/MHz |
|:--------------------------|:---------------:|:------------:|:--------------:|:-------------:|
| `rv32i` | 26 940 bytes | `-O3` | 33.89 | 0.3389 |
| `rv32im` | 25 772 bytes | `-O3` | 64.51 | 0.6451 |
| `rv32im` + `FAST_MUL_EN` | 25 772 bytes | `-O3` | 80.00 | 0.8000 |
| `rv32imc` | 19 812 bytes | `-O3` | 62.50 | 0.6250 |
| `rv32imc` + `FAST_MUL_EN` | 19 812 bytes | `-O3` | 76.92 | 0.7692 |
 
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic).
The `FAST_MUL_EN` configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic).
 
When the `C` extension is enabled, branches to an unaligned uncompressed instruction require additional instruction fetch cycles.
 
### Instruction Cycles
 
338,19 → 343,22
dividing the total number of required clock cycles (only the timed core to avoid distortion due to IO wait cycles; sampled via the `cycle[h]` CSRs)
by the number of executed instructions (`instret[h]` CSRs). The executables were generated using optimization `-O3`.
 
Results generated for hardware version: `1.3.7.3`
Results generated for hardware version: `1.4.4.8`
 
| CPU | Required Clock Cycles | Executed Instructions | Average CPI |
|:-----------------------|----------------------:|----------------------:|:-----------:|
| `rv32i` | 6 955 817 507 | 1 468 927 290 | 4.73 |
| `rv32im` | 3 376 961 507 | 601 565 750 | 5.61 |
| `rv32imc` | 3 274 832 513 | 601 565 964 | 5.44 |
| `rv32imc` + _FAST_MUL_ | 2 689 845 200 | 601 565 890 | 4.47 |
| CPU | Required Clock Cycles | Executed Instructions | Average CPI |
|:------------------------|----------------------:|----------------------:|:-----------:|
| `rv32i` | 5 945 938 586 | 1 469 587 406 | 4.05 |
| `rv32im` | 3 110 282 586 | 602 225 760 | 5.16 |
| `rv32im` `FAST_MUL_EN` | 2 527 730 586 | 602 225 728 | 4.19 |
| `rv32imc` | 3 217 064 278 | 602 225 530 | 5.34 |
| `rv32imc` `FAST_MUL_EN` | 2 634 512 278 | 602 225 574 | 4.37 |
 
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic).
The `FAST_MUL_EN` configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic).
 
When the `C` extension is enabled, branches to an unaligned uncompressed instruction require additional instruction fetch cycles.
 
 
 
## Top Entities
 
The top entity of the **NEORV32 Processor** is [**neorv32_top.vhd**](https://github.com/stnolting/neorv32/blob/master/rtl/core/neorv32_top.vhd) (from `rtl/core`).
518,7 → 526,7
 
This overview is just a short excerpt from the *Let's Get It Started* section of the NEORV32 documentation:
 
[![NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf)
[![NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf)
 
 
### Toolchain
637,7 → 645,7
Blinking LED demo program
```
 
Going further: Take a look at the _Let's Get It Started!_ chapter of the [![NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
Going further: Take a look at the _Let's Get It Started!_ chapter of the [![NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 data sheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf).
 
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.