URL
https://opencores.org/ocsvn/neorv32/neorv32/trunk
Subversion Repositories neorv32
Compare Revisions
- This comparison shows the changes necessary to convert path
/neorv32
- from Rev 19 to Rev 20
- ↔ Reverse comparison
Rev 19 → Rev 20
/trunk/docs/NEORV32.pdf
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/trunk/rtl/core/neorv32_cpu_alu.vhd
1,7 → 1,8
-- ################################################################################################# |
-- # << NEORV32 - Arithmetical/Logical Unit >> # |
-- # ********************************************************************************************* # |
-- # Main data and address ALU. Include comparator unit. # |
-- # Main data and address ALU. Includes comparator unit and co-processor interface/arbiter. # |
-- # The shifter sub-unit uses an iterative approach. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
/trunk/rtl/core/neorv32_cpu_control.vhd
119,10 → 119,8
ci_input : std_ulogic_vector(15 downto 0); -- input to compressed instr. decoder |
i_buf_state : std_ulogic_vector(01 downto 0); |
i_buf_state_nxt : std_ulogic_vector(01 downto 0); |
pc_real : std_ulogic_vector(data_width_c-1 downto 0); |
pc_real_add : std_ulogic_vector(data_width_c-1 downto 0); |
pc_fetch : std_ulogic_vector(data_width_c-1 downto 0); |
pc_fetch_add : std_ulogic_vector(data_width_c-1 downto 0); |
pc : std_ulogic_vector(data_width_c-1 downto 0); |
pc_add : std_ulogic_vector(data_width_c-1 downto 0); |
reset : std_ulogic; |
bus_err_ack : std_ulogic; |
end record; |
133,17 → 131,23
signal ci_illegal : std_ulogic; |
|
-- instruction prefetch buffer (IPB) -- |
type ipb_dbuf_t is array (0 to ipb_entries_c-1) of std_ulogic_vector(35 downto 0); |
type ipb_t is record |
wdata : std_ulogic_vector(35 downto 0); |
rdata : std_ulogic_vector(35 downto 0); |
waddr : std_ulogic_vector(31 downto 0); |
raddr : std_ulogic_vector(31 downto 0); |
status : std_ulogic; |
free : std_ulogic; |
avail : std_ulogic; |
we : std_ulogic; |
re : std_ulogic; |
clear : std_ulogic; |
wdata : std_ulogic_vector(35 downto 0); -- data (+ status) to be written |
we : std_ulogic; -- trigger write |
free : std_ulogic; -- free entry available? |
-- |
rdata : std_ulogic_vector(35 downto 0); -- read data (+ status) |
re : std_ulogic; -- trigger read |
avail : std_ulogic; -- data available? |
-- |
clear : std_ulogic; -- clear all entries |
-- |
data : ipb_dbuf_t; -- the data fifo |
w_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- write pointer |
r_pnt : std_ulogic_vector(index_size_f(ipb_entries_c) downto 0); -- read pointer |
empty : std_ulogic; |
full : std_ulogic; |
end record; |
signal ipb : ipb_t; |
|
166,6 → 170,8
last_pc : std_ulogic_vector(data_width_c-1 downto 0); -- PC of last executed instruction |
sleep : std_ulogic; -- CPU in sleep mode |
sleep_nxt : std_ulogic; -- CPU in sleep mode |
if_rst : std_ulogic; -- instruction fetch was reset |
if_rst_nxt : std_ulogic; -- instruction fetch was reset |
end record; |
signal execute_engine : execute_engine_t; |
|
292,11 → 298,9
begin |
if rising_edge(clk_i) then |
if (fetch_engine.state = IFETCH_RESET) then |
fetch_engine.pc_fetch <= execute_engine.pc(data_width_c-1 downto 1) & '0'; -- initialize with "real" application PC |
fetch_engine.pc_real <= execute_engine.pc(data_width_c-1 downto 1) & '0'; -- initialize with "real" application PC |
fetch_engine.pc <= execute_engine.pc(data_width_c-1 downto 1) & '0'; -- initialize with "real" application PC |
else |
fetch_engine.pc_real <= std_ulogic_vector(unsigned(fetch_engine.pc_real(data_width_c-1 downto 1) & '0') + unsigned(fetch_engine.pc_real_add(data_width_c-1 downto 1) & '0')); |
fetch_engine.pc_fetch <= std_ulogic_vector(unsigned(fetch_engine.pc_fetch(data_width_c-1 downto 1) & '0') + unsigned(fetch_engine.pc_fetch_add(data_width_c-1 downto 1) & '0')); |
fetch_engine.pc <= std_ulogic_vector(unsigned(fetch_engine.pc(data_width_c-1 downto 1) & '0') + unsigned(fetch_engine.pc_add(data_width_c-1 downto 1) & '0')); |
end if; |
-- |
fetch_engine.i_buf <= fetch_engine.i_buf_nxt; |
306,7 → 310,7
end process fetch_engine_fsm_sync; |
|
-- PC output -- |
fetch_pc_o <= fetch_engine.pc_fetch(data_width_c-1 downto 1) & '0'; |
fetch_pc_o <= fetch_engine.pc(data_width_c-1 downto 1) & '0'; |
|
|
-- Fetch Engine FSM Comb ------------------------------------------------------------------ |
316,8 → 320,7
-- arbiter defaults -- |
bus_fast_ir <= '0'; |
fetch_engine.state_nxt <= fetch_engine.state; |
fetch_engine.pc_fetch_add <= (others => '0'); |
fetch_engine.pc_real_add <= (others => '0'); |
fetch_engine.pc_add <= (others => '0'); |
fetch_engine.i_buf_nxt <= fetch_engine.i_buf; |
fetch_engine.i_buf2_nxt <= fetch_engine.i_buf2; |
fetch_engine.i_buf_state_nxt <= fetch_engine.i_buf_state; |
328,7 → 331,6
ipb.we <= '0'; |
ipb.clear <= '0'; |
ipb.wdata <= (others => '0'); |
ipb.waddr <= fetch_engine.pc_real(data_width_c-1 downto 1) & '0'; |
|
-- state machine -- |
case fetch_engine.state is |
354,28 → 356,26
if (fetch_engine.i_buf_state(0) = '1') then -- buffer filled? |
fetch_engine.state_nxt <= IFETCH_2; |
else |
fetch_engine.pc_fetch_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; -- get another instruction word |
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; -- get another instruction word |
end if; |
end if; |
|
when IFETCH_2 => -- construct instruction word and issue |
-- ------------------------------------------------------------ |
if (fetch_engine.pc_fetch(1) = '0') or (CPU_EXTENSION_RISCV_C = false) then -- 32-bit aligned |
if (fetch_engine.pc(1) = '0') or (CPU_EXTENSION_RISCV_C = false) then -- 32-bit aligned |
fetch_engine.ci_input <= fetch_engine.i_buf2(15 downto 00); |
|
if (ipb.free = '1') then -- free entry in buffer? |
ipb.we <= '1'; |
if (fetch_engine.i_buf2(01 downto 00) = "11") or (CPU_EXTENSION_RISCV_C = false) then -- uncompressed |
ipb.wdata <= '0' & fetch_engine.i_buf2(33 downto 32) & '0' & fetch_engine.i_buf2(31 downto 0); |
fetch_engine.pc_real_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.pc_fetch_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
ipb.wdata <= '0' & fetch_engine.i_buf2(33 downto 32) & '0' & fetch_engine.i_buf2(31 downto 0); |
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
else -- compressed |
ipb.wdata <= ci_illegal & fetch_engine.i_buf2(33 downto 32) & '1' & ci_instr32; |
fetch_engine.pc_fetch_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.pc_real_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_2; -- try to get another 16-bit instruction word in next round |
ipb.wdata <= ci_illegal & fetch_engine.i_buf2(33 downto 32) & '1' & ci_instr32; |
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_2; -- try to get another 16-bit instruction word in next round |
end if; |
end if; |
|
385,15 → 385,13
if (ipb.free = '1') then -- free entry in buffer? |
ipb.we <= '1'; |
if (fetch_engine.i_buf2(17 downto 16) = "11") then -- uncompressed |
ipb.wdata <= '0' & fetch_engine.i_buf(33 downto 32) & '0' & fetch_engine.i_buf(15 downto 00) & fetch_engine.i_buf2(31 downto 16); |
fetch_engine.pc_real_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.pc_fetch_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
ipb.wdata <= '0' & fetch_engine.i_buf(33 downto 32) & '0' & fetch_engine.i_buf(15 downto 00) & fetch_engine.i_buf2(31 downto 16); |
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(4, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
else -- compressed |
ipb.wdata <= ci_illegal & fetch_engine.i_buf(33 downto 32) & '1' & ci_instr32; |
fetch_engine.pc_fetch_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.pc_real_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
ipb.wdata <= ci_illegal & fetch_engine.i_buf(33 downto 32) & '1' & ci_instr32; |
fetch_engine.pc_add <= std_ulogic_vector(to_unsigned(2, data_width_c)); |
fetch_engine.state_nxt <= IFETCH_0; |
end if; |
end if; |
end if; |
411,32 → 409,47
-- **************************************************************************************************************************** |
|
|
-- Instruction Prefetch Buffer Stage ------------------------------------------------------ |
-- Instruction Prefetch Buffer (FIFO) ----------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
instr_prefetch_buffer: process(rstn_i, clk_i) -- once upon a time, this was a fifo with 8 entries |
instr_prefetch_buffer_ctrl: process(rstn_i, clk_i) |
begin |
if (rstn_i = '0') then |
ipb.status <= '0'; |
ipb.rdata <= (others => '0'); |
ipb.raddr <= (others => '0'); |
ipb.w_pnt <= (others => '0'); |
ipb.r_pnt <= (others => '0'); |
elsif rising_edge(clk_i) then |
-- write port -- |
if (ipb.clear = '1') then |
ipb.status <= '0'; |
ipb.w_pnt <= (others => '0'); |
elsif (ipb.we = '1') then |
ipb.status <= '1'; |
ipb.w_pnt <= std_ulogic_vector(unsigned(ipb.w_pnt) + 1); |
end if; |
-- read port -- |
if (ipb.clear = '1') then |
ipb.r_pnt <= (others => '0'); |
elsif (ipb.re = '1') then |
ipb.status <= '0'; |
ipb.r_pnt <= std_ulogic_vector(unsigned(ipb.r_pnt) + 1); |
end if; |
if (ipb.we = '1') then |
ipb.rdata <= ipb.wdata; |
ipb.raddr <= ipb.waddr; |
end if; |
end process instr_prefetch_buffer_ctrl; |
|
instr_prefetch_buffer_data: process(clk_i) |
begin |
if rising_edge(clk_i) then |
if (ipb.we = '1') then -- write port |
ipb.data(to_integer(unsigned(ipb.w_pnt(ipb.w_pnt'left-1 downto 0)))) <= ipb.wdata; |
end if; |
end if; |
end process instr_prefetch_buffer; |
end process instr_prefetch_buffer_data; |
|
-- async read -- |
ipb.rdata <= ipb.data(to_integer(unsigned(ipb.r_pnt(ipb.w_pnt'left-1 downto 0)))); |
|
-- status -- |
ipb.free <= not ipb.status; |
ipb.avail <= ipb.status; |
ipb.full <= '1' when (ipb.r_pnt(ipb.r_pnt'left) /= ipb.w_pnt(ipb.w_pnt'left)) and (ipb.r_pnt(ipb.r_pnt'left-1 downto 0) = ipb.w_pnt(ipb.w_pnt'left-1 downto 0)) else '0'; |
ipb.empty <= '1' when (ipb.r_pnt(ipb.r_pnt'left) = ipb.w_pnt(ipb.w_pnt'left)) and (ipb.r_pnt(ipb.r_pnt'left-1 downto 0) = ipb.w_pnt(ipb.w_pnt'left-1 downto 0)) else '0'; |
|
ipb.free <= not ipb.full; |
ipb.avail <= not ipb.empty; |
|
|
-- **************************************************************************************************************************** |
514,13 → 527,15
execute_engine.last_pc <= CPU_BOOT_ADDR(data_width_c-1 downto 1) & '0'; |
execute_engine.state <= SYS_WAIT; |
execute_engine.sleep <= '0'; |
execute_engine.if_rst <= '1'; -- IF is reset after system reset |
elsif rising_edge(clk_i) then |
execute_engine.pc <= execute_engine.pc_nxt(data_width_c-1 downto 1) & '0'; |
if (execute_engine.state = EXECUTE) then |
execute_engine.last_pc <= execute_engine.pc(data_width_c-1 downto 1) & '0'; |
end if; |
execute_engine.state <= execute_engine.state_nxt; |
execute_engine.sleep <= execute_engine.sleep_nxt; |
execute_engine.state <= execute_engine.state_nxt; |
execute_engine.sleep <= execute_engine.sleep_nxt; |
execute_engine.if_rst <= execute_engine.if_rst_nxt; |
end if; |
end process execute_engine_fsm_sync_rst; |
|
538,13 → 553,15
end if; |
end process execute_engine_fsm_sync; |
|
-- PC output -- |
curr_pc_o <= execute_engine.pc(data_width_c-1 downto 1) & '0'; |
next_pc_tmp <= std_ulogic_vector(unsigned(execute_engine.pc) + 2) when (execute_engine.is_ci = '1') else std_ulogic_vector(unsigned(execute_engine.pc) + 4); |
-- next PC -- |
next_pc_tmp <= std_ulogic_vector(unsigned(execute_engine.pc) + 2) when (execute_engine.is_ci = '1') else std_ulogic_vector(unsigned(execute_engine.pc) + 4); |
execute_engine.next_pc <= next_pc_tmp(data_width_c-1 downto 1) & '0'; |
next_pc_o <= next_pc_tmp(data_width_c-1 downto 1) & '0'; |
|
-- PC output -- |
curr_pc_o <= execute_engine.pc(data_width_c-1 downto 1) & '0'; |
next_pc_o <= next_pc_tmp(data_width_c-1 downto 1) & '0'; |
|
|
-- CPU Control Bus Output ----------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
ctrl_output: process(ctrl, execute_engine, fetch_engine, trap_ctrl, csr, bus_fast_ir) |
577,6 → 594,7
execute_engine.is_ci_nxt <= execute_engine.is_ci; |
execute_engine.pc_nxt <= execute_engine.pc; |
execute_engine.sleep_nxt <= execute_engine.sleep; |
execute_engine.if_rst_nxt <= execute_engine.if_rst; |
|
-- instruction dispatch -- |
fetch_engine.reset <= '0'; |
654,12 → 672,15
if (ipb.avail = '1') then -- instruction available? |
ipb.re <= '1'; |
trap_ctrl.instr_ma <= ipb.rdata(33); -- misaligned instruction fetch address |
trap_ctrl.instr_be <= ipb.rdata(34); -- bus access fault druing instrucion fetch |
trap_ctrl.instr_be <= ipb.rdata(34); -- bus access fault during instrucion fetch |
illegal_compressed <= ipb.rdata(35); -- invalid decompressed instruction |
execute_engine.is_ci_nxt <= ipb.rdata(32); -- flag to indicate this is a compressed instruction beeing executed |
execute_engine.i_reg_nxt <= ipb.rdata(31 downto 0); |
execute_engine.pc_nxt <= ipb.raddr; -- the PC according to the current instruction |
-- ipb.rdata(35) is not immediately checked here! |
execute_engine.if_rst_nxt <= '0'; |
if (execute_engine.if_rst = '0') then -- if there was no non-linear PC modification |
execute_engine.pc_nxt <= execute_engine.next_pc; |
end if; |
-- ipb.rdata(35) (invalid decompressed instruction) is not immediately checked here! |
if (execute_engine.sleep = '1') or (trap_ctrl.env_start = '1') or ((ipb.rdata(33) or ipb.rdata(34)) = '1') then |
execute_engine.state_nxt <= TRAP; |
else |
669,7 → 690,8
|
when TRAP => -- Start trap environment (also used as cpu sleep state) |
-- ------------------------------------------------------------ |
fetch_engine.reset <= '1'; |
fetch_engine.reset <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
if (trap_ctrl.env_start = '1') then -- check here again if we came directly from DISPATCH |
trap_ctrl.env_start_ack <= '1'; |
execute_engine.pc_nxt <= csr.mtvec; |
751,9 → 773,10
|
when opcode_fence_c => -- fence operations |
-- ------------------------------------------------------------ |
execute_engine.pc_nxt <= execute_engine.next_pc; -- "refetch" next instruction (only relevant for fencei) |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_fencei_c) and (CPU_EXTENSION_RISCV_Zifencei = true) then -- FENCEI |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_fencei_c) and (CPU_EXTENSION_RISCV_Zifencei = true) then -- FENCE.I |
fetch_engine.reset <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
execute_engine.pc_nxt <= execute_engine.next_pc; -- "refetch" next instruction (only relevant for fence.i) |
ctrl_nxt(ctrl_bus_fencei_c) <= '1'; |
end if; |
if (execute_engine.i_reg(instr_funct3_msb_c downto instr_funct3_lsb_c) = funct3_fence_c) then -- FENCE |
771,9 → 794,10
when funct12_ebreak_c => -- EBREAK |
trap_ctrl.break_point <= '1'; |
when funct12_mret_c => -- MRET |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_nxt <= csr.mepc; |
fetch_engine.reset <= '1'; |
trap_ctrl.env_end <= '1'; |
execute_engine.pc_nxt <= csr.mepc; |
fetch_engine.reset <= '1'; |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
when funct12_wfi_c => -- WFI = "CPU sleep" |
execute_engine.sleep_nxt <= '1'; -- good night |
when others => -- undefined |
856,10 → 880,11
|
when BRANCH => -- update PC for taken branches and jumps |
-- ------------------------------------------------------------ |
execute_engine.pc_nxt <= alu_add_i; -- branch/jump destination |
if (execute_engine.is_jump = '1') or (execute_engine.branch_taken = '1') then |
fetch_engine.reset <= '1'; -- trigger new instruction fetch from modified PC |
execute_engine.state_nxt <= SYS_WAIT; |
execute_engine.pc_nxt <= alu_add_i; -- branch/jump destination |
fetch_engine.reset <= '1'; -- trigger new instruction fetch from modified PC |
execute_engine.if_rst_nxt <= '1'; -- this is a non-linear PC modification |
execute_engine.state_nxt <= SYS_WAIT; |
else |
execute_engine.state_nxt <= DISPATCH; |
end if; |
1391,7 → 1416,7
if (execute_engine.i_reg(27 downto 24) = x"b") then |
for i in 0 to PMP_NUM_REGIONS-1 loop |
if (execute_engine.i_reg(23 downto 20) = std_ulogic_vector(to_unsigned(i, 4))) and (csr.pmpcfg(i)(7) = '0') then -- unlocked pmpaddr access |
csr.pmpaddr(i) <= csr_wdata_i; |
csr.pmpaddr(i) <= csr_wdata_i(31 downto 1) & '0'; -- min granularity is 8 bytes -> bit zero cannot be configured |
end if; |
end loop; -- i (CSRs) |
end if; |
/trunk/rtl/core/neorv32_package.vhd
40,9 → 40,10
|
-- Architecture Constants ----------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
constant data_width_c : natural := 32; -- data width - FIXED! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01030605"; -- no touchy! |
constant pmp_max_r_c : natural := 8; -- max PMP regions |
constant data_width_c : natural := 32; -- data width - FIXED! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01030700"; -- no touchy! |
constant pmp_max_r_c : natural := 8; -- max PMP regions |
constant ipb_entries_c : natural := 2; -- entries in instruction prefetch buffer, power of 2 |
|
-- Helper Functions ----------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
/trunk/sim/vivado/neorv32_tb_behav.wcfg
12,13 → 12,13
</db_ref> |
</db_ref_list> |
<zoom_setting> |
<ZoomStartTime time="1400100fs"></ZoomStartTime> |
<ZoomEndTime time="1609501fs"></ZoomEndTime> |
<Cursor1Time time="1435000fs"></Cursor1Time> |
<ZoomStartTime time="244577083fs"></ZoomStartTime> |
<ZoomEndTime time="245084584fs"></ZoomEndTime> |
<Cursor1Time time="244825000fs"></Cursor1Time> |
</zoom_setting> |
<column_width_setting> |
<NameColumnWidth column_width="203"></NameColumnWidth> |
<ValueColumnWidth column_width="72"></ValueColumnWidth> |
<ValueColumnWidth column_width="102"></ValueColumnWidth> |
</column_width_setting> |
<WVObjectSize size="95" /> |
<wvobject type="divider" fp_name="divider273"> |
127,6 → 127,80
<obj_property name="ElementShortName">execute_engine</obj_property> |
<obj_property name="ObjectShortName">execute_engine</obj_property> |
<obj_property name="isExpanded"></obj_property> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state" type="other"> |
<obj_property name="ElementShortName">.state</obj_property> |
<obj_property name="ObjectShortName">.state</obj_property> |
<obj_property name="CustomSignalColor">#FF00FF</obj_property> |
<obj_property name="UseCustomSignalColor">true</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state_prev" type="other"> |
<obj_property name="ElementShortName">.state_prev</obj_property> |
<obj_property name="ObjectShortName">.state_prev</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.state_nxt" type="other"> |
<obj_property name="ElementShortName">.state_nxt</obj_property> |
<obj_property name="ObjectShortName">.state_nxt</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.i_reg" type="array"> |
<obj_property name="ElementShortName">.i_reg[31:0]</obj_property> |
<obj_property name="ObjectShortName">.i_reg[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.i_reg_nxt" type="array"> |
<obj_property name="ElementShortName">.i_reg_nxt[31:0]</obj_property> |
<obj_property name="ObjectShortName">.i_reg_nxt[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_ci" type="logic"> |
<obj_property name="ElementShortName">.is_ci</obj_property> |
<obj_property name="ObjectShortName">.is_ci</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_ci_nxt" type="logic"> |
<obj_property name="ElementShortName">.is_ci_nxt</obj_property> |
<obj_property name="ObjectShortName">.is_ci_nxt</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_jump" type="logic"> |
<obj_property name="ElementShortName">.is_jump</obj_property> |
<obj_property name="ObjectShortName">.is_jump</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_jump_nxt" type="logic"> |
<obj_property name="ElementShortName">.is_jump_nxt</obj_property> |
<obj_property name="ObjectShortName">.is_jump_nxt</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.branch_taken" type="logic"> |
<obj_property name="ElementShortName">.branch_taken</obj_property> |
<obj_property name="ObjectShortName">.branch_taken</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc" type="array"> |
<obj_property name="ElementShortName">.pc[31:0]</obj_property> |
<obj_property name="ObjectShortName">.pc[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc_nxt" type="array"> |
<obj_property name="ElementShortName">.pc_nxt[31:0]</obj_property> |
<obj_property name="ObjectShortName">.pc_nxt[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.next_pc" type="array"> |
<obj_property name="ElementShortName">.next_pc[31:0]</obj_property> |
<obj_property name="ObjectShortName">.next_pc[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.last_pc" type="array"> |
<obj_property name="ElementShortName">.last_pc[31:0]</obj_property> |
<obj_property name="ObjectShortName">.last_pc[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.sleep" type="logic"> |
<obj_property name="ElementShortName">.sleep</obj_property> |
<obj_property name="ObjectShortName">.sleep</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.sleep_nxt" type="logic"> |
<obj_property name="ElementShortName">.sleep_nxt</obj_property> |
<obj_property name="ObjectShortName">.sleep_nxt</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.if_rst" type="logic"> |
<obj_property name="ElementShortName">.if_rst</obj_property> |
<obj_property name="ObjectShortName">.if_rst</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.if_rst_nxt" type="logic"> |
<obj_property name="ElementShortName">.if_rst_nxt</obj_property> |
<obj_property name="ObjectShortName">.if_rst_nxt</obj_property> |
</wvobject> |
</wvobject> |
<wvobject type="divider" fp_name="divider139"> |
<obj_property name="label">CPU: Control.TRAP</obj_property> |
281,10 → 355,12
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/cpu_i" type="array"> |
<obj_property name="ElementShortName">cpu_i</obj_property> |
<obj_property name="ObjectShortName">cpu_i</obj_property> |
<obj_property name="isExpanded"></obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/cpu_d" type="array"> |
<obj_property name="ElementShortName">cpu_d</obj_property> |
<obj_property name="ObjectShortName">cpu_d</obj_property> |
<obj_property name="isExpanded"></obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/PMP_USE" type="other"> |
<obj_property name="ElementShortName">PMP_USE</obj_property> |
/trunk/sw/common/crt0.S
60,20 → 60,20
__crt0_reg_file_clear: |
//addi x0, x0, 0 // hardwired to zero |
addi x1, x0, 0 |
addi x2, x1, 0 |
addi x3, x2, 0 |
addi x4, x3, 0 |
addi x5, x4, 0 |
addi x6, x5, 0 |
addi x7, x6, 0 |
addi x8, x7, 0 |
addi x9, x8, 0 |
//addi x10, x9, 0 |
//addi x11, x10, 0 |
//addi x12, x11, 0 |
//addi x13, x12, 0 |
//addi x14, x13, 0 |
addi x15, x14, 0 |
addi x2, x0, 0 |
addi x3, x0, 0 |
addi x4, x0, 0 |
addi x5, x0, 0 |
addi x6, x0, 0 |
addi x7, x0, 0 |
addi x8, x0, 0 |
addi x9, x0, 0 |
//addi x10, x0, 0 |
//addi x11, x0, 0 |
//addi x12, x0, 0 |
//addi x13, x0, 0 |
//addi x14, x0, 0 |
addi x15, x0, 0 |
|
// since we don't know here if we are compiling for an rv32e architecture |
// we won't touch registers above x15 |
/trunk/sw/example/cpu_test/main.c
36,7 → 36,7
/**********************************************************************//** |
* @file cpu_test/main.c |
* @author Stephan Nolting |
* @brief Simple CPU (interrupts and exceptions) test program. |
* @brief Simple CPU test program. |
**************************************************************************/ |
|
#include <neorv32.h> |
304,7 → 304,7
exception_handler_answer = 0xFFFFFFFF; |
neorv32_uart_printf("FENCE.I: "); |
asm volatile ("fence.i"); |
|
|
if (exception_handler_answer == TRAP_CODE_I_ILLEGAL) { |
neorv32_uart_printf("skipped (not implemented)\n"); |
} |
426,24 → 426,24
// skip if C-mode is not implemented |
if ((neorv32_cpu_csr_read(CSR_MISA) & (1<<CPU_MISA_C_EXT)) != 0) { |
|
cnt_test++; |
cnt_test++; |
|
// create test program in RAM |
static const uint32_t dummy_sub_program_ci[2] __attribute__((aligned(8))) = { |
0x00000001, // 2nd: official_illegal_op | 1st: NOP -> illegal instruction exception |
0x00008067 // ret (32-bit) |
}; |
// create test program in RAM |
static const uint32_t dummy_sub_program_ci[2] __attribute__((aligned(8))) = { |
0x00000001, // 2nd: official_illegal_op | 1st: NOP -> illegal instruction exception |
0x00008067 // ret (32-bit) |
}; |
|
tmp_a = (uint32_t)&dummy_sub_program_ci; // call the dummy sub program |
asm volatile ( "jalr ra, %0 " : "=r" (tmp_a) : "r" (tmp_a)); |
tmp_a = (uint32_t)&dummy_sub_program_ci; // call the dummy sub program |
asm volatile ( "jalr ra, %0 " : "=r" (tmp_a) : "r" (tmp_a)); |
|
#if (DETAILED_EXCEPTION_DEBUG==0) |
if (exception_handler_answer == TRAP_CODE_I_ILLEGAL) { |
test_ok(); |
} |
else { |
test_fail(); |
} |
if (exception_handler_answer == TRAP_CODE_I_ILLEGAL) { |
test_ok(); |
} |
else { |
test_fail(); |
} |
#endif |
} |
else { |
/trunk/sw/example/game_of_life/main.c
34,7 → 34,7
|
|
/**********************************************************************//** |
* @file blink_led/main.c |
* @file game_of_life/main.c |
* @author Stephan Nolting |
* @brief Simple blinking LED demo program using the lowest 8 bits of the GPIO.output port. |
**************************************************************************/ |
/trunk/sw/lib/include/neorv32.h
37,10 → 37,7
* @file neorv32.h |
* @author Stephan Nolting |
* |
* @brief Main NEORV32 core library file. |
* |
* @details This file defines the addresses of the IO devices and their according |
* registers and register bits as well as the available CPU CSRs and flags. |
* @brief Main NEORV32 core library include file. |
**************************************************************************/ |
|
#ifndef neorv32_h |
/trunk/sw/lib/source/neorv32_rte.c
278,7 → 278,7
} |
|
// CPU extensions |
neorv32_uart_printf("\nCPU extensions: "); |
neorv32_uart_printf(" + "); |
tmp = neorv32_cpu_csr_read(CSR_MISA); |
for (i=0; i<26; i++) { |
if (tmp & (1 << i)) { |
/trunk/README.md
1,4 → 1,4
# [The NEORV32 Processor](https://github.com/stnolting/neorv32) (RISC-V-compliant) |
# [The NEORV32 Processor](https://github.com/stnolting/neorv32) (RISC-V) |
|
[![Build Status](https://travis-ci.com/stnolting/neorv32.svg?branch=master)](https://travis-ci.com/stnolting/neorv32) |
[![license](https://img.shields.io/github/license/stnolting/neorv32)](https://github.com/stnolting/neorv32/blob/master/LICENSE) |
45,6 → 45,10
|
For more information take a look at the [![NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/figures/PDF_32.png) NEORV32 datasheet](https://raw.githubusercontent.com/stnolting/neorv32/master/docs/NEORV32.pdf). |
|
This project is hosted on [GitHub](https://github.com/stnolting/neorv32) and [opencores.org](https://opencores.org/projects/neorv32). |
A not-so-complete project log can be found on [hackaday.io](https://hackaday.io/project/174167-the-neorv32-risc-v-processor). |
|
|
### Key Features |
|
- RISC-V-compliant `rv32i` CPU with optional `C`, `E`, `M`, `U`, `Zicsr`, `Zifencei` and PMP (physical memory protection) extensions |
104,7 → 108,6
- Port Dhrystone benchmark |
- Implement atomic operations (`A` extension) and floating-point operations (`F` extension) |
- Maybe port an RTOS (like [Zephyr](https://github.com/zephyrproject-rtos/zephyr), [freeRTOS](https://www.freertos.org) or [RIOT](https://www.riot-os.org)) |
- Make a 64-bit branch someday |
|
|
|
257,7 → 260,7
| Xilinx | Artix-7 `XC7A35TICSG324-1L` | Arty A7-35T | Vivado 2019.2 | default | `rv32imcu` + `Zicsr` + `Zifencei` | 2445 (12%) | 1893 (4%) | 0 (0%) | - | 8 (16%) | - | *c* 100 MHz | |
|
**Notes** |
* The Lattice iCE40 UltraPlus setup uses the FPGA's SPRAM memory primitives for the internal IMEM and DEMEM (each 64kb). |
* The Lattice iCE40 UltraPlus setup uses the FPGA's SPRAM memory primitives for the internal IMEM and DMEM (each 64kb). |
The FPGA-specific memory components can be found in [`rtl/fpga_specific`](https://github.com/stnolting/neorv32/blob/master/rtl/fpga_specific/lattice_ice40up). |
* The clock frequencies marked with a "c" are constrained clocks. The remaining ones are _f_max_ results from the place and route timing reports. |
* The Upduino and the Arty board have on-board SPI flash memories for storing the FPGA configuration. These devices can also be used by the default NEORV32 |
271,7 → 274,7
[sw/example/coremark](https://github.com/stnolting/neorv32/blob/master/sw/example/coremark) project folder. This benchmark |
tests the capabilities of a CPU itself rather than the functions provided by the whole system / SoC. |
|
Results generated for hardware version: `1.3.6.5` |
Results generated for hardware version: `1.3.7.0` |
|
~~~ |
**Configuration** |
283,12 → 286,12
|
| CPU | Executable Size | Optimization | CoreMark Score | CoreMarks/MHz | |
|:---------------------|:---------------:|:------------:|:--------------:|:-------------:| |
| `rv32i` | 26 764 bytes | `-O3` | 28.98 | 0.2898 | |
| `rv32im` | 25 612 bytes | `-O3` | 58.82 | 0.5882 | |
| `rv32imc` | 19 652 bytes | `-O3` | 60.61 | 0.6061 | |
| `rv32imc` + FAST_MUL | 19 652 bytes | `-O3` | 71.43 | 0.7143 | |
| `rv32i` | 26 748 bytes | `-O3` | 28.98 | 0.2898 | |
| `rv32im` | 25 580 bytes | `-O3` | 60.60 | 0.6060 | |
| `rv32imc` | 19 636 bytes | `-O3` | 62.50 | 0.6250 | |
| `rv32imc` + FAST_MUL | 19 636 bytes | `-O3` | 74.07 | 0.7407 | |
|
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extensions (enabled via the `FAST_MUL_EN` generic). |
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic). |
|
### Instruction Cycles |
|
305,16 → 308,16
dividing the total number of required clock cycles (only the timed core to avoid distortion due to IO wait cycles; sampled via the `cycle[h]` CSRs) |
by the number of executed instructions (`instret[h]` CSRs). The executables were generated using optimization `-O3`. |
|
Results generated for hardware version: `1.3.6.5` |
Results generated for hardware version: `1.3.7.0` |
|
| CPU | Required Clock Cycles | Executed Instructions | Average CPI | |
|:---------------------|----------------------:|----------------------:|:-----------:| |
| `rv32i` | 6 984 305 325 | 1 468 927 290 | 4.75 | |
| `rv32im` | 3 415 761 325 | 601 565 734 | 5.67 | |
| `rv32imc` | 3 398 881 094 | 601 565 832 | 5.65 | |
| `rv32imc` + FAST_MUL | 2 835 121 094 | 601 565 846 | 4.71 | |
| `rv32i` | 6 955 817 507 | 1 468 927 290 | 4.73 | |
| `rv32im` | 3 376 961 507 | 601 565 750 | 5.61 | |
| `rv32imc` | 3 274 832 513 | 601 565 964 | 5.44 | |
| `rv32imc` + FAST_MUL | 2 711 072 513 | 601 566 024 | 4.51 | |
|
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extensions (enabled via the `FAST_MUL_EN` generic). |
The _FAST_MUL_ configuration uses DSPs for the multiplier of the `M` extension (enabled via the `FAST_MUL_EN` generic). |
|
|
## Top Entities |