URL
https://opencores.org/ocsvn/neorv32/neorv32/trunk
Subversion Repositories neorv32
Compare Revisions
- This comparison shows the changes necessary to convert path
/neorv32/trunk
- from Rev 37 to Rev 38
- ↔ Reverse comparison
Rev 37 → Rev 38
/.ci/sw_check.sh
20,7 → 20,7
make -C $test_app_dir check |
|
# Generate executables for all example projects |
make -C $srcdir_examples MARCH=-march=rv32imc clean_all exe |
make -C $srcdir_examples clean_all exe |
|
# Compile and install bootloader |
make -C $srcdir_bootloader clean_all info bootloader |
28,7 → 28,7
# Compile and install test application |
# Redirect UART TX to text.io simulation output via <UART_SIM_MODE> user flag |
echo "Compiling and installing test application" |
make -C $test_app_dir clean_all USER_FLAGS+=-DUART_SIM_MODE MARCH=-march=rv32imc info all |
make -C $test_app_dir clean_all USER_FLAGS+=-DRUN_CPUTEST USER_FLAGS+=-DUART_SIM_MODE MARCH=-march=rv32imc info all |
|
# Verification reference string |
touch $homedir/check_reference.out |
/docs/figures/neorv32_logo_smcard.jpg
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
docs/figures/neorv32_logo_smcard.jpg
Property changes :
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: docs/NEORV32.pdf
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
Index: rtl/core/neorv32_cpu.vhd
===================================================================
--- rtl/core/neorv32_cpu.vhd (revision 37)
+++ rtl/core/neorv32_cpu.vhd (revision 38)
@@ -10,6 +10,8 @@
-- # * neorv32_cpu_decompressor.vhd - Compressed instructions decoder #
-- # * neorv32_cpu_regfile.vhd - Data register file #
-- # #
+-- # * neorv32_package.vhd - Main CPU/processor package file #
+-- # #
-- # Check out the processor's data sheet for more information: docs/NEORV32.pdf #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
@@ -159,6 +161,8 @@
assert not ((PMP_NUM_REGIONS > pmp_max_r_c) and (PMP_USE = true)) report "NEORV32 CPU CONFIG ERROR! Number of PMP regions out of valid range." severity error;
-- PMP granularity --
assert not (((PMP_GRANULARITY < 1) or (PMP_GRANULARITY > 32)) and (PMP_USE = true)) report "NEORV32 CPU CONFIG ERROR! Invalid PMP granulartiy (0 < PMP_GRANULARITY < 33)." severity error;
+ -- Instruction prefetch buffer size --
+ assert not (is_power_of_two_f(ipb_entries_c) = false) report "NEORV32 CPU CONFIG ERROR! Number of entries in instruction prefetch buffer has to be a power of two." severity error;
-- Control Unit ---------------------------------------------------------------------------
@@ -289,7 +293,7 @@
if (CPU_EXTENSION_RISCV_M = true) generate
neorv32_cpu_cp_muldiv_inst: neorv32_cpu_cp_muldiv
generic map (
- FAST_MUL_EN => FAST_MUL_EN -- use DSPs for faster multiplication
+ FAST_MUL_EN => FAST_MUL_EN -- use DSPs for faster multiplication
)
port map (
-- global control --
@@ -313,7 +317,7 @@
end generate;
- -- Co-Processor 1: Not implemented yet ----------------------------------------------------
+ -- Co-Processor 1: Not implemented (yet) --------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- control: ctrl cp1_start
-- inputs: rs1 rs2 alu_cmp alu_opb
@@ -321,7 +325,7 @@
cp1_valid <= '0';
- -- Co-Processor 2: Not implemented yet ----------------------------------------------------
+ -- Co-Processor 2: Not implemented (yet) --------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- control: ctrl cp2_start
-- inputs: rs1 rs2 alu_cmp alu_opb
@@ -329,7 +333,7 @@
cp2_valid <= '0';
- -- Co-Processor 3: Not implemented yet ----------------------------------------------------
+ -- Co-Processor 3: Not implemented (yet) --------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- control: ctrl cp3_start
-- inputs: rs1 rs2 alu_cmp alu_opb
@@ -350,6 +354,7 @@
port map (
-- global control --
clk_i => clk_i, -- global clock, rising edge
+ rstn_i => rstn_i, -- global reset, low-active, async
ctrl_i => ctrl, -- main control bus
-- cpu instruction fetch interface --
fetch_pc_i => fetch_pc, -- PC for instruction fetch
/rtl/core/neorv32_cpu_bus.vhd
52,6 → 52,7
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
rstn_i : in std_ulogic := '0'; -- global reset, low-active, async |
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus |
-- cpu instruction fetch interface -- |
fetch_pc_i : in std_ulogic_vector(data_width_c-1 downto 0); -- PC for instruction fetch |
289,9 → 290,14
|
-- Instruction Fetch Arbiter -------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
ifetch_arbiter: process(clk_i) |
ifetch_arbiter: process(rstn_i, clk_i) |
begin |
if rising_edge(clk_i) then |
if (rstn_i = '0') then |
i_arbiter.rd_req <= '0'; |
i_arbiter.err_align <= '0'; |
i_arbiter.err_bus <= '0'; |
i_arbiter.timeout <= (others => '0'); |
elsif rising_edge(clk_i) then |
-- instruction fetch request -- |
if (i_arbiter.rd_req = '0') then -- idle |
i_arbiter.rd_req <= ctrl_i(ctrl_bus_if_c); |
333,9 → 339,15
|
-- Data Access Arbiter -------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
data_access_arbiter: process(clk_i) |
data_access_arbiter: process(rstn_i, clk_i) |
begin |
if rising_edge(clk_i) then |
if (rstn_i = '0') then |
d_arbiter.wr_req <= '0'; |
d_arbiter.rd_req <= '0'; |
d_arbiter.err_align <= '0'; |
d_arbiter.err_bus <= '0'; |
d_arbiter.timeout <= (others => '0'); |
elsif rising_edge(clk_i) then |
-- data access request -- |
if (d_arbiter.wr_req = '0') and (d_arbiter.rd_req = '0') then -- idle |
d_arbiter.wr_req <= ctrl_i(ctrl_bus_wr_c); |
/rtl/core/neorv32_cpu_control.vhd
521,11 → 521,11
|
-- Immediate Generator -------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
imm_gen: process(clk_i) |
imm_gen: process(execute_engine.i_reg, clk_i) |
variable opcode_v : std_ulogic_vector(6 downto 0); |
begin |
opcode_v := execute_engine.i_reg(instr_opcode_msb_c downto instr_opcode_lsb_c+2) & "11"; |
if rising_edge(clk_i) then |
opcode_v := execute_engine.i_reg(instr_opcode_msb_c downto instr_opcode_lsb_c+2) & "11"; |
case opcode_v is -- save some bits here, LSBs are always 11 for rv32 |
when opcode_store_c => -- S-immediate |
imm_o(31 downto 11) <= (others => execute_engine.i_reg(31)); -- sign extension |
1233,7 → 1233,7
|
|
-- **************************************************************************************************************************** |
-- Exception and Interrupt Control |
-- Exception and Interrupt (= Trap) Control |
-- **************************************************************************************************************************** |
|
|
1305,7 → 1305,7
trap_ctrl.cause_nxt <= (others => '0'); |
trap_ctrl.irq_ack_nxt <= (others => '0'); |
|
-- the following traps are caused by asynchronous exceptions (-> interrupts) |
-- the following traps are caused by *asynchronous* exceptions (= interrupts) |
-- here we do need a specific acknowledge mask since several sources can trigger at once |
|
-- interrupt: 1.11 machine external interrupt -- |
1345,45 → 1345,45
trap_ctrl.irq_ack_nxt(interrupt_firq_3_c) <= '1'; |
|
|
-- the following traps are caused by synchronous exceptions |
-- the following traps are caused by *synchronous* exceptions (= classic exceptions) |
-- here we do not need a specific acknowledge mask since only one exception (the one |
-- with highest priority) can trigger at once |
-- with highest priority) is evaluated at once |
|
-- trap/fault: 0.1 instruction access fault -- |
-- exception: 0.1 instruction access fault -- |
elsif (trap_ctrl.exc_buf(exception_iaccess_c) = '1') then |
trap_ctrl.cause_nxt <= trap_iba_c; |
|
-- trap/fault: 0.2 illegal instruction -- |
-- exception: 0.2 illegal instruction -- |
elsif (trap_ctrl.exc_buf(exception_iillegal_c) = '1') then |
trap_ctrl.cause_nxt <= trap_iil_c; |
|
-- trap/fault: 0.0 instruction address misaligned -- |
-- exception: 0.0 instruction address misaligned -- |
elsif (trap_ctrl.exc_buf(exception_ialign_c) = '1') then |
trap_ctrl.cause_nxt <= trap_ima_c; |
|
|
-- trap/fault: 0.11 environment call from M-mode -- |
-- exception: 0.11 environment call from M-mode -- |
elsif (trap_ctrl.exc_buf(exception_m_envcall_c) = '1') then |
trap_ctrl.cause_nxt <= trap_menv_c; |
|
-- trap/fault: 0.3 breakpoint -- |
-- exception: 0.3 breakpoint -- |
elsif (trap_ctrl.exc_buf(exception_break_c) = '1') then |
trap_ctrl.cause_nxt <= trap_brk_c; |
|
|
-- trap/fault: 0.6 store address misaligned - |
-- exception: 0.6 store address misaligned - |
elsif (trap_ctrl.exc_buf(exception_salign_c) = '1') then |
trap_ctrl.cause_nxt <= trap_sma_c; |
|
-- trap/fault: 0.4 load address misaligned -- |
-- exception: 0.4 load address misaligned -- |
elsif (trap_ctrl.exc_buf(exception_lalign_c) = '1') then |
trap_ctrl.cause_nxt <= trap_lma_c; |
|
-- trap/fault: 0.7 store access fault -- |
-- exception: 0.7 store access fault -- |
elsif (trap_ctrl.exc_buf(exception_saccess_c) = '1') then |
trap_ctrl.cause_nxt <= trap_sbe_c; |
|
-- trap/fault: 0.5 load access fault -- |
-- exception: 0.5 load access fault -- |
elsif (trap_ctrl.exc_buf(exception_laccess_c) = '1') then |
trap_ctrl.cause_nxt <= trap_lbe_c; |
|
1407,11 → 1407,11
-- CSR operand source -- |
if (execute_engine.i_reg(instr_funct3_msb_c) = '1') then -- immediate |
csr_operand_v := (others => '0'); |
csr_operand_v(4 downto 0) := execute_engine.i_reg(19 downto 15); |
csr_operand_v(4 downto 0) := execute_engine.i_reg(19 downto 15); -- uimm5 |
else -- register |
csr_operand_v := rs1_i; |
end if; |
-- "mini ALU" for CSR update operations -- |
-- tiny ALU for CSR access operations -- |
case execute_engine.i_reg(instr_funct3_lsb_c+1 downto instr_funct3_lsb_c) is |
when "10" => csr.wdata <= csr.rdata or csr_operand_v; -- CSRRS(I) |
when "11" => csr.wdata <= csr.rdata and (not csr_operand_v); -- CSRRC(I) |
1680,7 → 1680,9
csr.rdata(00) <= '0'; -- A CPU extension |
csr.rdata(01) <= '0'; -- B CPU extension |
csr.rdata(02) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_C); -- C CPU extension |
csr.rdata(03) <= '0'; -- D CPU extension |
csr.rdata(04) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_E); -- E CPU extension |
csr.rdata(05) <= '0'; -- F CPU extension |
csr.rdata(08) <= not bool_to_ulogic_f(CPU_EXTENSION_RISCV_E); -- I CPU extension (if not E) |
csr.rdata(12) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_M); -- M CPU extension |
csr.rdata(20) <= bool_to_ulogic_f(CPU_EXTENSION_RISCV_U); -- U CPU extension |
/rtl/core/neorv32_imem.vhd
57,7 → 57,6
rden_i : in std_ulogic; -- read enable |
wren_i : in std_ulogic; -- write enable |
ben_i : in std_ulogic_vector(03 downto 0); -- byte write enable |
upen_i : in std_ulogic; -- update enable |
addr_i : in std_ulogic_vector(31 downto 0); -- address |
data_i : in std_ulogic_vector(31 downto 0); -- data in |
data_o : out std_ulogic_vector(31 downto 0); -- data out |
167,7 → 166,7
rdata(31 downto 24) <= imem_file_rom_hh(to_integer(unsigned(addr))); |
|
elsif (BOOTLOADER_USE = true) then -- implement IMEM as non-initialized RAM |
if (wren_i = '1') and (upen_i = '1') then |
if (wren_i = '1') then |
if (ben_i(0) = '1') then |
imem_file_ram_ll(to_integer(unsigned(addr))) <= data_i(07 downto 00); |
end if; |
187,7 → 186,7
rdata(31 downto 24) <= imem_file_ram_hh(to_integer(unsigned(addr))); |
|
else -- implement IMEM as PRE-INITIALIZED RAM |
if (wren_i = '1') and (upen_i = '1') then |
if (wren_i = '1') then |
if (ben_i(0) = '1') then |
imem_file_init_ram_ll(to_integer(unsigned(addr))) <= data_i(07 downto 00); |
end if; |
/rtl/core/neorv32_package.vhd
42,7 → 42,7
-- ------------------------------------------------------------------------------------------- |
constant ispace_base_c : std_ulogic_vector(31 downto 0) := x"00000000"; -- default instruction memory address space base address |
constant dspace_base_c : std_ulogic_vector(31 downto 0) := x"80000000"; -- default data memory address space base address |
constant bus_timeout_c : natural := 127; -- cycles after which a valid bus access will timeout and trigger an access exception |
constant bus_timeout_c : natural := 127; -- cycles after which an *unacknowledged* bus access will timeout and trigger an access exception |
constant wb_pipe_mode_c : boolean := false; -- false: classic/standard wishbone mode, true: pipelined wishbone mode |
constant ipb_entries_c : natural := 2; -- entries in instruction prefetch buffer, must be a power of 2, default=2 |
constant rf_r0_is_reg_c : boolean := true; -- reg_file.r0 is a physical register that has to be initialized to zero by the CPU HW |
50,7 → 50,7
-- Architecture Constants ----------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
constant data_width_c : natural := 32; -- data width - do not change! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01040700"; -- no touchy! |
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01040702"; -- no touchy! |
constant pmp_max_r_c : natural := 8; -- max PMP regions - FIXED! |
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off! |
|
762,6 → 762,7
port ( |
-- global control -- |
clk_i : in std_ulogic; -- global clock, rising edge |
rstn_i : in std_ulogic := '0'; -- global reset, low-active, async |
ctrl_i : in std_ulogic_vector(ctrl_width_c-1 downto 0); -- main control bus |
-- cpu instruction fetch interface -- |
fetch_pc_i : in std_ulogic_vector(data_width_c-1 downto 0); -- PC for instruction fetch |
880,7 → 881,6
rden_i : in std_ulogic; -- read enable |
wren_i : in std_ulogic; -- write enable |
ben_i : in std_ulogic_vector(03 downto 0); -- byte write enable |
upen_i : in std_ulogic; -- update enable |
addr_i : in std_ulogic_vector(31 downto 0); -- address |
data_i : in std_ulogic_vector(31 downto 0); -- data in |
data_o : out std_ulogic_vector(31 downto 0); -- data out |
1361,8 → 1361,10
-- ------------------------------------------------------------------------------------------- |
function is_power_of_two_f(input : natural) return boolean is |
begin |
if ((input / 2) /= 0) and ((input mod 2) = 0) then |
if (input = 1) then -- 2^0 |
return true; |
elsif ((input / 2) /= 0) and ((input mod 2) = 0) then |
return true; |
else |
return false; |
end if; |
/rtl/core/neorv32_top.vhd
463,7 → 463,6
rden_i => p_bus.re, -- read enable |
wren_i => p_bus.we, -- write enable |
ben_i => p_bus.ben, -- byte write enable |
upen_i => '1', -- update enable |
addr_i => p_bus.addr, -- address |
data_i => p_bus.wdata, -- data in |
data_o => imem_rdata, -- data out |
/rtl/core/neorv32_wishbone.vhd
101,7 → 101,7
signal wb_access : std_ulogic; |
|
-- bus arbiter |
type ctrl_state_t is (IDLE, BUSY, CANCELED); |
type ctrl_state_t is (IDLE, BUSY, CANCELED, RESYNC); |
type ctrl_t is record |
state : ctrl_state_t; |
state_prev : ctrl_state_t; |
204,7 → 204,7
ctrl.state <= IDLE; |
end if; |
|
when CANCELED => -- |
when CANCELED => -- wait for cycle to be completed either by peripheral or by timeout (ignore result of transfer) |
-- ------------------------------------------------------------ |
ctrl.wr_req <= ctrl.wr_req or wren_i; -- buffer new request |
ctrl.rd_req <= ctrl.rd_req or rden_i; -- buffer new request |
212,6 → 212,12
-- or wait for a timeout and force termination |
ctrl.timeout <= std_ulogic_vector(unsigned(ctrl.timeout) - 1); -- timeout counter |
if (wb_ack_i = '1') or (or_all_f(ctrl.timeout) = '0') then |
ctrl.state <= RESYNC; |
end if; |
|
when RESYNC => -- make sure transfer is done! |
-- ------------------------------------------------------------ |
if (wb_ack_i = '0') then |
ctrl.state <= IDLE; |
end if; |
|
241,7 → 247,7
wb_cyc_o <= cyc_int; |
|
stb_int <= '1' when ((ctrl.state = BUSY) and (ctrl.state_prev = IDLE)) else '0'; |
cyc_int <= '0' when (ctrl.state = IDLE) else '1'; |
cyc_int <= '0' when ((ctrl.state = IDLE) or (ctrl.state = RESYNC)) else '1'; |
|
|
end neorv32_wishbone_rtl; |
/rtl/fpga_specific/lattice_ice40up/neorv32_imem.ice40up_spram.vhd
56,7 → 56,6
rden_i : in std_ulogic; -- read enable |
wren_i : in std_ulogic; -- write enable |
ben_i : in std_ulogic_vector(03 downto 0); -- byte write enable |
upen_i : in std_ulogic; -- update enable |
addr_i : in std_ulogic_vector(31 downto 0); -- address |
data_i : in std_ulogic_vector(31 downto 0); -- data in |
data_o : out std_ulogic_vector(31 downto 0); -- data out |
136,7 → 135,7
spram_addr <= std_logic_vector(addr_i(13+2 downto 0+2)); |
spram_di_lo <= std_logic_vector(data_i(15 downto 00)); |
spram_di_hi <= std_logic_vector(data_i(31 downto 16)); |
spram_we <= '1' when ((acc_en and upen_i and wren_i) = '1') else '0'; -- global write enable |
spram_we <= '1' when ((acc_en and wren_i) = '1') else '0'; -- global write enable |
spram_cs <= std_logic(mem_cs); |
spram_be_lo <= std_logic(ben_i(1)) & std_logic(ben_i(1)) & std_logic(ben_i(0)) & std_logic(ben_i(0)); -- low byte write enable |
spram_be_hi <= std_logic(ben_i(3)) & std_logic(ben_i(3)) & std_logic(ben_i(2)) & std_logic(ben_i(2)); -- high byte write enable |
/sim/vivado/neorv32_tb_behav.wcfg
12,15 → 12,15
</db_ref> |
</db_ref_list> |
<zoom_setting> |
<ZoomStartTime time="953250fs"></ZoomStartTime> |
<ZoomEndTime time="1057351fs"></ZoomEndTime> |
<Cursor1Time time="997350fs"></Cursor1Time> |
<ZoomStartTime time="5295000fs"></ZoomStartTime> |
<ZoomEndTime time="5412501fs"></ZoomEndTime> |
<Cursor1Time time="5715000fs"></Cursor1Time> |
</zoom_setting> |
<column_width_setting> |
<NameColumnWidth column_width="203"></NameColumnWidth> |
<ValueColumnWidth column_width="78"></ValueColumnWidth> |
<ValueColumnWidth column_width="111"></ValueColumnWidth> |
</column_width_setting> |
<WVObjectSize size="111" /> |
<WVObjectSize size="108" /> |
<wvobject type="divider" fp_name="divider273"> |
<obj_property name="label">CPU: Control.FETCH</obj_property> |
<obj_property name="DisplayName">label</obj_property> |
70,56 → 70,6
<obj_property name="ElementShortName">ipb</obj_property> |
<obj_property name="ObjectShortName">ipb</obj_property> |
<obj_property name="isExpanded"></obj_property> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.wdata" type="array"> |
<obj_property name="ElementShortName">.wdata[33:0]</obj_property> |
<obj_property name="ObjectShortName">.wdata[33:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.we" type="logic"> |
<obj_property name="ElementShortName">.we</obj_property> |
<obj_property name="ObjectShortName">.we</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.free" type="logic"> |
<obj_property name="ElementShortName">.free</obj_property> |
<obj_property name="ObjectShortName">.free</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.clear" type="logic"> |
<obj_property name="ElementShortName">.clear</obj_property> |
<obj_property name="ObjectShortName">.clear</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.rdata" type="array"> |
<obj_property name="ElementShortName">.rdata[33:0]</obj_property> |
<obj_property name="ObjectShortName">.rdata[33:0]</obj_property> |
<obj_property name="CustomSignalColor">#FFFFFF</obj_property> |
<obj_property name="UseCustomSignalColor">true</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.re" type="logic"> |
<obj_property name="ElementShortName">.re</obj_property> |
<obj_property name="ObjectShortName">.re</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.avail" type="logic"> |
<obj_property name="ElementShortName">.avail</obj_property> |
<obj_property name="ObjectShortName">.avail</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.w_pnt" type="array"> |
<obj_property name="ElementShortName">.w_pnt[1:0]</obj_property> |
<obj_property name="ObjectShortName">.w_pnt[1:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.r_pnt" type="array"> |
<obj_property name="ElementShortName">.r_pnt[1:0]</obj_property> |
<obj_property name="ObjectShortName">.r_pnt[1:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.empty" type="logic"> |
<obj_property name="ElementShortName">.empty</obj_property> |
<obj_property name="ObjectShortName">.empty</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.full" type="logic"> |
<obj_property name="ElementShortName">.full</obj_property> |
<obj_property name="ObjectShortName">.full</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ipb.data" type="array"> |
<obj_property name="ElementShortName">.data[0:1][33:0]</obj_property> |
<obj_property name="ObjectShortName">.data[0:1][33:0]</obj_property> |
</wvobject> |
</wvobject> |
<wvobject type="divider" fp_name="divider273"> |
<obj_property name="label">CPU: Control.ISSUE</obj_property> |
142,45 → 92,6
<obj_property name="ElementShortName">ci_illegal</obj_property> |
<obj_property name="ObjectShortName">ci_illegal</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf" type="array"> |
<obj_property name="ElementShortName">i_buf</obj_property> |
<obj_property name="ObjectShortName">i_buf</obj_property> |
<obj_property name="isExpanded"></obj_property> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.wdata" type="array"> |
<obj_property name="ElementShortName">.wdata[35:0]</obj_property> |
<obj_property name="ObjectShortName">.wdata[35:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.rdata" type="array"> |
<obj_property name="ElementShortName">.rdata[35:0]</obj_property> |
<obj_property name="ObjectShortName">.rdata[35:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.status" type="logic"> |
<obj_property name="ElementShortName">.status</obj_property> |
<obj_property name="ObjectShortName">.status</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.clear" type="logic"> |
<obj_property name="ElementShortName">.clear</obj_property> |
<obj_property name="ObjectShortName">.clear</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.we" type="logic"> |
<obj_property name="ElementShortName">.we</obj_property> |
<obj_property name="ObjectShortName">.we</obj_property> |
<obj_property name="CustomSignalColor">#FFFFFF</obj_property> |
<obj_property name="UseCustomSignalColor">true</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.re" type="logic"> |
<obj_property name="ElementShortName">.re</obj_property> |
<obj_property name="ObjectShortName">.re</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.free" type="logic"> |
<obj_property name="ElementShortName">.free</obj_property> |
<obj_property name="ObjectShortName">.free</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/i_buf.avail" type="logic"> |
<obj_property name="ElementShortName">.avail</obj_property> |
<obj_property name="ObjectShortName">.avail</obj_property> |
</wvobject> |
</wvobject> |
<wvobject type="divider" fp_name="divider273"> |
<obj_property name="label">CPU: Control.EXECUTE</obj_property> |
<obj_property name="DisplayName">label</obj_property> |
206,8 → 117,8
<obj_property name="ObjectShortName">be_store_i</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ctrl_o" type="array"> |
<obj_property name="ElementShortName">ctrl_o[45:0]</obj_property> |
<obj_property name="ObjectShortName">ctrl_o[45:0]</obj_property> |
<obj_property name="ElementShortName">ctrl_o[59:0]</obj_property> |
<obj_property name="ObjectShortName">ctrl_o[59:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/ci_instr32" type="array"> |
<obj_property name="ElementShortName">ci_instr32[31:0]</obj_property> |
261,6 → 172,10
<obj_property name="ElementShortName">.i_reg_nxt[31:0]</obj_property> |
<obj_property name="ObjectShortName">.i_reg_nxt[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.i_reg_last" type="array"> |
<obj_property name="ElementShortName">.i_reg_last[31:0]</obj_property> |
<obj_property name="ObjectShortName">.i_reg_last[31:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.is_ci" type="logic"> |
<obj_property name="ElementShortName">.is_ci</obj_property> |
<obj_property name="ObjectShortName">.is_ci</obj_property> |
292,12 → 207,12
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc" type="array"> |
<obj_property name="ElementShortName">.pc[31:0]</obj_property> |
<obj_property name="ObjectShortName">.pc[31:0]</obj_property> |
<obj_property name="CustomSignalColor">#FFFFFF</obj_property> |
<obj_property name="UseCustomSignalColor">true</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.pc_nxt" type="array"> |
<obj_property name="ElementShortName">.pc_nxt[31:0]</obj_property> |
<obj_property name="ObjectShortName">.pc_nxt[31:0]</obj_property> |
<obj_property name="CustomSignalColor">#FFFFFF</obj_property> |
<obj_property name="UseCustomSignalColor">true</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_control_inst/execute_engine.next_pc" type="array"> |
<obj_property name="ElementShortName">.next_pc[31:0]</obj_property> |
406,10 → 321,6
<obj_property name="label">CPU: BUS_UNIT</obj_property> |
<obj_property name="DisplayName">label</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/rstn_i" type="logic"> |
<obj_property name="ElementShortName">rstn_i</obj_property> |
<obj_property name="ObjectShortName">rstn_i</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/clk_i" type="logic"> |
<obj_property name="ElementShortName">clk_i</obj_property> |
<obj_property name="ObjectShortName">clk_i</obj_property> |
500,10 → 411,6
<obj_property name="ElementShortName">pmp_ctrl_i[0:7][7:0]</obj_property> |
<obj_property name="ObjectShortName">pmp_ctrl_i[0:7][7:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/priv_mode_i" type="array"> |
<obj_property name="ElementShortName">priv_mode_i[1:0]</obj_property> |
<obj_property name="ObjectShortName">priv_mode_i[1:0]</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cpu_inst/neorv32_cpu_bus_inst/if_pmp_fault" type="logic"> |
<obj_property name="ElementShortName">if_pmp_fault</obj_property> |
<obj_property name="ObjectShortName">if_pmp_fault</obj_property> |
584,6 → 491,10
<obj_property name="label">EXT_MEM Interface</obj_property> |
<obj_property name="DisplayName">label</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_wishbone_inst_true/neorv32_wishbone_inst/ctrl" type="array"> |
<obj_property name="ElementShortName">ctrl</obj_property> |
<obj_property name="ObjectShortName">ctrl</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/wb_adr_o" type="array"> |
<obj_property name="ElementShortName">wb_adr_o[31:0]</obj_property> |
<obj_property name="ObjectShortName">wb_adr_o[31:0]</obj_property> |
629,23 → 540,20
<obj_property name="ObjectShortName">fencei_o</obj_property> |
</wvobject> |
<wvobject type="divider" fp_name="divider238"> |
<obj_property name="label">IO: CFU</obj_property> |
<obj_property name="label">Testbench memory busses</obj_property> |
<obj_property name="DisplayName">label</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cfu_inst_true/neorv32_cfu_inst/wr_en" type="logic"> |
<obj_property name="ElementShortName">wr_en</obj_property> |
<obj_property name="ObjectShortName">wr_en</obj_property> |
<wvobject fp_name="/neorv32_tb/wb_cpu" type="array"> |
<obj_property name="ElementShortName">wb_cpu</obj_property> |
<obj_property name="ObjectShortName">wb_cpu</obj_property> |
<obj_property name="isExpanded"></obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cfu_inst_true/neorv32_cfu_inst/rd_en" type="logic"> |
<obj_property name="ElementShortName">rd_en</obj_property> |
<obj_property name="ObjectShortName">rd_en</obj_property> |
<wvobject fp_name="/neorv32_tb/wb_mem_a" type="array"> |
<obj_property name="ElementShortName">wb_mem_a</obj_property> |
<obj_property name="ObjectShortName">wb_mem_a</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cfu_inst_true/neorv32_cfu_inst/cfu_reg_in" type="array"> |
<obj_property name="ElementShortName">cfu_reg_in[0:3][31:0]</obj_property> |
<obj_property name="ObjectShortName">cfu_reg_in[0:3][31:0]</obj_property> |
<wvobject fp_name="/neorv32_tb/wb_mem_b" type="array"> |
<obj_property name="ElementShortName">wb_mem_b</obj_property> |
<obj_property name="ObjectShortName">wb_mem_b</obj_property> |
</wvobject> |
<wvobject fp_name="/neorv32_tb/neorv32_top_inst/neorv32_cfu_inst_true/neorv32_cfu_inst/cfu_reg_out" type="array"> |
<obj_property name="ElementShortName">cfu_reg_out[0:3][31:0]</obj_property> |
<obj_property name="ObjectShortName">cfu_reg_out[0:3][31:0]</obj_property> |
</wvobject> |
</wave_config> |
/sim/neorv32_tb.vhd
3,9 → 3,14
-- # ********************************************************************************************* # |
-- # This testbench provides a virtual UART receiver connected to the processor's uart_txd_o # |
-- # signal. The received chars are shown in the simulator console and also written to a file # |
-- # ("neorv32.testbench_uart.out"). Futhermore, this testbench provides a simple RAM connected # |
-- # to the external Wishbone bus. The testbench configures the processor with all optional # |
-- # elements enabled by default. # |
-- # ("neorv32.testbench_uart.out"). # |
-- # # |
-- # Furthermore, this testbench provides two external memories (ext_mem_a and ext_mem_b) coupled # |
-- # via Wishbone. ext_mem_a is initialized with the application_init_image and can be used as # |
-- # external boot memory (external IMEM). # |
-- # ext_mem_b is a small uninitialized memory that can be used as external memory-mapped IO.  # |
-- # # |
-- # Use the "User Configuration" section to configure the testbench according to your need. # |
-- # ********************************************************************************************* # |
-- # BSD 3-Clause License # |
-- # # |
55,22 → 60,31
|
-- User Configuration --------------------------------------------------------------------- |
-- ------------------------------------------------------------------------------------------- |
constant t_clock_c : time := 10 ns; -- main clock period |
constant f_clock_c : real := 100000000.0; -- main clock in Hz |
constant f_clock_nat_c : natural := 100000000; -- main clock in Hz |
constant baud_rate_c : real := 19200.0; -- standard UART baudrate |
-- |
constant wb_mem_base_addr_c : std_ulogic_vector(31 downto 0) := x"F0000000"; -- wishbone memory base address |
constant wb_mem_size_c : natural := 256; -- wishbone memory size in bytes |
constant wb_mem_latency_c : natural := 8; -- latency in clock cycles (min 1) |
-- general -- |
constant boot_external_c : boolean := false; -- false: boot from proc-internal IMEM, true: boot from (initialized) simulated ext. mem A |
constant imem_size_c : natural := 16*1024; -- size in bytes of processor-internal IMEM / external mem A |
constant f_clock_c : natural := 100000000; -- main clock in Hz |
-- UART -- |
constant baud_rate_c : natural := 19200; -- standard UART baudrate |
-- simulated external Wishbone memory A (can be used as external IMEM) -- |
constant ext_mem_a_base_addr_c : std_ulogic_vector(31 downto 0) := x"00000000"; -- wishbone memory base address (IMEM base) |
constant ext_mem_a_size_c : natural := imem_size_c; -- wishbone memory size in bytes |
constant ext_mem_a_latency_c : natural := 8; -- latency in clock cycles (min 1, max 255), plus 1 cycle initial delay |
-- simulated external Wishbone memory B (can be used as external IO) -- |
constant ext_mem_b_base_addr_c : std_ulogic_vector(31 downto 0) := x"F0000000"; -- wishbone memory base address (default begin of EXTERNAL IO area) |
constant ext_mem_b_size_c : natural := 64; -- wishbone memory size in bytes |
constant ext_mem_b_latency_c : natural := 3; -- latency in clock cycles (min 1, max 255), plus 1 cycle initial delay |
-- ------------------------------------------------------------------------------------------- |
|
-- internals - hands off! -- |
constant boot_imem_c : boolean := not boot_external_c; |
|
-- text.io -- |
file file_uart_tx_out : text open write_mode is "neorv32.testbench_uart.out"; |
|
-- internal configuration -- |
constant baud_val_c : real := f_clock_c / baud_rate_c; |
constant f_clk_c : natural := natural(f_clock_c); |
constant baud_val_c : real := real(f_clock_c) / real(baud_rate_c); |
constant t_clock_c : time := (1 sec) / f_clock_c; |
|
-- generators -- |
signal clk_gen, rst_gen : std_ulogic := '0'; |
105,16 → 119,17
err : std_ulogic; -- transfer error |
tag : std_ulogic_vector(2 downto 0); -- tag |
end record; |
signal wb_cpu : wishbone_t; |
signal wb_cpu, wb_mem_a, wb_mem_b : wishbone_t; |
|
-- Wishbone memory -- |
type wb_mem_ram_t is array (0 to wb_mem_size_c/4-1) of std_ulogic_vector(31 downto 0); |
type wb_mem_read_latency_t is array (0 to wb_mem_latency_c-1) of std_ulogic_vector(31 downto 0); |
-- Wishbone memories -- |
type ext_mem_a_ram_t is array (0 to ext_mem_a_size_c/4-1) of std_ulogic_vector(31 downto 0); |
type ext_mem_b_ram_t is array (0 to ext_mem_b_size_c/4-1) of std_ulogic_vector(31 downto 0); |
type ext_mem_read_latency_t is array (0 to 255) of std_ulogic_vector(31 downto 0); |
|
-- init function -- |
-- impure function: returns NOT the same result every time it is evaluated with the same arguments since the source file might have changed |
impure function init_wbmem(init : application_init_image_t) return wb_mem_ram_t is |
variable mem_v : wb_mem_ram_t; |
impure function init_wbmem(init : application_init_image_t) return ext_mem_a_ram_t is |
variable mem_v : ext_mem_a_ram_t; |
begin |
mem_v := (others => (others => '0')); |
for i in 0 to init'length-1 loop -- init only in range of source data array |
123,25 → 138,16
return mem_v; |
end function init_wbmem; |
|
-- ---------------------------------------------- -- |
-- How to simulate a boot from an external memory -- |
-- ---------------------------------------------- -- |
-- The simulated Wishbone memory can be initialized with the compiled application init. |
-- 1. Uncomment the init_wbmem function below; this will initialize the simulated wishbone memory with the neorv32_application_image.vhd image |
-- 2. Increase the wb_mem_size_c constant above to (at least) the size of the application image (like 16kB -> 16*1024) |
-- 3. Disable the processor-internal IMEM in the processor instantiation below (MEM_INT_IMEM_USE => false) |
-- 4. Set the Wishbone memory base address wb_mem_base_addr_c (above) to zero (constant wb_mem_base_addr_c : std_ulogic_vector(31 downto 0) := x"00000000";) |
-- 5. Simulate! |
-- external memory components -- |
signal ext_ram_a : ext_mem_a_ram_t := init_wbmem(application_init_image); -- initialized, used to simulate external instruction boot memory |
signal ext_ram_b : ext_mem_b_ram_t; -- uninitialized, used to simulate external IO |
|
signal wb_ram : wb_mem_ram_t;-- := init_wbmem(application_init_image); -- uncomment if you want to init the WB ram with app image |
|
type wb_mem_t is record |
rdata : wb_mem_read_latency_t; |
type ext_mem_t is record |
rdata : ext_mem_read_latency_t; |
acc_en : std_ulogic; |
ack : std_ulogic_vector(wb_mem_latency_c-1 downto 0); |
rb_en : std_ulogic_vector(wb_mem_latency_c-1 downto 0); |
ack : std_ulogic_vector(ext_mem_a_latency_c-1 downto 0); |
end record; |
signal wb_mem : wb_mem_t; |
signal ext_mem_a, ext_mem_b : ext_mem_t; |
|
begin |
|
156,7 → 162,7
neorv32_top_inst: neorv32_top |
generic map ( |
-- General -- |
CLOCK_FREQUENCY => f_clock_nat_c, -- clock frequency of clk_i in Hz |
CLOCK_FREQUENCY => f_clock_c, -- clock frequency of clk_i in Hz |
BOOTLOADER_USE => false, -- implement processor-internal bootloader? |
USER_CODE => x"12345678", -- custom user code |
HW_THREAD_ID => x"00000000", -- hardware thread id (hartid) |
175,8 → 181,8
PMP_NUM_REGIONS => 4, -- number of regions (max 16) |
PMP_GRANULARITY => 14, -- minimal region granularity (1=8B, 2=16B, 3=32B, ...) default is 64k |
-- Internal Instruction memory -- |
MEM_INT_IMEM_USE => true, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => 16*1024, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_USE => boot_imem_c, -- implement processor-internal instruction memory |
MEM_INT_IMEM_SIZE => imem_size_c, -- size of processor-internal instruction memory in bytes |
MEM_INT_IMEM_ROM => false, -- implement processor-internal instruction memory as ROM |
-- Internal Data memory -- |
MEM_INT_DMEM_USE => true, -- implement processor-internal data memory |
259,7 → 265,7
uart_rx_busy <= '1'; |
end if; |
else |
if (uart_rx_baud_cnt = 0.0) then |
if (uart_rx_baud_cnt <= 0.0) then |
if (uart_rx_bitcnt = 1) then |
uart_rx_baud_cnt <= round(0.5 * baud_val_c); |
else |
292,42 → 298,110
end process uart_rx_console; |
|
|
-- Wishbone Memory (simulated external memory) -------------------------------------------- |
-- Wishbone Fabric ------------------------------------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
wb_mem_ram_access: process(clk_gen) |
-- CPU broadcast signals -- |
-- all request signals of the CPU port are fanned out unchanged to both memories; only STB (below) is address-decoded |
wb_mem_a.addr <= wb_cpu.addr; |
wb_mem_b.addr <= wb_cpu.addr; |
wb_mem_a.wdata <= wb_cpu.wdata; |
wb_mem_b.wdata <= wb_cpu.wdata; |
wb_mem_a.we <= wb_cpu.we; |
wb_mem_b.we <= wb_cpu.we; |
wb_mem_a.sel <= wb_cpu.sel; |
wb_mem_b.sel <= wb_cpu.sel; |
wb_mem_a.tag <= wb_cpu.tag; |
wb_mem_b.tag <= wb_cpu.tag; |
wb_mem_a.cyc <= wb_cpu.cyc; |
wb_mem_b.cyc <= wb_cpu.cyc; |
|
-- CPU read-back signals (no mux here since peripherals have "output gates") -- |
-- each memory's bus output register drives all-zero rdata/ack while it is not responding, so a plain OR merges the responses |
wb_cpu.rdata <= wb_mem_a.rdata or wb_mem_b.rdata; |
wb_cpu.ack <= wb_mem_a.ack or wb_mem_b.ack; |
wb_cpu.err <= wb_mem_a.err or wb_mem_b.err; |
|
-- peripheral select via STROBE signal -- |
-- STB is forwarded only while the CPU address lies in [base, base+size) of the respective memory, otherwise it is forced to '0' |
wb_mem_a.stb <= wb_cpu.stb when (wb_cpu.addr >= ext_mem_a_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(ext_mem_a_base_addr_c) + ext_mem_a_size_c)) else '0'; |
wb_mem_b.stb <= wb_cpu.stb when (wb_cpu.addr >= ext_mem_b_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(ext_mem_b_base_addr_c) + ext_mem_b_size_c)) else '0'; |
|
|
-- Wishbone Memory A (simulated external memory) ------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
ext_mem_a_access: process(clk_gen) |
-- Simulated Wishbone memory A: clocked RAM with byte-wise write enables. Read data and ACK travel through an |
-- ext_mem_a_latency_c-stage shift register before being presented on the gated bus output register. |
begin |
if rising_edge(clk_gen) then |
-- control -- |
wb_mem.rb_en(0) <= wb_cpu.cyc and wb_cpu.stb and wb_mem.acc_en and (not wb_cpu.we); -- read-back control |
wb_mem.ack(0) <= wb_cpu.cyc and wb_cpu.stb and wb_mem.acc_en; -- wishbone acknowledge |
ext_mem_a.ack(0) <= wb_mem_a.cyc and wb_mem_a.stb; -- wishbone acknowledge |
|
-- write access -- |
if ((wb_cpu.cyc and wb_cpu.stb and wb_mem.acc_en and wb_cpu.we) = '1') then -- valid write access |
if ((wb_mem_a.cyc and wb_mem_a.stb and wb_mem_a.we) = '1') then -- valid write access |
for i in 0 to 3 loop |
if (wb_cpu.sel(i) = '1') then |
wb_ram(to_integer(unsigned(wb_cpu.addr(index_size_f(wb_mem_size_c/4)+1 downto 2))))(7+i*8 downto 0+i*8) <= wb_cpu.wdata(7+i*8 downto 0+i*8); |
if (wb_mem_a.sel(i) = '1') then |
ext_ram_a(to_integer(unsigned(wb_mem_a.addr(index_size_f(ext_mem_a_size_c/4)+1 downto 2))))(7+i*8 downto 0+i*8) <= wb_mem_a.wdata(7+i*8 downto 0+i*8); |
end if; |
end loop; -- i |
end if; |
|
-- read access -- |
wb_mem.rdata(0) <= wb_ram(to_integer(unsigned(wb_cpu.addr(index_size_f(wb_mem_size_c/4)+1 downto 2)))); -- word aligned |
ext_mem_a.rdata(0) <= ext_ram_a(to_integer(unsigned(wb_mem_a.addr(index_size_f(ext_mem_a_size_c/4)+1 downto 2)))); -- word aligned |
-- virtual read and ack latency -- |
if (wb_mem_latency_c > 1) then |
for i in 1 to wb_mem_latency_c-1 loop |
wb_mem.rdata(i) <= wb_mem.rdata(i-1); |
wb_mem.rb_en(i) <= wb_mem.rb_en(i-1) and wb_cpu.cyc; |
wb_mem.ack(i) <= wb_mem.ack(i-1) and wb_cpu.cyc; |
if (ext_mem_a_latency_c > 1) then |
for i in 1 to ext_mem_a_latency_c-1 loop |
ext_mem_a.rdata(i) <= ext_mem_a.rdata(i-1); |
ext_mem_a.ack(i) <= ext_mem_a.ack(i-1) and wb_mem_a.cyc; |
end loop; |
end if; |
|
-- bus output register -- |
-- gate with memory A's own CYC (was wb_mem_b.cyc: copy-paste slip, inconsistent with the ack pipeline above) |
wb_mem_a.err <= '0'; |
if (ext_mem_a.ack(ext_mem_a_latency_c-1) = '1') and (wb_mem_a.cyc = '1') then |
wb_mem_a.rdata <= ext_mem_a.rdata(ext_mem_a_latency_c-1); |
wb_mem_a.ack <= '1'; |
else |
wb_mem_a.rdata <= (others => '0'); |
wb_mem_a.ack <= '0'; |
end if; |
end if; |
end process wb_mem_ram_access; |
end process ext_mem_a_access; |
|
-- wishbone memory access? -- |
wb_mem.acc_en <= '1' when (wb_cpu.addr >= wb_mem_base_addr_c) and (wb_cpu.addr < std_ulogic_vector(unsigned(wb_mem_base_addr_c) + wb_mem_size_c)) else '0'; |
|
-- output to cpu -- |
wb_cpu.rdata <= wb_mem.rdata(wb_mem_latency_c-1) when (wb_mem.rb_en(wb_mem_latency_c-1) = '1') else (others=> '0'); -- data output gate |
wb_cpu.ack <= wb_mem.ack(wb_mem_latency_c-1); |
wb_cpu.err <= '0'; |
-- Wishbone Memory B (simulated external memory) ------------------------------------------ |
-- ------------------------------------------------------------------------------------------- |
ext_mem_b_access: process(clk_gen) |
-- Simulated Wishbone memory B: clocked RAM with byte-wise write enables. Read data and ACK travel through an |
-- ext_mem_b_latency_c-stage shift register before being presented on the gated bus output register. |
begin |
if rising_edge(clk_gen) then |
-- control -- |
-- first pipeline stage: a request is accepted when CYC and the address-decoded STB are both set |
ext_mem_b.ack(0) <= wb_mem_b.cyc and wb_mem_b.stb; -- wishbone acknowledge |
|
-- write access -- |
if ((wb_mem_b.cyc and wb_mem_b.stb and wb_mem_b.we) = '1') then -- valid write access |
-- update only the byte lanes selected by SEL |
for i in 0 to 3 loop |
if (wb_mem_b.sel(i) = '1') then |
ext_ram_b(to_integer(unsigned(wb_mem_b.addr(index_size_f(ext_mem_b_size_c/4)+1 downto 2))))(7+i*8 downto 0+i*8) <= wb_mem_b.wdata(7+i*8 downto 0+i*8); |
end if; |
end loop; -- i |
end if; |
|
-- read access -- |
-- the addressed word is sampled every cycle, regardless of whether a request is pending |
ext_mem_b.rdata(0) <= ext_ram_b(to_integer(unsigned(wb_mem_b.addr(index_size_f(ext_mem_b_size_c/4)+1 downto 2)))); -- word aligned |
-- virtual read and ack latency -- |
if (ext_mem_b_latency_c > 1) then |
for i in 1 to ext_mem_b_latency_c-1 loop |
ext_mem_b.rdata(i) <= ext_mem_b.rdata(i-1); |
-- a pending ACK is dropped as soon as CYC is de-asserted |
ext_mem_b.ack(i) <= ext_mem_b.ack(i-1) and wb_mem_b.cyc; |
end loop; |
end if; |
|
-- bus output register -- |
-- rdata/ack are driven only for a completed access; otherwise all-zero so the fabric can OR the memory outputs |
wb_mem_b.err <= '0'; |
if (ext_mem_b.ack(ext_mem_b_latency_c-1) = '1') and (wb_mem_b.cyc = '1') then |
wb_mem_b.rdata <= ext_mem_b.rdata(ext_mem_b_latency_c-1); |
wb_mem_b.ack <= '1'; |
else |
wb_mem_b.rdata <= (others => '0'); |
wb_mem_b.ack <= '0'; |
end if; |
end if; |
end process ext_mem_b_access; |
|
|
end neorv32_tb_rtl; |
/sw/example/coremark/README.md
5,392 → 5,74
|
For a more compute-intensive version of CoreMark that uses larger datasets and execution loops taken from common applications, please check out EEMBC's [CoreMark-PRO](https://www.github.com/eembc/coremark-pro) benchmark, also on GitHub. |
|
# Building and Running |
|
To build and run the benchmark, type |
This project folder is a port of CoreMark (from the official [GitHub repository](https://github.com/eembc/coremark)) for the NEORV32 processor. |
|
`> make` |
# Building |
|
Full results are available in the files `run1.log` and `run2.log`. CoreMark result can be found in `run1.log`. |
|
## Cross Compiling |
To build the executable (`neorv32_exe.bin`) of the benchmark, type: |
|
For cross compile platforms please adjust `core_portme.mak`, `core_portme.h` (and possibly `core_portme.c`) according to the specific platform used. When porting to a new platform, it is recommended to copy one of the default port folders (e.g. `mkdir <platform> && cp linux/* <platform>`), adjust the porting files, and run: |
~~~ |
% make PORT_DIR=<platform> |
~~~ |
`> make USER_FLAGS+=-DRUN_COREMARK clean_all exe` |
|
## Make Targets |
`run` - Default target, creates `run1.log` and `run2.log`. |
`run1.log` - Run the benchmark with performance parameters, and output to `run1.log` |
`run2.log` - Run the benchmark with validation parameters, and output to `run2.log` |
`run3.log` - Run the benchmark with profile generation parameters, and output to `run3.log` |
`compile` - compile the benchmark executable |
`link` - link the benchmark executable |
`check` - test MD5 of sources that may not be modified |
`clean` - clean temporary files |
Make sure to define `RUN_COREMARK` *when invoking* `make` (via `USER_FLAGS+=-DRUN_COREMARK`). |
|
### Make flag: `ITERATIONS` |
By default, the benchmark will run between 10-100 seconds. To override, use `ITERATIONS=N` |
~~~ |
% make ITERATIONS=10 |
~~~ |
Will run the benchmark for 10 iterations. It is recommended to set a specific number of iterations in certain situations e.g.: |
To build the executable for a certain CPU configuration and a certain optimization level of the benchmark, type (`rv32imc` and `O3` in this example): |
|
* Running with a simulator |
* Measuring power/energy |
* Timing cannot be restarted |
`> make USER_FLAGS+=-DRUN_COREMARK MARCH=-march=rv32imc EFFORT=-O3 clean_all exe` |
|
Minimum required run time: **Results are only valid for reporting if the benchmark ran for at least 10 secs!** |
|
### Make flag: `XCFLAGS` |
To add compiler flags from the command line, use `XCFLAGS` e.g.: |
# Running |
|
~~~ |
% make XCFLAGS="-g -DMULTITHREAD=4 -DUSE_FORK=1" |
~~~ |
Upload the generated executable `neorv32_exe.bin` via the bootloader ('u' command) and execute it ('e' command): |
|
### Make flag: `CORE_DEBUG` |
``` |
<< NEORV32 Bootloader >> |
|
Define to compile for a debug run if you get incorrect CRC. |
BLDV: Nov 7 2020 |
HWV: 0x01040700 |
CLK: 0x05F5E100 Hz |
USER: 0x10000DE0 |
MISA: 0x40901104 |
PROC: 0x007F0015 |
IMEM: 0x00008000 bytes @ 0x00000000 |
DMEM: 0x00008000 bytes @ 0x80000000 |
|
~~~ |
% make XCFLAGS="-DCORE_DEBUG=1" |
~~~ |
Autoboot in 8s. Press key to abort. |
Aborted. |
|
### Make flag: `REBUILD` |
Available CMDs: |
h: Help |
r: Restart |
u: Upload |
s: Store to flash |
l: Load from flash |
e: Execute |
CMD:> u |
Awaiting neorv32_exe.bin... OK |
CMD:> e |
Booting... |
|
Force a rebuild of the executable. |
NEORV32: Processor running at 100000000 Hz |
NEORV32: Executing coremark (2000 iterations). This may take some time... |
|
## Systems Without `make` |
The following files need to be compiled: |
* `core_list_join.c` |
* `core_main.c` |
* `core_matrix.c` |
* `core_state.c` |
* `core_util.c` |
* `PORT_DIR/core_portme.c` |
2K performance run parameters for coremark. |
CoreMark Size : 666 |
Total ticks : 3036959876 |
Total time (secs): 30 |
Iterations/Sec : 66 |
Iterations : 2000 |
Compiler version : GCC10.1.0 |
Compiler flags : -> default, see makefile |
Memory location : STACK |
seedcrc : 0xe9f5 |
[0]crclist : 0xe714 |
[0]crcmatrix : 0x1fd7 |
[0]crcstate : 0x8e3a |
[0]crcfinal : 0x4983 |
Correct operation validated. See README.md for run and reporting rules. |
|
For example: |
~~~ |
% gcc -O2 -o coremark.exe core_list_join.c core_main.c core_matrix.c core_state.c core_util.c simple/core_portme.c -DPERFORMANCE_RUN=1 -DITERATIONS=1000 |
% ./coremark.exe > run1.log |
~~~ |
The above will compile the benchmark for a performance run and 1000 iterations. Output is redirected to `run1.log`. |
NEORV32: All reported numbers only show the integer results. |
|
# Parallel Execution |
Use `XCFLAGS=-DMULTITHREAD=N` where N is number of threads to run in parallel. Several implementations are available to execute in multiple contexts, or you can implement your own in `core_portme.c`. |
|
~~~ |
% make XCFLAGS="-DMULTITHREAD=4 -DUSE_PTHREAD" |
~~~ |
|
Above will compile the benchmark for execution on 4 cores, using POSIX Threads API. |
|
# Run Parameters for the Benchmark Executable |
CoreMark's executable takes several parameters as follows (but only if `main()` accepts arguments): |
1st - A seed value used for initialization of data. |
2nd - A seed value used for initialization of data. |
3rd - A seed value used for initialization of data. |
4th - Number of iterations (0 for auto : default value) |
5th - Reserved for internal use. |
6th - Reserved for internal use. |
7th - For malloc users only, override the size of the input data buffer. |
|
The run target from make will run coremark with 2 different data initialization seeds. |
|
## Alternative parameters: |
If not using `malloc` or command line arguments are not supported, the buffer size |
for the algorithms must be defined via the compiler define `TOTAL_DATA_SIZE`. |
`TOTAL_DATA_SIZE` must be set to 2000 bytes (default) for standard runs. |
The default for such a target when testing different configurations could be: |
|
~~~ |
% make XCFLAGS="-DTOTAL_DATA_SIZE=6000 -DMAIN_HAS_NOARGC=1" |
~~~ |
|
# Submitting Results |
|
CoreMark results can be submitted on the web. Open a web browser and go to the [submission page](https://www.eembc.org/coremark/submit.php). After registering an account you may enter a score. |
|
# Run Rules |
What is and is not allowed. |
|
## Required |
1. The benchmark needs to run for at least 10 seconds. |
2. All validation must succeed for seeds `0,0,0x66` and `0x3415,0x3415,0x66`, buffer size of 2000 bytes total. |
* If not using command line arguments to main: |
~~~ |
% make XCFLAGS="-DPERFORMANCE_RUN=1" REBUILD=1 run1.log |
% make XCFLAGS="-DVALIDATION_RUN=1" REBUILD=1 run2.log |
~~~ |
3. If using profile guided optimization, profile must be generated using seeds of `8,8,8`, and buffer size of 1200 bytes total. |
~~~ |
% make XCFLAGS="-DTOTAL_DATA_SIZE=1200 -DPROFILE_RUN=1" REBUILD=1 run3.log |
~~~ |
4. All source files must be compiled with the same flags. |
5. All data type sizes must match size in bits such that: |
* `ee_u8` is an unsigned 8-bit datatype. |
* `ee_s16` is a signed 16-bit datatype. |
* `ee_u16` is an unsigned 16-bit datatype. |
* `ee_s32` is a signed 32-bit datatype. |
* `ee_u32` is an unsigned 32-bit datatype. |
|
## Allowed |
|
1. Changing number of iterations |
2. Changing toolchain and build/load/run options |
3. Changing method of acquiring a data memory block |
4. Changing the method of acquiring seed values |
5. Changing implementation in `core_portme.c` |
6. Changing configuration values in `core_portme.h` |
7. Changing `core_portme.mak` |
|
## NOT ALLOWED |
1. Changing any source file other than `core_portme*` (use `make check` to validate) |
|
# Reporting rules |
Use the following syntax to report results on a data sheet: |
|
CoreMark 1.0 : N / C [/ P] [/ M] |
|
N - Number of iterations per second (with seeds 0,0,0x66, size=2000) |
|
C - Compiler version and flags |
|
P - Parameters such as data and code allocation specifics |
|
* This parameter *may* be omitted if all data was allocated on the heap in RAM. |
* This parameter *may not* be omitted when reporting CoreMark/MHz |
|
M - Type of parallel execution (if used) and number of contexts |
* This parameter may be omitted if parallel execution was not used. |
|
e.g.: |
|
~~~ |
CoreMark 1.0 : 128 / GCC 4.1.2 -O2 -fprofile-use / Heap in TCRAM / FORK:2 |
~~~ |
or |
~~~ |
CoreMark 1.0 : 1400 / GCC 3.4 -O4 |
~~~ |
|
If reporting scaling results, the results must be reported as follows: |
|
CoreMark/MHz 1.0 : N / C / P [/ M] |
|
P - When reporting scaling results, memory parameter must also indicate memory frequency:core frequency ratio. |
1. If the core has cache and cache frequency to core frequency ratio is configurable, that must also be included. |
|
e.g.: |
|
~~~ |
CoreMark/MHz 1.0 : 1.47 / GCC 4.1.2 -O2 / DDR3(Heap) 30:1 Memory 1:1 Cache |
~~~ |
|
# Log File Format |
The log files have the following format |
|
~~~ |
2K performance run parameters for coremark. (Run type) |
CoreMark Size : 666 (Buffer size) |
Total ticks : 25875 (platform dependent value) |
Total time (secs) : 25.875000 (actual time in seconds) |
Iterations/Sec : 3864.734300 (Performance value to report) |
Iterations : 100000 (number of iterations used) |
Compiler version : GCC3.4.4 (Compiler and version) |
Compiler flags : -O2 (Compiler and linker flags) |
Memory location : Code in flash, data in on chip RAM |
seedcrc : 0xe9f5 (identifier for the input seeds) |
[0]crclist : 0xe714 (validation for list part) |
[0]crcmatrix : 0x1fd7 (validation for matrix part) |
[0]crcstate : 0x8e3a (validation for state part) |
[0]crcfinal : 0x33ff (iteration dependent output) |
Correct operation validated. See README.md for run and reporting rules. (*Only when run is successful*) |
CoreMark 1.0 : 3864.734300 / GCC3.4.4 -O2 / Heap (*Only on a successful performance run*) |
~~~ |
|
# Theory of Operation |
|
This section describes the initial goals of CoreMark and their implementation. |
|
## Small and easy to understand |
|
* X number of source code lines for timed portion of the benchmark. |
* Meaningful names for variables and functions. |
* Comments for each block of code more than 10 lines long. |
|
## Portability |
|
A thin abstraction layer will be provided for I/O and timing in a separate file. All I/O and timing of the benchmark will be done through this layer. |
|
### Code / data size |
|
* Compile with gcc on x86 and make sure all sizes are according to requirements. |
* If dynamic memory allocation is used, take total memory allocated into account as well. |
* Avoid recursive functions and keep track of stack usage. |
* Use the same memory block as data site for all algorithms, and initialize the data before each algorithm – while this means that initialization with data happens during the timed portion, it will only happen once during the timed portion and so have negligible effect on the results. |
|
## Controlled output |
|
This may be the most difficult goal. Compilers are constantly improving and getting better at analyzing code. To create work that cannot be computed at compile time and must be computed at run time, we will rely on two assumptions: |
|
* Some system functions (e.g. time, scanf) and parameters cannot be computed at compile time. In most cases, marking a variable volatile means the compiler is forced to read this variable every time it is accessed. This will be used to introduce a factor into the input that cannot be precomputed at compile time. Since the results are input dependent, that will make sure that computation has to happen at run time. |
|
* Either a system function or I/O (e.g. scanf) or command line parameters or volatile variables will be used before the timed portion to generate data which is not available at compile time. Specific method used is not relevant as long as it can be controlled, and that it cannot be computed or eliminated by the compiler at compile time. E.g. if the clock() function is a compiler stub, it may not be used. The derived values will be reported on the output so that verification can be done on a different machine. |
|
* We cannot rely on command line parameters since some embedded systems do not have the capability to provide command line parameters. All 3 methods above will be implemented (time based, scanf and command line parameters) and all 3 are valid if the compiler cannot determine the value at compile time. |
|
* It is important to note that the actual values that are to be supplied at run time will be standardized. The methodology is not intended to provide random data, but simply to provide controlled data that cannot be precomputed at compile time. |
|
* Printed results must be valid at run time. This will be used to make sure the computation has been executed. |
|
* Some embedded systems do not provide “printf” or other I/O functionality. All I/O will be done through a thin abstraction interface to allow execution on such systems (e.g. allow output via JTAG). |
|
## Key Algorithms |
|
### Linked List |
|
The following linked list structure will be used: |
|
~~~ |
typedef struct list_data_s { |
ee_s16 data16; |
ee_s16 idx; |
} list_data; |
|
typedef struct list_head_s { |
struct list_head_s *next; |
struct list_data_s *info; |
} list_head; |
~~~ |
|
While adding a level of indirection accessing the data, this structure is realistic and used in many embedded applications for small to medium lists. |
|
The list itself will be initialized on a block of memory that will be passed in to the initialization function. While in general linked lists use malloc for new nodes, embedded applications sometimes control the memory for small data structures such as arrays and lists directly to avoid the overhead of system calls, so this approach is realistic. |
|
The linked list will be initialized such that 1/4 of the list pointers point to sequential areas in memory, and 3/4 of the list pointers are distributed in a non sequential manner. This is done to emulate a linked list that had add/remove happen for a while disrupting the neat order, and then a series of adds that are likely to come from sequential memory locations. |
|
For the benchmark itself: |
- Multiple find operations are going to be performed. These find operations may result in the whole list being traversed. The result of each find will become part of the output chain. |
- The list will be sorted using merge sort based on the data16 value, and then derive CRC of the data16 item in order for part of the list. The CRC will become part of the output chain. |
- The list will be sorted again using merge sort based on the idx value. This sort will guarantee that the list is returned to the primary state before leaving the function, so that multiple iterations of the function will have the same result. CRC of the data16 for part of the list will again be calculated and become part of the output chain. |
|
The actual `data16` in each cell will be pseudo random based on a single 16b input that cannot be determined at compile time. In addition, the part of the list which is used for CRC will also be passed to the function, and determined based on an input that cannot be determined at compile time. |
|
### Matrix Multiply |
|
This very simple algorithm forms the basis of many more complex algorithms. The tight inner loop is the focus of many optimizations (compiler as well as hardware based) and is thus relevant for embedded processing. |
|
The total available data space will be divided to 3 parts: |
1. NxN matrix A. |
2. NxN matrix B. |
3. NxN matrix C. |
|
E.g. for 2K we will have 3 12x12 matrices (assuming data type of 32b 12(len)*12(wid)*4(size)*3(num) =1728 bytes). |
|
Matrix A will be initialized with small values (upper 3/4 of the bits all zero). |
Matrix B will be initialized with medium values (upper half of the bits all zero). |
Matrix C will be used for the result. |
|
For the benchmark itself: |
- Multiply A by a constant into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. |
- Multiply A by column X of B into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. |
- Multiply A by B into C, add the upper bits of each of the values in the result matrix. The result will become part of the output chain. |
|
The actual values for A and B must be derived based on input that is not available at compile time. |
|
### State Machine |
|
This part of the code needs to exercise switch and if statements. As such, we will use a small Moore state machine. In particular, this will be a state machine that identifies string input as numbers and divides them according to format. |
|
The state machine will parse the input string until either a “,” separator or end of input is encountered. An invalid number will cause the state machine to return invalid state and a valid number will cause the state machine to return with type of number format (int/float/scientific). |
|
This code will perform a realistic task, be small enough to easily understand, and exercise the required functionality. The other option used in embedded systems is a mealy based state machine, which is driven by a table. The table then determines the number of states and complexity of transitions. This approach, however, tests mainly the load/store and function call mechanisms and less the handling of branches. If analysis of the final results shows that the load/store functionality of the processor is not exercised thoroughly, it may be a good addition to the benchmark (codesize allowing). |
|
For input, the memory block will be initialized with comma separated values of mixed formats, as well as invalid inputs. |
|
For the benchmark itself: |
- Invoke the state machine on all of the input and count final states and state transitions. CRC of all final states and transitions will become part of the output chain. |
- Modify the input at intervals (inject errors) and repeat the state machine operation. |
- Modify the input back to original form. |
|
The actual input must be initialized based on data that cannot be determined at compile time. In addition the intervals for modification of the input and the actual modification must be based on input that cannot be determined at compile time. |
|
# Validation |
|
This release was tested on the following platforms: |
* x86 cygwin and gcc 3.4 (Quad, dual and single core systems) |
* x86 linux (Ubuntu/Fedora) and gcc (4.2/4.1) (Quad and single core systems) |
* MIPS64 BE linux and gcc 3.4 16 cores system |
* MIPS32 BE linux with CodeSourcery compiler 4.2-177 on Malta/Linux with a 1004K 3-core system |
* PPC simulator with gcc 4.2.2 (No OS) |
* PPC 64b BE linux (yellowdog) with gcc 3.4 and 4.1 (Dual core system) |
* BF533 with VDSP50 |
* Renesas R8C/H8 MCU with HEW 4.05 |
* NXP LPC1700 armcc v4.0.0.524 |
* NEC 78K with IAR v4.61 |
* ARM simulator with armcc v4 |
|
# Memory Analysis |
|
Valgrind 3.4.0 used and no errors reported. |
|
# Balance Analysis |
|
Number of instructions executed for each function tested with cachegrind and found balanced with gcc and -O0. |
|
# Statistics |
|
Lines: |
~~~ |
Lines Blank Cmnts Source AESL |
===== ===== ===== ===== ========== ======================================= |
469 66 170 251 627.5 core_list_join.c (C) |
330 18 54 268 670.0 core_main.c (C) |
256 32 80 146 365.0 core_matrix.c (C) |
240 16 51 186 465.0 core_state.c (C) |
165 11 20 134 335.0 core_util.c (C) |
150 23 36 98 245.0 coremark.h (C) |
1610 166 411 1083 2707.5 ----- Benchmark ----- (6 files) |
293 15 74 212 530.0 linux/core_portme.c (C) |
235 30 104 104 260.0 linux/core_portme.h (C) |
528 45 178 316 790.0 ----- Porting ----- (2 files) |
|
* For comparison, here are the stats for Dhrystone |
Lines Blank Cmnts Source AESL |
===== ===== ===== ===== ========== ======================================= |
311 15 242 54 135.0 dhry.h (C) |
789 132 119 553 1382.5 dhry_1.c (C) |
186 26 68 107 267.5 dhry_2.c (C) |
1286 173 429 714 1785.0 ----- C ----- (3 files) |
~~~ |
|
# Credits |
Many thanks to all of the individuals who helped with the development or testing of CoreMark including (Sorted by company name; note that company names may no longer be accurate as this was written in 2009). |
* Alan Anderson, ADI |
* Adhikary Rajiv, ADI |
* Elena Stohr, ARM |
* Ian Rickards, ARM |
* Andrew Pickard, ARM |
* Trent Parker, CAVIUM |
* Shay Gal-On, EEMBC |
* Markus Levy, EEMBC |
* Peter Torelli, EEMBC |
* Ron Olson, IBM |
* Eyal Barzilay, MIPS |
* Jens Eltze, NEC |
* Hirohiko Ono, NEC |
* Ulrich Drees, NEC |
* Frank Roscheda, NEC |
* Rob Cosaro, NXP |
* Shumpei Kawasaki, RENESAS |
|
# Legal |
Please refer to LICENSE.md in this repository for a description of your rights to use this code. |
|
# Copyright |
Copyright © 2009 EEMBC All rights reserved. |
CoreMark is a trademark of EEMBC and EEMBC is a registered trademark of the Embedded Microprocessor Benchmark Consortium. |
|
NEORV32: Executed instructions 0x00000000_24b8576e |
NEORV32: CoreMark core clock cycles 0x00000000_b5045484 |
NEORV32: Average CPI (integer part only): 4 cycles/instruction |
``` |
/sw/example/coremark/core_list_join.c
19,431 → 19,511
#include "coremark.h" |
/* |
Topic: Description |
Benchmark using a linked list. |
Benchmark using a linked list. |
|
Linked list is a common data structure used in many applications. |
|
For our purposes, this will exercise the memory units of the processor. |
In particular, usage of the list pointers to find and alter data. |
|
We are not using Malloc since some platforms do not support this library. |
|
Instead, the memory block being passed in is used to create a list, |
and the benchmark takes care not to add more items than can be |
accommodated by the memory block. The porting layer will make sure |
that we have a valid memory block. |
|
All operations are done in place, without using any extra memory. |
|
The list itself contains list pointers and pointers to data items. |
Data items contain the following: |
|
idx - An index that captures the initial order of the list. |
data - Variable data initialized based on the input parameters. The 16b are divided as follows: |
o Upper 8b are backup of original data. |
o Bit 7 indicates if the lower 7 bits are to be used as is or calculated. |
o Bits 0-2 indicate type of operation to perform to get a 7b value. |
o Bits 3-6 provide input for the operation. |
|
Linked list is a common data structure used in many applications. |
|
For our purposes, this will exercise the memory units of the processor. |
In particular, usage of the list pointers to find and alter data. |
|
We are not using Malloc since some platforms do not support this |
library. |
|
Instead, the memory block being passed in is used to create a list, |
and the benchmark takes care not to add more items than can be |
accommodated by the memory block. The porting layer will make sure |
that we have a valid memory block. |
|
All operations are done in place, without using any extra memory. |
|
The list itself contains list pointers and pointers to data items. |
Data items contain the following: |
|
idx - An index that captures the initial order of the list. |
data - Variable data initialized based on the input parameters. The 16b |
are divided as follows: o Upper 8b are backup of original data. o Bit 7 |
indicates if the lower 7 bits are to be used as is or calculated. o Bits 0-2 |
indicate type of operation to perform to get a 7b value. o Bits 3-6 provide |
input for the operation. |
|
*/ |
|
/* local functions */ |
|
list_head *core_list_find(list_head *list,list_data *info); |
list_head *core_list_find(list_head *list, list_data *info); |
list_head *core_list_reverse(list_head *list); |
list_head *core_list_remove(list_head *item); |
list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified); |
list_head *core_list_insert_new(list_head *insert_point |
, list_data *info, list_head **memblock, list_data **datablock |
, list_head *memblock_end, list_data *datablock_end); |
typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res); |
list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res); |
list_head *core_list_undo_remove(list_head *item_removed, |
list_head *item_modified); |
list_head *core_list_insert_new(list_head * insert_point, |
list_data * info, |
list_head **memblock, |
list_data **datablock, |
list_head * memblock_end, |
list_data * datablock_end); |
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res); |
list_head *core_list_mergesort(list_head * list, |
list_cmp cmp, |
core_results *res); |
|
ee_s16 calc_func(ee_s16 *pdata, core_results *res) { |
ee_s16 data=*pdata; |
ee_s16 retval; |
ee_u8 optype=(data>>7) & 1; /* bit 7 indicates if the function result has been cached */ |
if (optype) /* if cached, use cache */ |
return (data & 0x007f); |
else { /* otherwise calculate and cache the result */ |
ee_s16 flag=data & 0x7; /* bits 0-2 is type of function to perform */ |
ee_s16 dtype=((data>>3) & 0xf); /* bits 3-6 is specific data for the operation */ |
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */ |
switch (flag) { |
case 0: |
if (dtype<0x22) /* set min period for bit corruption */ |
dtype=0x22; |
retval=core_bench_state(res->size,res->memblock[3],res->seed1,res->seed2,dtype,res->crc); |
if (res->crcstate==0) |
res->crcstate=retval; |
break; |
case 1: |
retval=core_bench_matrix(&(res->mat),dtype,res->crc); |
if (res->crcmatrix==0) |
res->crcmatrix=retval; |
break; |
default: |
retval=data; |
break; |
} |
res->crc=crcu16(retval,res->crc); |
retval &= 0x007f; |
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */ |
return retval; |
} |
ee_s16 |
calc_func(ee_s16 *pdata, core_results *res) |
{ |
ee_s16 data = *pdata; |
ee_s16 retval; |
ee_u8 optype |
= (data >> 7) |
& 1; /* bit 7 indicates if the function result has been cached */ |
if (optype) /* if cached, use cache */ |
return (data & 0x007f); |
else |
{ /* otherwise calculate and cache the result */ |
ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */ |
ee_s16 dtype |
= ((data >> 3) |
& 0xf); /* bits 3-6 is specific data for the operation */ |
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */ |
switch (flag) |
{ |
case 0: |
if (dtype < 0x22) /* set min period for bit corruption */ |
dtype = 0x22; |
retval = core_bench_state(res->size, |
res->memblock[3], |
res->seed1, |
res->seed2, |
dtype, |
res->crc); |
if (res->crcstate == 0) |
res->crcstate = retval; |
break; |
case 1: |
retval = core_bench_matrix(&(res->mat), dtype, res->crc); |
if (res->crcmatrix == 0) |
res->crcmatrix = retval; |
break; |
default: |
retval = data; |
break; |
} |
res->crc = crcu16(retval, res->crc); |
retval &= 0x007f; |
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */ |
return retval; |
} |
} |
/* Function: cmp_complex |
Compare the data item in a list cell. |
Compare the data item in a list cell. |
|
Can be used by mergesort. |
Can be used by mergesort. |
*/ |
ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) { |
ee_s16 val1=calc_func(&(a->data16),res); |
ee_s16 val2=calc_func(&(b->data16),res); |
return val1 - val2; |
ee_s32 |
cmp_complex(list_data *a, list_data *b, core_results *res) |
{ |
ee_s16 val1 = calc_func(&(a->data16), res); |
ee_s16 val2 = calc_func(&(b->data16), res); |
return val1 - val2; |
} |
|
/* Function: cmp_idx |
Compare the idx item in a list cell, and regen the data. |
Compare the idx item in a list cell, and regen the data. |
|
Can be used by mergesort. |
Can be used by mergesort. |
*/ |
ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) { |
if (res==NULL) { |
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); |
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); |
} |
return a->idx - b->idx; |
ee_s32 |
cmp_idx(list_data *a, list_data *b, core_results *res) |
{ |
if (res == NULL) |
{ |
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8)); |
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8)); |
} |
return a->idx - b->idx; |
} |
|
void copy_info(list_data *to,list_data *from) { |
to->data16=from->data16; |
to->idx=from->idx; |
void |
copy_info(list_data *to, list_data *from) |
{ |
to->data16 = from->data16; |
to->idx = from->idx; |
} |
|
/* Benchmark for linked list: |
- Try to find multiple data items. |
- List sort |
- Operate on data from list (crc) |
- Single remove/reinsert |
* At the end of this function, the list is back to original state |
- Try to find multiple data items. |
- List sort |
- Operate on data from list (crc) |
- Single remove/reinsert |
* At the end of this function, the list is back to original state |
*/ |
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) { |
ee_u16 retval=0; |
ee_u16 found=0,missed=0; |
list_head *list=res->list; |
ee_s16 find_num=res->seed3; |
list_head *this_find; |
list_head *finder, *remover; |
list_data info; |
ee_s16 i; |
ee_u16 |
core_bench_list(core_results *res, ee_s16 finder_idx) |
{ |
ee_u16 retval = 0; |
ee_u16 found = 0, missed = 0; |
list_head *list = res->list; |
ee_s16 find_num = res->seed3; |
list_head *this_find; |
list_head *finder, *remover; |
list_data info; |
ee_s16 i; |
|
info.idx=finder_idx; |
/* find <find_num> values in the list, and change the list each time (reverse and cache if value found) */ |
for (i=0; i<find_num; i++) { |
info.data16= (i & 0xff) ; |
this_find=core_list_find(list,&info); |
list=core_list_reverse(list); |
if (this_find==NULL) { |
missed++; |
retval+=(list->next->info->data16 >> 8) & 1; |
} |
else { |
found++; |
if (this_find->info->data16 & 0x1) /* use found value */ |
retval+=(this_find->info->data16 >> 9) & 1; |
/* and cache next item at the head of the list (if any) */ |
if (this_find->next != NULL) { |
finder = this_find->next; |
this_find->next = finder->next; |
finder->next=list->next; |
list->next=finder; |
} |
} |
if (info.idx>=0) |
info.idx++; |
info.idx = finder_idx; |
/* find <find_num> values in the list, and change the list each time |
* (reverse and cache if value found) */ |
for (i = 0; i < find_num; i++) |
{ |
info.data16 = (i & 0xff); |
this_find = core_list_find(list, &info); |
list = core_list_reverse(list); |
if (this_find == NULL) |
{ |
missed++; |
retval += (list->next->info->data16 >> 8) & 1; |
} |
else |
{ |
found++; |
if (this_find->info->data16 & 0x1) /* use found value */ |
retval += (this_find->info->data16 >> 9) & 1; |
/* and cache next item at the head of the list (if any) */ |
if (this_find->next != NULL) |
{ |
finder = this_find->next; |
this_find->next = finder->next; |
finder->next = list->next; |
list->next = finder; |
} |
} |
if (info.idx >= 0) |
info.idx++; |
#if CORE_DEBUG |
ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found); |
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found); |
#endif |
} |
retval+=found*4-missed; |
/* sort the list by data content and remove one item*/ |
if (finder_idx>0) |
list=core_list_mergesort(list,cmp_complex,res); |
remover=core_list_remove(list->next); |
/* CRC data content of list from location of index N forward, and then undo remove */ |
finder=core_list_find(list,&info); |
if (!finder) |
finder=list->next; |
while (finder) { |
retval=crc16(list->info->data16,retval); |
finder=finder->next; |
} |
} |
retval += found * 4 - missed; |
/* sort the list by data content and remove one item*/ |
if (finder_idx > 0) |
list = core_list_mergesort(list, cmp_complex, res); |
remover = core_list_remove(list->next); |
/* CRC data content of list from location of index N forward, and then undo |
* remove */ |
finder = core_list_find(list, &info); |
if (!finder) |
finder = list->next; |
while (finder) |
{ |
retval = crc16(list->info->data16, retval); |
finder = finder->next; |
} |
#if CORE_DEBUG |
ee_printf("List sort 1: %04x\n",retval); |
ee_printf("List sort 1: %04x\n", retval); |
#endif |
remover=core_list_undo_remove(remover,list->next); |
/* sort the list by index, in effect returning the list to original state */ |
list=core_list_mergesort(list,cmp_idx,NULL); |
/* CRC data content of list */ |
finder=list->next; |
while (finder) { |
retval=crc16(list->info->data16,retval); |
finder=finder->next; |
} |
remover = core_list_undo_remove(remover, list->next); |
/* sort the list by index, in effect returning the list to original state */ |
list = core_list_mergesort(list, cmp_idx, NULL); |
/* CRC data content of list */ |
finder = list->next; |
while (finder) |
{ |
retval = crc16(list->info->data16, retval); |
finder = finder->next; |
} |
#if CORE_DEBUG |
ee_printf("List sort 2: %04x\n",retval); |
ee_printf("List sort 2: %04x\n", retval); |
#endif |
return retval; |
return retval; |
} |
/* Function: core_list_init |
Initialize list with data. |
Initialize list with data. |
|
Parameters: |
blksize - Size of memory to be initialized. |
memblock - Pointer to memory block. |
seed - Actual values chosen depend on the seed parameter. |
The seed parameter MUST be supplied from a source that cannot be determined at compile time |
Parameters: |
blksize - Size of memory to be initialized. |
memblock - Pointer to memory block. |
seed - Actual values chosen depend on the seed parameter. |
The seed parameter MUST be supplied from a source that cannot be |
determined at compile time |
|
Returns: |
Pointer to the head of the list. |
Returns: |
Pointer to the head of the list. |
|
*/ |
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) { |
/* calculated pointers for the list */ |
ee_u32 per_item=16+sizeof(struct list_data_s); |
ee_u32 size=(blksize/per_item)-2; /* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */ |
list_head *memblock_end=memblock+size; |
list_data *datablock=(list_data *)(memblock_end); |
list_data *datablock_end=datablock+size; |
/* some useful variables */ |
ee_u32 i; |
list_head *finder,*list=memblock; |
list_data info; |
list_head * |
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) |
{ |
/* calculated pointers for the list */ |
ee_u32 per_item = 16 + sizeof(struct list_data_s); |
ee_u32 size = (blksize / per_item) |
- 2; /* to accomodate systems with 64b pointers, and make sure |
same code is executed, set max list elements */ |
list_head *memblock_end = memblock + size; |
list_data *datablock = (list_data *)(memblock_end); |
list_data *datablock_end = datablock + size; |
/* some useful variables */ |
ee_u32 i; |
list_head *finder, *list = memblock; |
list_data info; |
|
/* create a fake items for the list head and tail */ |
list->next=NULL; |
list->info=datablock; |
list->info->idx=0x0000; |
list->info->data16=(ee_s16)0x8080; |
memblock++; |
datablock++; |
info.idx=0x7fff; |
info.data16=(ee_s16)0xffff; |
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); |
|
/* then insert size items */ |
for (i=0; i<size; i++) { |
ee_u16 datpat=((ee_u16)(seed^i) & 0xf); |
ee_u16 dat=(datpat<<3) | (i&0x7); /* alternate between algorithms */ |
info.data16=(dat<<8) | dat; /* fill the data with actual data and upper bits with rebuild value */ |
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); |
} |
/* and now index the list so we know initial seed order of the list */ |
finder=list->next; |
i=1; |
while (finder->next!=NULL) { |
if (i<size/5) /* first 20% of the list in order */ |
finder->info->idx=i++; |
else { |
ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */ |
finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */ |
} |
finder=finder->next; |
} |
list = core_list_mergesort(list,cmp_idx,NULL); |
/* create a fake items for the list head and tail */ |
list->next = NULL; |
list->info = datablock; |
list->info->idx = 0x0000; |
list->info->data16 = (ee_s16)0x8080; |
memblock++; |
datablock++; |
info.idx = 0x7fff; |
info.data16 = (ee_s16)0xffff; |
core_list_insert_new( |
list, &info, &memblock, &datablock, memblock_end, datablock_end); |
|
/* then insert size items */ |
for (i = 0; i < size; i++) |
{ |
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf); |
ee_u16 dat |
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */ |
info.data16 = (dat << 8) | dat; /* fill the data with actual data and |
upper bits with rebuild value */ |
core_list_insert_new( |
list, &info, &memblock, &datablock, memblock_end, datablock_end); |
} |
/* and now index the list so we know initial seed order of the list */ |
finder = list->next; |
i = 1; |
while (finder->next != NULL) |
{ |
if (i < size / 5) /* first 20% of the list in order */ |
finder->info->idx = i++; |
else |
{ |
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */ |
finder->info->idx = 0x3fff |
& (((i & 0x07) << 8) |
| pat); /* make sure the mixed items end up |
after the ones in sequence */ |
} |
finder = finder->next; |
} |
list = core_list_mergesort(list, cmp_idx, NULL); |
#if CORE_DEBUG |
ee_printf("Initialized list:\n"); |
finder=list; |
while (finder) { |
ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16); |
finder=finder->next; |
} |
ee_printf("\n"); |
ee_printf("Initialized list:\n"); |
finder = list; |
while (finder) |
{ |
ee_printf( |
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16); |
finder = finder->next; |
} |
ee_printf("\n"); |
#endif |
return list; |
return list; |
} |
|
/* Function: core_list_insert |
Insert an item to the list |
Insert an item to the list |
|
Parameters: |
insert_point - where to insert the item. |
info - data for the cell. |
memblock - pointer for the list header |
datablock - pointer for the list data |
memblock_end - end of region for list headers |
datablock_end - end of region for list data |
Parameters: |
insert_point - where to insert the item. |
info - data for the cell. |
memblock - pointer for the list header |
datablock - pointer for the list data |
memblock_end - end of region for list headers |
datablock_end - end of region for list data |
|
Returns: |
Pointer to new item. |
Returns: |
Pointer to new item. |
*/ |
list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock |
, list_head *memblock_end, list_data *datablock_end) { |
list_head *newitem; |
|
if ((*memblock+1) >= memblock_end) |
return NULL; |
if ((*datablock+1) >= datablock_end) |
return NULL; |
|
newitem=*memblock; |
(*memblock)++; |
newitem->next=insert_point->next; |
insert_point->next=newitem; |
|
newitem->info=*datablock; |
(*datablock)++; |
copy_info(newitem->info,info); |
|
return newitem; |
list_head * |
core_list_insert_new(list_head * insert_point, |
list_data * info, |
list_head **memblock, |
list_data **datablock, |
list_head * memblock_end, |
list_data * datablock_end) |
{ |
list_head *newitem; |
|
if ((*memblock + 1) >= memblock_end) |
return NULL; |
if ((*datablock + 1) >= datablock_end) |
return NULL; |
|
newitem = *memblock; |
(*memblock)++; |
newitem->next = insert_point->next; |
insert_point->next = newitem; |
|
newitem->info = *datablock; |
(*datablock)++; |
copy_info(newitem->info, info); |
|
return newitem; |
} |
|
/* Function: core_list_remove |
Remove an item from the list. |
Remove an item from the list. |
|
Operation: |
For a singly linked list, remove by copying the data from the next item |
over to the current cell, and unlinking the next item. |
Operation: |
For a singly linked list, remove by copying the data from the next item |
over to the current cell, and unlinking the next item. |
|
Note: |
since there is always a fake item at the end of the list, no need to check for NULL. |
Note: |
since there is always a fake item at the end of the list, no need to |
check for NULL. |
|
Returns: |
Removed item. |
Returns: |
Removed item. |
*/ |
list_head *core_list_remove(list_head *item) { |
list_data *tmp; |
list_head *ret=item->next; |
/* swap data pointers */ |
tmp=item->info; |
item->info=ret->info; |
ret->info=tmp; |
/* and eliminate item */ |
item->next=item->next->next; |
ret->next=NULL; |
return ret; |
list_head * |
core_list_remove(list_head *item) |
{ |
list_data *tmp; |
list_head *ret = item->next; |
/* swap data pointers */ |
tmp = item->info; |
item->info = ret->info; |
ret->info = tmp; |
/* and eliminate item */ |
item->next = item->next->next; |
ret->next = NULL; |
return ret; |
} |
|
/* Function: core_list_undo_remove |
Undo a remove operation. |
Undo a remove operation. |
|
Operation: |
Since we want each iteration of the benchmark to be exactly the same, |
we need to be able to undo a remove. |
Link the removed item back into the list, and switch the info items. |
Operation: |
Since we want each iteration of the benchmark to be exactly the same, |
we need to be able to undo a remove. |
Link the removed item back into the list, and switch the info items. |
|
Parameters: |
item_removed - Return value from the <core_list_remove> |
item_modified - List item that was modified during <core_list_remove> |
Parameters: |
item_removed - Return value from the <core_list_remove> |
item_modified - List item that was modified during <core_list_remove> |
|
Returns: |
The item that was linked back to the list. |
|
Returns: |
The item that was linked back to the list. |
|
*/ |
list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) { |
list_data *tmp; |
/* swap data pointers */ |
tmp=item_removed->info; |
item_removed->info=item_modified->info; |
item_modified->info=tmp; |
/* and insert item */ |
item_removed->next=item_modified->next; |
item_modified->next=item_removed; |
return item_removed; |
list_head * |
core_list_undo_remove(list_head *item_removed, list_head *item_modified) |
{ |
list_data *tmp; |
/* swap data pointers */ |
tmp = item_removed->info; |
item_removed->info = item_modified->info; |
item_modified->info = tmp; |
/* and insert item */ |
item_removed->next = item_modified->next; |
item_modified->next = item_removed; |
return item_removed; |
} |
|
/* Function: core_list_find |
Find an item in the list |
Find an item in the list |
|
Operation: |
Find an item by idx (if not 0) or specific data value |
Operation: |
Find an item by idx (if not 0) or specific data value |
|
Parameters: |
list - list head |
info - idx or data to find |
Parameters: |
list - list head |
info - idx or data to find |
|
Returns: |
Found item, or NULL if not found. |
Returns: |
Found item, or NULL if not found. |
*/ |
list_head *core_list_find(list_head *list,list_data *info) { |
if (info->idx>=0) { |
while (list && (list->info->idx != info->idx)) |
list=list->next; |
return list; |
} else { |
while (list && ((list->info->data16 & 0xff) != info->data16)) |
list=list->next; |
return list; |
} |
list_head * |
core_list_find(list_head *list, list_data *info) |
{ |
if (info->idx >= 0) |
{ |
while (list && (list->info->idx != info->idx)) |
list = list->next; |
return list; |
} |
else |
{ |
while (list && ((list->info->data16 & 0xff) != info->data16)) |
list = list->next; |
return list; |
} |
} |
/* Function: core_list_reverse |
Reverse a list |
Reverse a list |
|
Operation: |
Rearrange the pointers so the list is reversed. |
Operation: |
Rearrange the pointers so the list is reversed. |
|
Parameters: |
list - list head |
info - idx or data to find |
Parameters: |
list - list head |
info - idx or data to find |
|
Returns: |
Found item, or NULL if not found. |
Returns: |
Found item, or NULL if not found. |
*/ |
|
list_head *core_list_reverse(list_head *list) { |
list_head *next=NULL, *tmp; |
while (list) { |
tmp=list->next; |
list->next=next; |
next=list; |
list=tmp; |
} |
return next; |
list_head * |
core_list_reverse(list_head *list) |
{ |
list_head *next = NULL, *tmp; |
while (list) |
{ |
tmp = list->next; |
list->next = next; |
next = list; |
list = tmp; |
} |
return next; |
} |
/* Function: core_list_mergesort |
Sort the list in place without recursion. |
Sort the list in place without recursion. |
|
Description: |
Use mergesort, as for linked list this is a realistic solution. |
Also, since this is aimed at embedded, care was taken to use iterative rather then recursive algorithm. |
The sort can either return the list to original order (by idx) , |
or use the data item to invoke other other algorithms and change the order of the list. |
Description: |
Use mergesort, as for linked list this is a realistic solution. |
Also, since this is aimed at embedded, care was taken to use iterative |
rather then recursive algorithm. The sort can either return the list to |
original order (by idx) , or use the data item to invoke other other |
algorithms and change the order of the list. |
|
Parameters: |
list - list to be sorted. |
cmp - cmp function to use |
Parameters: |
list - list to be sorted. |
cmp - cmp function to use |
|
Returns: |
New head of the list. |
Returns: |
New head of the list. |
|
Note: |
We have a special header for the list that will always be first, |
but the algorithm could theoretically modify where the list starts. |
Note: |
We have a special header for the list that will always be first, |
but the algorithm could theoretically modify where the list starts. |
|
*/ |
list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) { |
list_head * |
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) |
{ |
list_head *p, *q, *e, *tail; |
ee_s32 insize, nmerges, psize, qsize, i; |
ee_s32 insize, nmerges, psize, qsize, i; |
|
insize = 1; |
|
while (1) { |
p = list; |
while (1) |
{ |
p = list; |
list = NULL; |
tail = NULL; |
|
nmerges = 0; /* count number of merges we do in this pass */ |
nmerges = 0; /* count number of merges we do in this pass */ |
|
while (p) { |
nmerges++; /* there exists a merge to be done */ |
while (p) |
{ |
nmerges++; /* there exists a merge to be done */ |
/* step `insize' places along from p */ |
q = p; |
q = p; |
psize = 0; |
for (i = 0; i < insize; i++) { |
for (i = 0; i < insize; i++) |
{ |
psize++; |
q = q->next; |
if (!q) break; |
q = q->next; |
if (!q) |
break; |
} |
|
/* if q hasn't fallen off end, we have two lists to merge */ |
450,40 → 530,60
qsize = insize; |
|
/* now we have two lists; merge them */ |
while (psize > 0 || (qsize > 0 && q)) { |
while (psize > 0 || (qsize > 0 && q)) |
{ |
|
/* decide whether next element of merge comes from p or q */ |
if (psize == 0) { |
/* p is empty; e must come from q. */ |
e = q; q = q->next; qsize--; |
} else if (qsize == 0 || !q) { |
/* q is empty; e must come from p. */ |
e = p; p = p->next; psize--; |
} else if (cmp(p->info,q->info,res) <= 0) { |
/* First element of p is lower (or same); e must come from p. */ |
e = p; p = p->next; psize--; |
} else { |
/* First element of q is lower; e must come from q. */ |
e = q; q = q->next; qsize--; |
} |
/* decide whether next element of merge comes from p or q */ |
if (psize == 0) |
{ |
/* p is empty; e must come from q. */ |
e = q; |
q = q->next; |
qsize--; |
} |
else if (qsize == 0 || !q) |
{ |
/* q is empty; e must come from p. */ |
e = p; |
p = p->next; |
psize--; |
} |
else if (cmp(p->info, q->info, res) <= 0) |
{ |
/* First element of p is lower (or same); e must come from |
* p. */ |
e = p; |
p = p->next; |
psize--; |
} |
else |
{ |
/* First element of q is lower; e must come from q. */ |
e = q; |
q = q->next; |
qsize--; |
} |
|
/* add the next element to the merged list */ |
if (tail) { |
tail->next = e; |
} else { |
list = e; |
} |
tail = e; |
} |
/* add the next element to the merged list */ |
if (tail) |
{ |
tail->next = e; |
} |
else |
{ |
list = e; |
} |
tail = e; |
} |
|
/* now p has stepped `insize' places along, and q has too */ |
p = q; |
/* now p has stepped `insize' places along, and q has too */ |
p = q; |
} |
|
tail->next = NULL; |
|
tail->next = NULL; |
|
/* If we have done only one merge, we're finished. */ |
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ |
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ |
return list; |
|
/* Otherwise repeat, merging lists twice the size */ |
490,6 → 590,6
insize *= 2; |
} |
#if COMPILER_REQUIRES_SORT_RETURN |
return list; |
return list; |
#endif |
} |
/sw/example/coremark/core_main.c
17,357 → 17,426
*/ |
|
/* File: core_main.c |
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results. |
This file contains the framework to acquire a block of memory, seed |
initial parameters, run the benchmark and report the results. |
*/ |
#include "coremark.h" |
|
/* Function: iterate |
Run the benchmark for a specified number of iterations. |
Run the benchmark for a specified number of iterations. |
|
Operation: |
For each type of benchmarked algorithm: |
a - Initialize the data block for the algorithm. |
b - Execute the algorithm N times. |
Operation: |
For each type of benchmarked algorithm: |
a - Initialize the data block for the algorithm. |
b - Execute the algorithm N times. |
|
Returns: |
NULL. |
Returns: |
NULL. |
*/ |
static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1}; |
static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747}; |
static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84}; |
void *iterate(void *pres) { |
ee_u32 i; |
ee_u16 crc; |
core_results *res=(core_results *)pres; |
ee_u32 iterations=res->iterations; |
res->crc=0; |
res->crclist=0; |
res->crcmatrix=0; |
res->crcstate=0; |
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0, |
(ee_u16)0x3340, |
(ee_u16)0x6a79, |
(ee_u16)0xe714, |
(ee_u16)0xe3c1 }; |
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52, |
(ee_u16)0x1199, |
(ee_u16)0x5608, |
(ee_u16)0x1fd7, |
(ee_u16)0x0747 }; |
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47, |
(ee_u16)0x39bf, |
(ee_u16)0xe5a4, |
(ee_u16)0x8e3a, |
(ee_u16)0x8d84 }; |
void * |
iterate(void *pres) |
{ |
ee_u32 i; |
ee_u16 crc; |
core_results *res = (core_results *)pres; |
ee_u32 iterations = res->iterations; |
res->crc = 0; |
res->crclist = 0; |
res->crcmatrix = 0; |
res->crcstate = 0; |
|
for (i=0; i<iterations; i++) { |
crc=core_bench_list(res,1); |
res->crc=crcu16(crc,res->crc); |
crc=core_bench_list(res,-1); |
res->crc=crcu16(crc,res->crc); |
if (i==0) res->crclist=res->crc; |
} |
return NULL; |
for (i = 0; i < iterations; i++) |
{ |
crc = core_bench_list(res, 1); |
res->crc = crcu16(crc, res->crc); |
crc = core_bench_list(res, -1); |
res->crc = crcu16(crc, res->crc); |
if (i == 0) |
res->crclist = res->crc; |
} |
return NULL; |
} |
|
#if (SEED_METHOD==SEED_ARG) |
#if (SEED_METHOD == SEED_ARG) |
ee_s32 get_seed_args(int i, int argc, char *argv[]); |
#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv) |
#define get_seed_32(x) get_seed_args(x,argc,argv) |
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv) |
#define get_seed_32(x) get_seed_args(x, argc, argv) |
#else /* via function or volatile */ |
ee_s32 get_seed_32(int i); |
#define get_seed(x) (ee_s16)get_seed_32(x) |
#define get_seed(x) (ee_s16) get_seed_32(x) |
#endif |
|
#if (MEM_METHOD==MEM_STATIC) |
#if (MEM_METHOD == MEM_STATIC) |
ee_u8 static_memblk[TOTAL_DATA_SIZE]; |
#endif |
char *mem_name[3] = {"Static","Heap","Stack"}; |
char *mem_name[3] = { "Static", "Heap", "Stack" }; |
/* Function: main |
Main entry routine for the benchmark. |
This function is responsible for the following steps: |
Main entry routine for the benchmark. |
This function is responsible for the following steps: |
|
1 - Initialize input seeds from a source that cannot be determined at compile time. |
2 - Initialize memory block for use. |
3 - Run and time the benchmark. |
4 - Report results, testing the validity of the output if the seeds are known. |
1 - Initialize input seeds from a source that cannot be determined at |
compile time. 2 - Initialize memory block for use. 3 - Run and time the |
benchmark. 4 - Report results, testing the validity of the output if the |
seeds are known. |
|
Arguments: |
1 - first seed : Any value |
2 - second seed : Must be identical to first for iterations to be identical |
3 - third seed : Any value, should be at least an order of magnitude less then the input size, but bigger then 32. |
4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs |
Arguments: |
1 - first seed : Any value |
2 - second seed : Must be identical to first for iterations to be |
identical 3 - third seed : Any value, should be at least an order of |
magnitude less then the input size, but bigger then 32. 4 - Iterations : |
Special, if set to 0, iterations will be automatically determined such that |
the benchmark will run between 10 to 100 secs |
|
*/ |
|
#if MAIN_HAS_NOARGC |
MAIN_RETURN_TYPE main(void) { |
int argc=0; |
char *argv[1]; |
MAIN_RETURN_TYPE |
main(void) |
{ |
int argc = 0; |
char *argv[1]; |
#else |
MAIN_RETURN_TYPE main(int argc, char *argv[]) { |
MAIN_RETURN_TYPE |
main(int argc, char *argv[]) |
{ |
#endif |
|
// ----------------------------------------------- |
// ----------------------------------------------- |
// Disable coremark compilation by default |
#ifndef RUN_COREMARK |
#warning COREMARK HAS NOT BEEN COMPILED! Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it. |
|
// inform the user if you are actually executing this |
portable_init(NULL, &argc, argv); |
ee_printf("ERROR! CoreMark has not been compiled. Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it.\n"); |
|
return 0; |
ee_u16 i, j = 0, num_algorithms = 0; |
ee_s16 known_id = -1, total_errors = 0; |
ee_u16 seedcrc = 0; |
CORE_TICKS total_time; |
core_results results[MULTITHREAD]; |
#if (MEM_METHOD == MEM_STACK) |
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD]; |
#endif |
// ----------------------------------------------- |
// ----------------------------------------------- |
|
|
ee_u16 i,j=0,num_algorithms=0; |
ee_s16 known_id=-1,total_errors=0; |
ee_u16 seedcrc=0; |
CORE_TICKS total_time; |
core_results results[MULTITHREAD]; |
#if (MEM_METHOD==MEM_STACK) |
ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD]; |
#endif |
/* first call any initializations needed */ |
portable_init(&(results[0].port), &argc, argv); |
/* First some checks to make sure benchmark will run ok */ |
if (sizeof(struct list_head_s)>128) { |
ee_printf("list_head structure too big for comparable data!\n"); |
return MAIN_RETURN_VAL; |
} |
results[0].seed1=get_seed(1); |
results[0].seed2=get_seed(2); |
results[0].seed3=get_seed(3); |
results[0].iterations=get_seed_32(4); |
/* first call any initializations needed */ |
portable_init(&(results[0].port), &argc, argv); |
/* First some checks to make sure benchmark will run ok */ |
if (sizeof(struct list_head_s) > 128) |
{ |
ee_printf("list_head structure too big for comparable data!\n"); |
return MAIN_RETURN_VAL; |
} |
results[0].seed1 = get_seed(1); |
results[0].seed2 = get_seed(2); |
results[0].seed3 = get_seed(3); |
results[0].iterations = get_seed_32(4); |
#if CORE_DEBUG |
results[0].iterations=1; |
results[0].iterations = 1; |
#endif |
results[0].execs=get_seed_32(5); |
if (results[0].execs==0) { /* if not supplied, execute all algorithms */ |
results[0].execs=ALL_ALGORITHMS_MASK; |
} |
/* put in some default values based on one seed only for easy testing */ |
if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */ |
results[0].seed1=0; |
results[0].seed2=0; |
results[0].seed3=0x66; |
} |
if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */ |
results[0].seed1=0x3415; |
results[0].seed2=0x3415; |
results[0].seed3=0x66; |
} |
#if (MEM_METHOD==MEM_STATIC) |
results[0].memblock[0]=(void *)static_memblk; |
results[0].size=TOTAL_DATA_SIZE; |
results[0].err=0; |
#if (MULTITHREAD>1) |
#error "Cannot use a static data area with multiple contexts!" |
#endif |
#elif (MEM_METHOD==MEM_MALLOC) |
for (i=0 ; i<MULTITHREAD; i++) { |
ee_s32 malloc_override=get_seed(7); |
if (malloc_override != 0) |
results[i].size=malloc_override; |
else |
results[i].size=TOTAL_DATA_SIZE; |
results[i].memblock[0]=portable_malloc(results[i].size); |
results[i].seed1=results[0].seed1; |
results[i].seed2=results[0].seed2; |
results[i].seed3=results[0].seed3; |
results[i].err=0; |
results[i].execs=results[0].execs; |
} |
#elif (MEM_METHOD==MEM_STACK) |
for (i=0 ; i<MULTITHREAD; i++) { |
results[i].memblock[0]=stack_memblock+i*TOTAL_DATA_SIZE; |
results[i].size=TOTAL_DATA_SIZE; |
results[i].seed1=results[0].seed1; |
results[i].seed2=results[0].seed2; |
results[i].seed3=results[0].seed3; |
results[i].err=0; |
results[i].execs=results[0].execs; |
} |
results[0].execs = get_seed_32(5); |
if (results[0].execs == 0) |
{ /* if not supplied, execute all algorithms */ |
results[0].execs = ALL_ALGORITHMS_MASK; |
} |
/* put in some default values based on one seed only for easy testing */ |
if ((results[0].seed1 == 0) && (results[0].seed2 == 0) |
&& (results[0].seed3 == 0)) |
{ /* performance run */ |
results[0].seed1 = 0; |
results[0].seed2 = 0; |
results[0].seed3 = 0x66; |
} |
if ((results[0].seed1 == 1) && (results[0].seed2 == 0) |
&& (results[0].seed3 == 0)) |
{ /* validation run */ |
results[0].seed1 = 0x3415; |
results[0].seed2 = 0x3415; |
results[0].seed3 = 0x66; |
} |
#if (MEM_METHOD == MEM_STATIC) |
results[0].memblock[0] = (void *)static_memblk; |
results[0].size = TOTAL_DATA_SIZE; |
results[0].err = 0; |
#if (MULTITHREAD > 1) |
#error "Cannot use a static data area with multiple contexts!" |
#endif |
#elif (MEM_METHOD == MEM_MALLOC) |
for (i = 0; i < MULTITHREAD; i++) |
{ |
ee_s32 malloc_override = get_seed(7); |
if (malloc_override != 0) |
results[i].size = malloc_override; |
else |
results[i].size = TOTAL_DATA_SIZE; |
results[i].memblock[0] = portable_malloc(results[i].size); |
results[i].seed1 = results[0].seed1; |
results[i].seed2 = results[0].seed2; |
results[i].seed3 = results[0].seed3; |
results[i].err = 0; |
results[i].execs = results[0].execs; |
} |
#elif (MEM_METHOD == MEM_STACK) |
for (i = 0; i < MULTITHREAD; i++) |
{ |
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE; |
results[i].size = TOTAL_DATA_SIZE; |
results[i].seed1 = results[0].seed1; |
results[i].seed2 = results[0].seed2; |
results[i].seed3 = results[0].seed3; |
results[i].err = 0; |
results[i].execs = results[0].execs; |
} |
#else |
#error "Please define a way to initialize a memory block." |
#endif |
/* Data init */ |
/* Find out how much space we have based on number of algorithms */ |
for (i=0; i<NUM_ALGORITHMS; i++) { |
if ((1<<(ee_u32)i) & results[0].execs) |
num_algorithms++; |
} |
for (i=0 ; i<MULTITHREAD; i++) |
results[i].size=results[i].size/num_algorithms; |
/* Assign pointers */ |
for (i=0; i<NUM_ALGORITHMS; i++) { |
ee_u32 ctx; |
if ((1<<(ee_u32)i) & results[0].execs) { |
for (ctx=0 ; ctx<MULTITHREAD; ctx++) |
results[ctx].memblock[i+1]=(char *)(results[ctx].memblock[0])+results[0].size*j; |
j++; |
} |
} |
/* call inits */ |
for (i=0 ; i<MULTITHREAD; i++) { |
if (results[i].execs & ID_LIST) { |
results[i].list=core_list_init(results[0].size,results[i].memblock[1],results[i].seed1); |
} |
if (results[i].execs & ID_MATRIX) { |
core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat) ); |
} |
if (results[i].execs & ID_STATE) { |
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]); |
} |
} |
|
/* automatically determine number of iterations if not set */ |
if (results[0].iterations==0) { |
secs_ret secs_passed=0; |
ee_u32 divisor; |
results[0].iterations=1; |
while (secs_passed < (secs_ret)1) { |
results[0].iterations*=10; |
start_time(); |
iterate(&results[0]); |
stop_time(); |
secs_passed=time_in_secs(get_time()); |
} |
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */ |
divisor=(ee_u32)secs_passed; |
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */ |
divisor=1; |
results[0].iterations*=1+10/divisor; |
} |
/* perform actual benchmark */ |
start_time(); |
#if (MULTITHREAD>1) |
if (default_num_contexts>MULTITHREAD) { |
default_num_contexts=MULTITHREAD; |
} |
for (i=0 ; i<default_num_contexts; i++) { |
results[i].iterations=results[0].iterations; |
results[i].execs=results[0].execs; |
core_start_parallel(&results[i]); |
} |
for (i=0 ; i<default_num_contexts; i++) { |
core_stop_parallel(&results[i]); |
} |
/* Data init */ |
/* Find out how much space we have based on number of algorithms */ |
for (i = 0; i < NUM_ALGORITHMS; i++) |
{ |
if ((1 << (ee_u32)i) & results[0].execs) |
num_algorithms++; |
} |
for (i = 0; i < MULTITHREAD; i++) |
results[i].size = results[i].size / num_algorithms; |
/* Assign pointers */ |
for (i = 0; i < NUM_ALGORITHMS; i++) |
{ |
ee_u32 ctx; |
if ((1 << (ee_u32)i) & results[0].execs) |
{ |
for (ctx = 0; ctx < MULTITHREAD; ctx++) |
results[ctx].memblock[i + 1] |
= (char *)(results[ctx].memblock[0]) + results[0].size * j; |
j++; |
} |
} |
/* call inits */ |
for (i = 0; i < MULTITHREAD; i++) |
{ |
if (results[i].execs & ID_LIST) |
{ |
results[i].list = core_list_init( |
results[0].size, results[i].memblock[1], results[i].seed1); |
} |
if (results[i].execs & ID_MATRIX) |
{ |
core_init_matrix(results[0].size, |
results[i].memblock[2], |
(ee_s32)results[i].seed1 |
| (((ee_s32)results[i].seed2) << 16), |
&(results[i].mat)); |
} |
if (results[i].execs & ID_STATE) |
{ |
core_init_state( |
results[0].size, results[i].seed1, results[i].memblock[3]); |
} |
} |
|
/* automatically determine number of iterations if not set */ |
if (results[0].iterations == 0) |
{ |
secs_ret secs_passed = 0; |
ee_u32 divisor; |
results[0].iterations = 1; |
while (secs_passed < (secs_ret)1) |
{ |
results[0].iterations *= 10; |
start_time(); |
iterate(&results[0]); |
stop_time(); |
secs_passed = time_in_secs(get_time()); |
} |
/* now we know it executes for at least 1 sec, set actual run time at |
* about 10 secs */ |
divisor = (ee_u32)secs_passed; |
if (divisor == 0) /* some machines cast float to int as 0 since this |
conversion is not defined by ANSI, but we know at |
least one second passed */ |
divisor = 1; |
results[0].iterations *= 1 + 10 / divisor; |
} |
/* perform actual benchmark */ |
start_time(); |
#if (MULTITHREAD > 1) |
if (default_num_contexts > MULTITHREAD) |
{ |
default_num_contexts = MULTITHREAD; |
} |
for (i = 0; i < default_num_contexts; i++) |
{ |
results[i].iterations = results[0].iterations; |
results[i].execs = results[0].execs; |
core_start_parallel(&results[i]); |
} |
for (i = 0; i < default_num_contexts; i++) |
{ |
core_stop_parallel(&results[i]); |
} |
#else |
iterate(&results[0]); |
iterate(&results[0]); |
#endif |
stop_time(); |
total_time=get_time(); |
/* get a function of the input to report */ |
seedcrc=crc16(results[0].seed1,seedcrc); |
seedcrc=crc16(results[0].seed2,seedcrc); |
seedcrc=crc16(results[0].seed3,seedcrc); |
seedcrc=crc16(results[0].size,seedcrc); |
|
switch (seedcrc) { /* test known output for common seeds */ |
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */ |
known_id=0; |
ee_printf("6k performance run parameters for coremark.\n"); |
break; |
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */ |
known_id=1; |
ee_printf("6k validation run parameters for coremark.\n"); |
break; |
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */ |
known_id=2; |
ee_printf("Profile generation run parameters for coremark.\n"); |
break; |
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */ |
known_id=3; |
ee_printf("2K performance run parameters for coremark.\n"); |
break; |
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */ |
known_id=4; |
ee_printf("2K validation run parameters for coremark.\n"); |
break; |
default: |
total_errors=-1; |
break; |
} |
if (known_id>=0) { |
for (i=0 ; i<default_num_contexts; i++) { |
results[i].err=0; |
if ((results[i].execs & ID_LIST) && |
(results[i].crclist!=list_known_crc[known_id])) { |
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",i,results[i].crclist,list_known_crc[known_id]); |
results[i].err++; |
} |
if ((results[i].execs & ID_MATRIX) && |
(results[i].crcmatrix!=matrix_known_crc[known_id])) { |
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",i,results[i].crcmatrix,matrix_known_crc[known_id]); |
results[i].err++; |
} |
if ((results[i].execs & ID_STATE) && |
(results[i].crcstate!=state_known_crc[known_id])) { |
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",i,results[i].crcstate,state_known_crc[known_id]); |
results[i].err++; |
} |
total_errors+=results[i].err; |
} |
} |
total_errors+=check_data_types(); |
/* and report results */ |
ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size); |
ee_printf("Total ticks : see NEORV32 output below\n", (long unsigned) total_time); /* NEORV32 modified due to overflow in %lu in ee_printf */ |
stop_time(); |
total_time = get_time(); |
/* get a function of the input to report */ |
seedcrc = crc16(results[0].seed1, seedcrc); |
seedcrc = crc16(results[0].seed2, seedcrc); |
seedcrc = crc16(results[0].seed3, seedcrc); |
seedcrc = crc16(results[0].size, seedcrc); |
|
switch (seedcrc) |
{ /* test known output for common seeds */ |
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */ |
known_id = 0; |
ee_printf("6k performance run parameters for coremark.\n"); |
break; |
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per |
algorithm */ |
known_id = 1; |
ee_printf("6k validation run parameters for coremark.\n"); |
break; |
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm |
*/ |
known_id = 2; |
ee_printf("Profile generation run parameters for coremark.\n"); |
break; |
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */ |
known_id = 3; |
ee_printf("2K performance run parameters for coremark.\n"); |
break; |
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per |
algorithm */ |
known_id = 4; |
ee_printf("2K validation run parameters for coremark.\n"); |
break; |
default: |
total_errors = -1; |
break; |
} |
if (known_id >= 0) |
{ |
for (i = 0; i < default_num_contexts; i++) |
{ |
results[i].err = 0; |
if ((results[i].execs & ID_LIST) |
&& (results[i].crclist != list_known_crc[known_id])) |
{ |
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n", |
i, |
results[i].crclist, |
list_known_crc[known_id]); |
results[i].err++; |
} |
if ((results[i].execs & ID_MATRIX) |
&& (results[i].crcmatrix != matrix_known_crc[known_id])) |
{ |
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n", |
i, |
results[i].crcmatrix, |
matrix_known_crc[known_id]); |
results[i].err++; |
} |
if ((results[i].execs & ID_STATE) |
&& (results[i].crcstate != state_known_crc[known_id])) |
{ |
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n", |
i, |
results[i].crcstate, |
state_known_crc[known_id]); |
results[i].err++; |
} |
total_errors += results[i].err; |
} |
} |
total_errors += check_data_types(); |
/* and report results */ |
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size); |
ee_printf("Total ticks : %lu\n", (long unsigned)total_time); |
#if HAS_FLOAT |
ee_printf("Total time (secs): %f\n",time_in_secs(total_time)); |
if (time_in_secs(total_time) > 0) |
ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); |
#else |
ee_printf("Total time (secs): %d\n",time_in_secs(total_time)); |
if (time_in_secs(total_time) > 0) |
ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); |
ee_printf("Total time (secs): %f\n", time_in_secs(total_time)); |
if (time_in_secs(total_time) > 0) |
ee_printf("Iterations/Sec : %f\n", |
default_num_contexts * results[0].iterations |
/ time_in_secs(total_time)); |
#else |
ee_printf("Total time (secs): %d\n", time_in_secs(total_time)); |
if (time_in_secs(total_time) > 0) |
ee_printf("Iterations/Sec : %d\n", |
default_num_contexts * results[0].iterations |
/ time_in_secs(total_time)); |
#endif |
if (time_in_secs(total_time) < 10) { |
ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n"); |
total_errors++; |
} |
if (time_in_secs(total_time) < 10) |
{ |
ee_printf( |
"ERROR! Must execute for at least 10 secs for a valid result!\n"); |
total_errors++; |
} |
|
ee_printf("Iterations : %lu\n", (long unsigned) default_num_contexts*results[0].iterations); |
ee_printf("Compiler version : %s\n",COMPILER_VERSION); |
ee_printf("Compiler flags : %s\n",COMPILER_FLAGS); |
#if (MULTITHREAD>1) |
ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts); |
ee_printf("Iterations : %lu\n", |
(long unsigned)default_num_contexts * results[0].iterations); |
ee_printf("Compiler version : %s\n", COMPILER_VERSION); |
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS); |
#if (MULTITHREAD > 1) |
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts); |
#endif |
ee_printf("Memory location : %s\n",MEM_LOCATION); |
/* output for verification */ |
ee_printf("seedcrc : 0x%04x\n",seedcrc); |
if (results[0].execs & ID_LIST) |
for (i=0 ; i<default_num_contexts; i++) |
ee_printf("[%d]crclist : 0x%04x\n",i,results[i].crclist); |
if (results[0].execs & ID_MATRIX) |
for (i=0 ; i<default_num_contexts; i++) |
ee_printf("[%d]crcmatrix : 0x%04x\n",i,results[i].crcmatrix); |
if (results[0].execs & ID_STATE) |
for (i=0 ; i<default_num_contexts; i++) |
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate); |
for (i=0 ; i<default_num_contexts; i++) |
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc); |
if (total_errors==0) { |
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n"); |
ee_printf("Memory location : %s\n", MEM_LOCATION); |
/* output for verification */ |
ee_printf("seedcrc : 0x%04x\n", seedcrc); |
if (results[0].execs & ID_LIST) |
for (i = 0; i < default_num_contexts; i++) |
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist); |
if (results[0].execs & ID_MATRIX) |
for (i = 0; i < default_num_contexts; i++) |
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix); |
if (results[0].execs & ID_STATE) |
for (i = 0; i < default_num_contexts; i++) |
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate); |
for (i = 0; i < default_num_contexts; i++) |
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc); |
if (total_errors == 0) |
{ |
ee_printf( |
"Correct operation validated. See README.md for run and reporting " |
"rules.\n"); |
#if HAS_FLOAT |
if (known_id==3) { |
ee_printf("CoreMark 1.0 : %f / %s %s",default_num_contexts*results[0].iterations/time_in_secs(total_time),COMPILER_VERSION,COMPILER_FLAGS); |
if (known_id == 3) |
{ |
ee_printf("CoreMark 1.0 : %f / %s %s", |
default_num_contexts * results[0].iterations |
/ time_in_secs(total_time), |
COMPILER_VERSION, |
COMPILER_FLAGS); |
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC) |
ee_printf(" / %s",MEM_LOCATION); |
ee_printf(" / %s", MEM_LOCATION); |
#else |
ee_printf(" / %s",mem_name[MEM_METHOD]); |
ee_printf(" / %s", mem_name[MEM_METHOD]); |
#endif |
|
#if (MULTITHREAD>1) |
ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD); |
#if (MULTITHREAD > 1) |
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD); |
#endif |
ee_printf("\n"); |
} |
ee_printf("\n"); |
} |
#endif |
} |
if (total_errors>0) |
ee_printf("Errors detected\n"); |
if (total_errors<0) |
ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n"); |
} |
if (total_errors > 0) |
ee_printf("Errors detected\n"); |
if (total_errors < 0) |
ee_printf( |
"Cannot validate operation for these seed values, please compare " |
"with results on a known platform.\n"); |
|
#if (MEM_METHOD==MEM_MALLOC) |
for (i=0 ; i<MULTITHREAD; i++) |
portable_free(results[i].memblock[0]); |
#if (MEM_METHOD == MEM_MALLOC) |
for (i = 0; i < MULTITHREAD; i++) |
portable_free(results[i].memblock[0]); |
#endif |
/* And last call any target specific code for finalizing */ |
portable_fini(&(results[0].port)); |
/* And last call any target specific code for finalizing */ |
portable_fini(&(results[0].port)); |
|
return MAIN_RETURN_VAL; |
return MAIN_RETURN_VAL; |
} |
|
|
/sw/example/coremark/core_matrix.c
19,290 → 19,341
#include "coremark.h" |
/* |
Topic: Description |
Matrix manipulation benchmark |
|
This very simple algorithm forms the basis of many more complex algorithms. |
|
The tight inner loop is the focus of many optimizations (compiler as well as hardware based) |
and is thus relevant for embedded processing. |
|
The total available data space will be divided to 3 parts: |
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero). |
NxN Matrix B - initialized with medium values (upper half of the bits all zero). |
NxN Matrix C - used for the result. |
Matrix manipulation benchmark |
|
The actual values for A and B must be derived based on input that is not available at compile time. |
This very simple algorithm forms the basis of many more complex |
algorithms. |
|
The tight inner loop is the focus of many optimizations (compiler as |
well as hardware based) and is thus relevant for embedded processing. |
|
The total available data space will be divided to 3 parts: |
NxN Matrix A - initialized with small values (upper 3/4 of the bits all |
zero). |
NxN Matrix B - initialized with medium values (upper half of the bits all |
zero). |
NxN Matrix C - used for the result. |
|
The actual values for A and B must be derived based on input that is not |
available at compile time. |
*/ |
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val); |
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval); |
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val); |
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val); |
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val); |
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B); |
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val); |
|
#define matrix_test_next(x) (x+1) |
#define matrix_clip(x,y) ((y) ? (x) & 0x0ff : (x) & 0x0ffff) |
#define matrix_big(x) (0xf000 | (x)) |
#define bit_extract(x,from,to) (((x)>>(from)) & (~(0xffffffff << (to)))) |
#define matrix_test_next(x) (x + 1) |
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff) |
#define matrix_big(x) (0xf000 | (x)) |
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to)))) |
|
#if CORE_DEBUG |
void printmat(MATDAT *A, ee_u32 N, char *name) { |
ee_u32 i,j; |
ee_printf("Matrix %s [%dx%d]:\n",name,N,N); |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
if (j!=0) |
ee_printf(","); |
ee_printf("%d",A[i*N+j]); |
} |
ee_printf("\n"); |
} |
void |
printmat(MATDAT *A, ee_u32 N, char *name) |
{ |
ee_u32 i, j; |
ee_printf("Matrix %s [%dx%d]:\n", name, N, N); |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
if (j != 0) |
ee_printf(","); |
ee_printf("%d", A[i * N + j]); |
} |
ee_printf("\n"); |
} |
} |
void printmatC(MATRES *C, ee_u32 N, char *name) { |
ee_u32 i,j; |
ee_printf("Matrix %s [%dx%d]:\n",name,N,N); |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
if (j!=0) |
ee_printf(","); |
ee_printf("%d",C[i*N+j]); |
} |
ee_printf("\n"); |
} |
void |
printmatC(MATRES *C, ee_u32 N, char *name) |
{ |
ee_u32 i, j; |
ee_printf("Matrix %s [%dx%d]:\n", name, N, N); |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
if (j != 0) |
ee_printf(","); |
ee_printf("%d", C[i * N + j]); |
} |
ee_printf("\n"); |
} |
} |
#endif |
/* Function: core_bench_matrix |
Benchmark function |
Benchmark function |
|
Iterate <matrix_test> N times, |
changing the matrix values slightly by a constant amount each time. |
Iterate <matrix_test> N times, |
changing the matrix values slightly by a constant amount each time. |
*/ |
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) { |
ee_u32 N=p->N; |
MATRES *C=p->C; |
MATDAT *A=p->A; |
MATDAT *B=p->B; |
MATDAT val=(MATDAT)seed; |
ee_u16 |
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) |
{ |
ee_u32 N = p->N; |
MATRES *C = p->C; |
MATDAT *A = p->A; |
MATDAT *B = p->B; |
MATDAT val = (MATDAT)seed; |
|
crc=crc16(matrix_test(N,C,A,B,val),crc); |
crc = crc16(matrix_test(N, C, A, B, val), crc); |
|
return crc; |
return crc; |
} |
|
/* Function: matrix_test |
Perform matrix manipulation. |
Perform matrix manipulation. |
|
Parameters: |
N - Dimensions of the matrix. |
C - memory for result matrix. |
A - input matrix |
B - operator matrix (not changed during operations) |
Parameters: |
N - Dimensions of the matrix. |
C - memory for result matrix. |
A - input matrix |
B - operator matrix (not changed during operations) |
|
Returns: |
A CRC value that captures all results calculated in the function. |
In particular, crc of the value calculated on the result matrix |
after each step by <matrix_sum>. |
Returns: |
A CRC value that captures all results calculated in the function. |
In particular, crc of the value calculated on the result matrix |
after each step by <matrix_sum>. |
|
Operation: |
|
1 - Add a constant value to all elements of a matrix. |
2 - Multiply a matrix by a constant. |
3 - Multiply a matrix by a vector. |
4 - Multiply a matrix by a matrix. |
5 - Add a constant value to all elements of a matrix. |
Operation: |
|
After the last step, matrix A is back to original contents. |
1 - Add a constant value to all elements of a matrix. |
2 - Multiply a matrix by a constant. |
3 - Multiply a matrix by a vector. |
4 - Multiply a matrix by a matrix. |
5 - Add a constant value to all elements of a matrix. |
|
After the last step, matrix A is back to original contents. |
*/ |
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) { |
ee_u16 crc=0; |
MATDAT clipval=matrix_big(val); |
ee_s16 |
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) |
{ |
ee_u16 crc = 0; |
MATDAT clipval = matrix_big(val); |
|
matrix_add_const(N,A,val); /* make sure data changes */ |
matrix_add_const(N, A, val); /* make sure data changes */ |
#if CORE_DEBUG |
printmat(A,N,"matrix_add_const"); |
printmat(A, N, "matrix_add_const"); |
#endif |
matrix_mul_const(N,C,A,val); |
crc=crc16(matrix_sum(N,C,clipval),crc); |
matrix_mul_const(N, C, A, val); |
crc = crc16(matrix_sum(N, C, clipval), crc); |
#if CORE_DEBUG |
printmatC(C,N,"matrix_mul_const"); |
printmatC(C, N, "matrix_mul_const"); |
#endif |
matrix_mul_vect(N,C,A,B); |
crc=crc16(matrix_sum(N,C,clipval),crc); |
matrix_mul_vect(N, C, A, B); |
crc = crc16(matrix_sum(N, C, clipval), crc); |
#if CORE_DEBUG |
printmatC(C,N,"matrix_mul_vect"); |
printmatC(C, N, "matrix_mul_vect"); |
#endif |
matrix_mul_matrix(N,C,A,B); |
crc=crc16(matrix_sum(N,C,clipval),crc); |
matrix_mul_matrix(N, C, A, B); |
crc = crc16(matrix_sum(N, C, clipval), crc); |
#if CORE_DEBUG |
printmatC(C,N,"matrix_mul_matrix"); |
printmatC(C, N, "matrix_mul_matrix"); |
#endif |
matrix_mul_matrix_bitextract(N,C,A,B); |
crc=crc16(matrix_sum(N,C,clipval),crc); |
matrix_mul_matrix_bitextract(N, C, A, B); |
crc = crc16(matrix_sum(N, C, clipval), crc); |
#if CORE_DEBUG |
printmatC(C,N,"matrix_mul_matrix_bitextract"); |
printmatC(C, N, "matrix_mul_matrix_bitextract"); |
#endif |
|
matrix_add_const(N,A,-val); /* return matrix to initial value */ |
return crc; |
|
matrix_add_const(N, A, -val); /* return matrix to initial value */ |
return crc; |
} |
|
/* Function : matrix_init |
Initialize the memory block for matrix benchmarking. |
Initialize the memory block for matrix benchmarking. |
|
Parameters: |
blksize - Size of memory to be initialized. |
memblk - Pointer to memory block. |
seed - Actual values chosen depend on the seed parameter. |
p - pointers to <mat_params> containing initialized matrixes. |
Parameters: |
blksize - Size of memory to be initialized. |
memblk - Pointer to memory block. |
seed - Actual values chosen depend on the seed parameter. |
p - pointers to <mat_params> containing initialized matrixes. |
|
Returns: |
Matrix dimensions. |
|
Note: |
The seed parameter MUST be supplied from a source that cannot be determined at compile time |
Returns: |
Matrix dimensions. |
|
Note: |
The seed parameter MUST be supplied from a source that cannot be |
determined at compile time |
*/ |
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) { |
ee_u32 N=0; |
MATDAT *A; |
MATDAT *B; |
ee_s32 order=1; |
MATDAT val; |
ee_u32 i=0,j=0; |
if (seed==0) |
seed=1; |
while (j<blksize) { |
i++; |
j=i*i*2*4; |
} |
N=i-1; |
A=(MATDAT *)align_mem(memblk); |
B=A+N*N; |
ee_u32 |
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) |
{ |
ee_u32 N = 0; |
MATDAT *A; |
MATDAT *B; |
ee_s32 order = 1; |
MATDAT val; |
ee_u32 i = 0, j = 0; |
if (seed == 0) |
seed = 1; |
while (j < blksize) |
{ |
i++; |
j = i * i * 2 * 4; |
} |
N = i - 1; |
A = (MATDAT *)align_mem(memblk); |
B = A + N * N; |
|
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
seed = ( ( order * seed ) % 65536 ); |
val = (seed + order); |
val=matrix_clip(val,0); |
B[i*N+j] = val; |
val = (val + order); |
val=matrix_clip(val,1); |
A[i*N+j] = val; |
order++; |
} |
} |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
seed = ((order * seed) % 65536); |
val = (seed + order); |
val = matrix_clip(val, 0); |
B[i * N + j] = val; |
val = (val + order); |
val = matrix_clip(val, 1); |
A[i * N + j] = val; |
order++; |
} |
} |
|
p->A=A; |
p->B=B; |
p->C=(MATRES *)align_mem(B+N*N); |
p->N=N; |
p->A = A; |
p->B = B; |
p->C = (MATRES *)align_mem(B + N * N); |
p->N = N; |
#if CORE_DEBUG |
printmat(A,N,"A"); |
printmat(B,N,"B"); |
printmat(A, N, "A"); |
printmat(B, N, "B"); |
#endif |
return N; |
return N; |
} |
|
/* Function: matrix_sum |
Calculate a function that depends on the values of elements in the matrix. |
Calculate a function that depends on the values of elements in the |
matrix. |
|
For each element, accumulate into a temporary variable. |
|
As long as this value is under the parameter clipval, |
add 1 to the result if the element is bigger than the previous. |
|
Otherwise, reset the accumulator and add 10 to the result. |
For each element, accumulate into a temporary variable. |
|
As long as this value is under the parameter clipval, |
add 1 to the result if the element is bigger than the previous. |
|
Otherwise, reset the accumulator and add 10 to the result. |
*/ |
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) { |
MATRES tmp=0,prev=0,cur=0; |
ee_s16 ret=0; |
ee_u32 i,j; |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
cur=C[i*N+j]; |
tmp+=cur; |
if (tmp>clipval) { |
ret+=10; |
tmp=0; |
} else { |
ret += (cur>prev) ? 1 : 0; |
} |
prev=cur; |
} |
} |
return ret; |
ee_s16 |
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) |
{ |
MATRES tmp = 0, prev = 0, cur = 0; |
ee_s16 ret = 0; |
ee_u32 i, j; |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
cur = C[i * N + j]; |
tmp += cur; |
if (tmp > clipval) |
{ |
ret += 10; |
tmp = 0; |
} |
else |
{ |
ret += (cur > prev) ? 1 : 0; |
} |
prev = cur; |
} |
} |
return ret; |
} |
|
/* Function: matrix_mul_const |
Multiply a matrix by a constant. |
This could be used as a scaler for instance. |
Multiply a matrix by a constant. |
This could be used as a scaler for instance. |
*/ |
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) { |
ee_u32 i,j; |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
C[i*N+j]=(MATRES)A[i*N+j] * (MATRES)val; |
} |
} |
void |
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) |
{ |
ee_u32 i, j; |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val; |
} |
} |
} |
|
/* Function: matrix_add_const |
Add a constant value to all elements of a matrix. |
Add a constant value to all elements of a matrix. |
*/ |
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) { |
ee_u32 i,j; |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
A[i*N+j] += val; |
} |
} |
void |
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) |
{ |
ee_u32 i, j; |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
A[i * N + j] += val; |
} |
} |
} |
|
/* Function: matrix_mul_vect |
Multiply a matrix by a vector. |
This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.) |
Multiply a matrix by a vector. |
This is common in many simple filters (e.g. fir where a vector of |
coefficients is applied to the matrix.) |
*/ |
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) { |
ee_u32 i,j; |
for (i=0; i<N; i++) { |
C[i]=0; |
for (j=0; j<N; j++) { |
C[i]+=(MATRES)A[i*N+j] * (MATRES)B[j]; |
} |
} |
/* matrix_mul_vect: matrix-vector product. For each row i of the N x N matrix
   A, C[i] receives the sum over j of A[i * N + j] * B[j].
   Only the first N entries of C are written. Each operand is cast to MATRES
   before multiplying. */
void |
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) |
{ |
ee_u32 i, j; |
for (i = 0; i < N; i++) |
{ |
/* clear the accumulator for this row before summing */
C[i] = 0; |
for (j = 0; j < N; j++) |
{ |
C[i] += (MATRES)A[i * N + j] * (MATRES)B[j]; |
} |
} |
} |
|
/* Function: matrix_mul_matrix |
Multiply a matrix by a matrix. |
Basic code is used in many algorithms, mostly with minor changes such as scaling. |
Multiply a matrix by a matrix. |
Basic code is used in many algorithms, mostly with minor changes such as |
scaling. |
*/ |
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) { |
ee_u32 i,j,k; |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
C[i*N+j]=0; |
for(k=0;k<N;k++) |
{ |
C[i*N+j]+=(MATRES)A[i*N+k] * (MATRES)B[k*N+j]; |
} |
} |
} |
/* matrix_mul_matrix: N x N matrix product, all matrices row-major.
   C[i * N + j] receives the sum over k of A[i * N + k] * B[k * N + j].
   Each operand is cast to MATRES before multiplying; the sum is accumulated
   directly in the output element. */
void |
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) |
{ |
ee_u32 i, j, k; |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
/* clear the output element, then accumulate row i of A times column j of B */
C[i * N + j] = 0; |
for (k = 0; k < N; k++) |
{ |
C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; |
} |
} |
} |
} |
|
/* Function: matrix_mul_matrix_bitextract |
Multiply a matrix by a matrix, and extract some bits from the result. |
Basic code is used in many algorithms, mostly with minor changes such as scaling. |
Multiply a matrix by a matrix, and extract some bits from the result. |
Basic code is used in many algorithms, mostly with minor changes such as |
scaling. |
*/ |
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) { |
ee_u32 i,j,k; |
for (i=0; i<N; i++) { |
for (j=0; j<N; j++) { |
C[i*N+j]=0; |
for(k=0;k<N;k++) |
{ |
MATRES tmp=(MATRES)A[i*N+k] * (MATRES)B[k*N+j]; |
C[i*N+j]+=bit_extract(tmp,2,4)*bit_extract(tmp,5,7); |
} |
} |
} |
/* matrix_mul_matrix_bitextract: same loop nest as matrix_mul_matrix, but each
   elementwise product tmp is not added directly. Instead the value
   bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7) is accumulated into
   C[i * N + j].
   NOTE(review): bit_extract is defined outside this chunk; confirm what its
   second and third arguments select before relying on this description. */
void |
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) |
{ |
ee_u32 i, j, k; |
for (i = 0; i < N; i++) |
{ |
for (j = 0; j < N; j++) |
{ |
C[i * N + j] = 0; |
for (k = 0; k < N; k++) |
{ |
/* full product in the result type, reduced by bit_extract below */
MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; |
C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7); |
} |
} |
} |
} |
/sw/example/coremark/core_portme.c
14,139 → 14,184
limitations under the License. |
|
Original Author: Shay Gal-on |
|
Modified for NEORV32 by Stephan Nolting |
*/ |
|
#include <stdio.h> |
#include <stdlib.h> |
/* Modified for the NEORV32 Processor - by Stephan Nolting */ |
|
#include "coremark.h" |
#include "core_portme.h" |
|
#if VALIDATION_RUN |
volatile ee_s32 seed1_volatile=0x3415; |
volatile ee_s32 seed2_volatile=0x3415; |
volatile ee_s32 seed3_volatile=0x66; |
volatile ee_s32 seed1_volatile = 0x3415; |
volatile ee_s32 seed2_volatile = 0x3415; |
volatile ee_s32 seed3_volatile = 0x66; |
#endif |
#if PERFORMANCE_RUN |
volatile ee_s32 seed1_volatile=0x0; |
volatile ee_s32 seed2_volatile=0x0; |
volatile ee_s32 seed3_volatile=0x66; |
volatile ee_s32 seed1_volatile = 0x0; |
volatile ee_s32 seed2_volatile = 0x0; |
volatile ee_s32 seed3_volatile = 0x66; |
#endif |
#if PROFILE_RUN |
volatile ee_s32 seed1_volatile=0x8; |
volatile ee_s32 seed2_volatile=0x8; |
volatile ee_s32 seed3_volatile=0x8; |
volatile ee_s32 seed1_volatile = 0x8; |
volatile ee_s32 seed2_volatile = 0x8; |
volatile ee_s32 seed3_volatile = 0x8; |
#endif |
volatile ee_s32 seed4_volatile=ITERATIONS; |
volatile ee_s32 seed5_volatile=0; |
volatile ee_s32 seed4_volatile = ITERATIONS; |
volatile ee_s32 seed5_volatile = 0; |
/* Porting : Timing functions |
How to capture time and convert to seconds must be ported to whatever is supported by the platform. |
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc. |
Sample implementation for standard time.h and windows.h definitions included. |
How to capture time and convert to seconds must be ported to whatever is |
supported by the platform. e.g. Read value from on board RTC, read value from |
cpu clock cycles performance counter etc. Sample implementation for standard |
time.h and windows.h definitions included. |
*/ |
/* barebones_clock: placeholder time source that always returns 0.
   The #error reminder below is commented out, so the stub compiles as is.
   The revision-38 GETMYTIME macro in this file reads neorv32_cpu_get_cycle()
   and does not call this function; no caller is visible in this chunk. */
CORETIMETYPE |
barebones_clock() |
{ |
/* |
#error \ |
"You must implement a method to measure time in barebones_clock()! This function should return current time.\n" |
*/ |
return 0; |
} |
/* Define : TIMER_RES_DIVIDER |
Divider to trade off timer resolution and total time that can be measured. |
Divider to trade off timer resolution and total time that can be |
measured. |
|
Use lower values to increase resolution, but make sure that overflow does not occur. |
If there are issues with the return value overflowing, increase this value. |
*/ |
#define NSECS_PER_SEC 20000000 |
#define CORETIMETYPE clock_t |
#define GETMYTIME(_t) (*_t=clock()) |
#define MYTIMEDIFF(fin,ini) ((fin)-(ini)) |
#define TIMER_RES_DIVIDER 1 |
Use lower values to increase resolution, but make sure that overflow |
does not occur. If there are issues with the return value overflowing, |
increase this value. |
*/ |
#define GETMYTIME(_t) (*_t = (CORETIMETYPE)neorv32_cpu_get_cycle()) |
#define MYTIMEDIFF(fin, ini) ((fin) - (ini)) |
#define TIMER_RES_DIVIDER 1 |
#define SAMPLE_TIME_IMPLEMENTATION 1 |
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) |
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER) |
|
CORE_TICKS elapsed_cycles; // NEORV32 specific |
|
/** Define Host specific (POSIX), or target specific global time variables. */ |
//static CORETIMETYPE start_time_val, stop_time_val; |
static CORETIMETYPE start_time_val, stop_time_val; |
|
/* Function : start_time |
This function will be called right before starting the timed portion of the benchmark. |
This function will be called right before starting the timed portion of |
the benchmark. |
|
Implementation may be capturing a system timer (as implemented in the example code) |
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. |
Implementation may be capturing a system timer (as implemented in the |
example code) or zeroing some system parameters - e.g. setting the cpu clocks |
cycles to 0. |
*/ |
void start_time(void) { |
elapsed_cycles = 0; // this is time zero |
neorv32_cpu_set_mcycle(0); |
neorv32_cpu_set_minstret(0); |
//GETMYTIME(&start_time_val ); |
/* start_time: capture the starting time stamp into the file-static
   start_time_val through GETMYTIME. In the revision-38 definition of that
   macro the stored value is neorv32_cpu_get_cycle() cast to CORETIMETYPE. */
void |
start_time(void) |
{ |
GETMYTIME(&start_time_val); |
} |
/* Function : stop_time |
This function will be called right after ending the timed portion of the benchmark. |
This function will be called right after ending the timed portion of the |
benchmark. |
|
Implementation may be capturing a system timer (as implemented in the example code) |
or other system parameters - e.g. reading the current value of cpu cycles counter. |
Implementation may be capturing a system timer (as implemented in the |
example code) or other system parameters - e.g. reading the current value of |
cpu cycles counter. |
*/ |
void stop_time(void) { |
//GETMYTIME(&stop_time_val ); |
/* stop_time: capture the ending time stamp into the file-static
   stop_time_val through GETMYTIME, the same macro start_time() uses. */
void |
stop_time(void) |
{ |
GETMYTIME(&stop_time_val); |
} |
/* Function : get_time |
Return an abstract "ticks" number that signifies time on the system. |
|
Actual value returned may be cpu cycles, milliseconds or any other value, |
as long as it can be converted to seconds by <time_in_secs>. |
This methodology is taken to accommodate any hardware or simulated platform. |
The sample implementation returns millisecs by default, |
and the resolution is controlled by <TIMER_RES_DIVIDER> |
Return an abstract "ticks" number that signifies time on the system. |
|
Actual value returned may be cpu cycles, milliseconds or any other |
value, as long as it can be converted to seconds by <time_in_secs>. This |
methodology is taken to accommodate any hardware or simulated platform. The |
sample implementation returns millisecs by default, and the resolution is |
controlled by <TIMER_RES_DIVIDER> |
*/ |
CORE_TICKS get_time(void) { |
CORE_TICKS elapsed = ((CORE_TICKS)neorv32_cpu_get_cycle()) - elapsed_cycles; |
elapsed_cycles = elapsed; |
//CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); |
return elapsed; |
/* get_time: return the tick difference between the readings stored by
   stop_time() and start_time(). MYTIMEDIFF(fin, ini) expands to
   (fin) - (ini); the result is cast to CORE_TICKS. No counter is read here,
   only the two saved values are used. */
CORE_TICKS |
get_time(void) |
{ |
CORE_TICKS elapsed |
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); |
return elapsed; |
} |
/* Function : time_in_secs |
Convert the value returned by get_time to seconds. |
Convert the value returned by get_time to seconds. |
|
The <secs_ret> type is used to accommodate systems with no support for floating point. |
Default implementation implemented by the EE_TICKS_PER_SEC macro above. |
The <secs_ret> type is used to accommodate systems with no support for |
floating point. Default implementation implemented by the EE_TICKS_PER_SEC |
macro above. |
*/ |
secs_ret time_in_secs(CORE_TICKS ticks) { |
//secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; |
secs_ret retval=(secs_ret)(ticks / SYSINFO_CLK); |
return retval; |
/* time_in_secs: convert a tick count to seconds by dividing by SYSINFO_CLK.
   Both operands are cast to secs_ret before the division, so the division is
   carried out in that type. secs_ret and SYSINFO_CLK are defined outside this
   chunk. EE_TICKS_PER_SEC is not used by this implementation. */
secs_ret |
time_in_secs(CORE_TICKS ticks) |
{ |
/* NEORV32-specific */ |
secs_ret retval = ((secs_ret)ticks) / (secs_ret)SYSINFO_CLK; |
return retval; |
} |
|
ee_u32 default_num_contexts=1; |
ee_u32 default_num_contexts = 1; |
|
/* Function : portable_init |
Target specific initialization code |
Test for some common mistakes. |
Target specific initialization code |
Test for some common mistakes. |
*/ |
void portable_init(core_portable *p, int *argc, char *argv[]) |
#ifndef RUN_COREMARK |
void |
__attribute__((__noreturn__)) |
portable_init(core_portable *p, int *argc, char *argv[]) |
#else |
void |
portable_init(core_portable *p, int *argc, char *argv[]) |
#endif |
{ |
// no interrupts, thanks |
neorv32_cpu_dint(); |
/* NEORV32-specific */ |
neorv32_cpu_dint(); // no interrupt, thanks |
neorv32_rte_setup(); // capture all exceptions and give debug information |
neorv32_uart_setup(BAUD_RATE, 0, 0); // setup UART |
|
// capture all exceptions and give debug information |
neorv32_rte_setup(); |
|
// setup neorv32 UART |
neorv32_uart_setup(BAUD_RATE, 0, 0); |
// Disable coremark compilation by default |
#ifndef RUN_COREMARK |
#warning COREMARK HAS NOT BEEN COMPILED! Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it. |
|
// inform the user if you are actually executing this |
neorv32_uart_printf("ERROR! CoreMark has not been compiled. Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it.\n"); |
|
while(1); |
#endif |
|
|
neorv32_uart_printf("NEORV32: Processor running at %u Hz\n", (uint32_t)SYSINFO_CLK); |
neorv32_uart_printf("NEORV32: Executing coremark (%u iterations). This may take some time...\n\n", (uint32_t)ITERATIONS); |
|
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { |
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); |
} |
if (sizeof(ee_u32) != 4) { |
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); |
} |
p->portable_id=1; |
/* |
#error \ |
"Call board initialization routines in portable init (if needed), in particular initialize UART!\n" |
*/ |
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) |
{ |
ee_printf( |
"ERROR! Please define ee_ptr_int to a type that holds a " |
"pointer!\n"); |
} |
if (sizeof(ee_u32) != 4) |
{ |
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n"); |
} |
p->portable_id = 1; |
|
#ifndef RUN_COREMARK |
while(1); |
#endif |
} |
/* Function : portable_fini |
Target specific final code |
Target specific final code |
*/ |
void portable_fini(core_portable *p) |
void |
portable_fini(core_portable *p) |
{ |
p->portable_id=0; |
p->portable_id = 0; |
|
/* NEORV32-specific */ |
|
// show executed instructions, required cycles and resulting average CPI |
union { |
uint64_t uint64; |
153,7 → 198,7
uint32_t uint32[sizeof(uint64_t)/2]; |
} exe_instructions, exe_time; |
|
exe_time.uint64 = (uint64_t)elapsed_cycles; |
exe_time.uint64 = (uint64_t)get_time(); |
exe_instructions.uint64 = neorv32_cpu_get_instret(); |
|
neorv32_uart_printf("\nNEORV32: All reported numbers only show the integer results.\n\n"); |
161,6 → 206,7
neorv32_uart_printf("NEORV32: Executed instructions 0x%x_%x\n", (uint32_t)exe_instructions.uint32[1], (uint32_t)exe_instructions.uint32[0]); |
neorv32_uart_printf("NEORV32: CoreMark core clock cycles 0x%x_%x\n", (uint32_t)exe_time.uint32[1], (uint32_t)exe_time.uint32[0]); |
|
uint64_t average_cpi = exe_time.uint64 / exe_instructions.uint64; |
neorv32_uart_printf("NEORV32: Average CPI (integer part only): %u cycles/instruction\n", (uint32_t)average_cpi); |
uint64_t average_cpi_int = exe_time.uint64 / exe_instructions.uint64; |
neorv32_uart_printf("NEORV32: Average CPI (integer part only): %u cycles/instruction\n", (uint32_t)average_cpi_int); |
|
} |
/sw/example/coremark/core_portme.h
14,117 → 14,120
limitations under the License. |
|
Original Author: Shay Gal-on |
|
Modified for NEORV32 by Stephan Nolting |
*/ |
|
/* Modified for the NEORV32 Processor - by Stephan Nolting */ |
|
/* Topic : Description |
This file contains configuration constants required to execute on different platforms |
This file contains configuration constants required to execute on |
different platforms |
*/ |
#ifndef CORE_PORTME_H |
#define CORE_PORTME_H |
|
|
// NEORV32 libraries |
#include <stdint.h> |
#include <stdlib.h> |
#include <neorv32.h> |
|
// Manual NEORV32 config: |
#define BAUD_RATE (19200) |
#define ITERATIONS (2000) |
#define FLAGS_STR "-> see makefile" // compiler optimization |
/************************/ |
/* NEORV32-specific */ |
/************************/ |
#define BAUD_RATE (19200) |
#define ITERATIONS (2000) |
#define FLAGS_STR "-> default, see makefile" // compiler optimization |
|
// For debugging |
#define xstr(a) str(a) |
#define str(a) #a |
|
|
/************************/ |
/* Data types and settings */ |
/************************/ |
/* Configuration : HAS_FLOAT |
Define to 1 if the platform supports floating point. |
/* Configuration : HAS_FLOAT |
Define to 1 if the platform supports floating point. |
*/ |
#ifndef HAS_FLOAT |
#ifndef HAS_FLOAT |
#define HAS_FLOAT 0 |
#endif |
/* Configuration : HAS_TIME_H |
Define to 1 if platform has the time.h header file, |
and implementation of functions thereof. |
Define to 1 if platform has the time.h header file, |
and implementation of functions thereof. |
*/ |
#ifndef HAS_TIME_H |
#define HAS_TIME_H 0 |
#endif |
/* Configuration : USE_CLOCK |
Define to 1 if platform has the time.h header file, |
and implementation of functions thereof. |
Define to 1 if platform has the time.h header file, |
and implementation of functions thereof. |
*/ |
#ifndef USE_CLOCK |
#define USE_CLOCK 0 |
#endif |
/* Configuration : HAS_STDIO |
Define to 1 if the platform has stdio.h. |
Define to 1 if the platform has stdio.h. |
*/ |
#ifndef HAS_STDIO |
#define HAS_STDIO 0 |
#endif |
/* Configuration : HAS_PRINTF |
Define to 1 if the platform has stdio.h and implements the printf function. |
Define to 1 if the platform has stdio.h and implements the printf |
function. |
*/ |
#ifndef HAS_PRINTF |
#define HAS_PRINTF 0 |
#endif |
|
/* Configuration : CORE_TICKS |
Define type of return from the timing functions. |
*/ |
#include <time.h> |
//typedef clock_t CORE_TICKS; |
typedef uint64_t CORE_TICKS; |
|
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION |
Initialize these strings per platform |
Initialize these strings per platform |
*/ |
#ifndef COMPILER_VERSION |
#ifdef __GNUC__ |
#define COMPILER_VERSION "GCC"__VERSION__ |
#else |
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" |
#endif |
#ifndef COMPILER_VERSION |
#ifdef __GNUC__ |
#define COMPILER_VERSION "GCC"__VERSION__ |
#else |
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" |
#endif |
#ifndef COMPILER_FLAGS |
#define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ |
#endif |
#ifndef MEM_LOCATION |
#define MEM_LOCATION "STACK" |
#ifndef COMPILER_FLAGS |
#define COMPILER_FLAGS \ |
FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ |
#endif |
#ifndef MEM_LOCATION |
#define MEM_LOCATION "STACK" |
#endif |
|
/* Data Types : |
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>. |
|
*Important* : |
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! |
To avoid compiler issues, define the data types that need to be used for |
8b, 16b and 32b in <core_portme.h>. |
|
*Important* : |
ee_ptr_int needs to be the data type used to hold pointers, otherwise |
coremark may fail!!! |
*/ |
typedef signed short ee_s16; |
typedef signed short ee_s16; |
typedef unsigned short ee_u16; |
typedef signed int ee_s32; |
typedef double ee_f32; |
typedef unsigned char ee_u8; |
typedef unsigned int ee_u32; |
typedef ee_u32 ee_ptr_int; |
typedef size_t ee_size_t; |
typedef signed int ee_s32; |
typedef double ee_f32; |
typedef unsigned char ee_u8; |
typedef unsigned int ee_u32; |
/* 64-bit unsigned type backing CORETIMETYPE and CORE_TICKS.
   `unsigned long` is only 32 bits wide under the RV32 ILP32 ABI, which would
   truncate the cycle count that portable_fini() handles as a 64-bit value
   (it prints it as two 32-bit halves). `unsigned long long` is at least
   64 bits on every conforming C implementation. */
typedef unsigned long long ee_u64;
typedef ee_u32 ee_ptr_int; |
typedef size_t ee_size_t; |
#define NULL ((void *)0) |
/* align_mem : |
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. |
This macro is used to align an offset to point to a 32b value. It is |
used in the Matrix algorithm to initialize the input memory blocks. |
*/ |
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x) - 1) & ~3)) |
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) |
|
/* Configuration : CORE_TICKS |
Define type of return from the timing functions. |
*/ |
#define CORETIMETYPE ee_u64 |
typedef ee_u64 CORE_TICKS; |
|
/* Configuration : SEED_METHOD |
Defines method to get seed values that cannot be computed at compile time. |
|
Valid values : |
SEED_ARG - from command line. |
SEED_FUNC - from a system function. |
SEED_VOLATILE - from volatile variables. |
Defines method to get seed values that cannot be computed at compile |
time. |
|
Valid values : |
SEED_ARG - from command line. |
SEED_FUNC - from a system function. |
SEED_VOLATILE - from volatile variables. |
*/ |
#ifndef SEED_METHOD |
#define SEED_METHOD SEED_VOLATILE |
131,12 → 134,12
#endif |
|
/* Configuration : MEM_METHOD |
Defines method to get a block of memory. |
|
Valid values : |
MEM_MALLOC - for platforms that implement malloc and have malloc.h. |
MEM_STATIC - to use a static memory array. |
MEM_STACK - to allocate the data block on the stack (NYI). |
Defines method to get a block of memory. |
|
Valid values : |
MEM_MALLOC - for platforms that implement malloc and have malloc.h. |
MEM_STATIC - to use a static memory array. |
MEM_STACK - to allocate the data block on the stack (NYI). |
*/ |
#ifndef MEM_METHOD |
#define MEM_METHOD MEM_STACK |
143,47 → 146,50
#endif |
|
/* Configuration : MULTITHREAD |
Define for parallel execution |
|
Valid values : |
1 - only one context (default). |
N>1 - will execute N copies in parallel. |
|
Note : |
If this flag is defined to more then 1, an implementation for launching parallel contexts must be defined. |
|
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them. |
|
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>, |
to fit a particular architecture. |
Define for parallel execution |
|
Valid values : |
1 - only one context (default). |
N>1 - will execute N copies in parallel. |
|
Note : |
If this flag is defined to more then 1, an implementation for launching |
parallel contexts must be defined. |
|
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> |
to enable them. |
|
It is valid to have a different implementation of <core_start_parallel> |
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture. |
*/ |
#ifndef MULTITHREAD |
#define MULTITHREAD 1 |
#define USE_PTHREAD 0 |
#define USE_FORK 0 |
#define USE_SOCKET 0 |
#define USE_FORK 0 |
#define USE_SOCKET 0 |
#endif |
|
/* Configuration : MAIN_HAS_NOARGC |
Needed if platform does not support getting arguments to main. |
|
Valid values : |
0 - argc/argv to main is supported |
1 - argc/argv to main is not supported |
|
Note : |
This flag only matters if MULTITHREAD has been defined to a value greater then 1. |
Needed if platform does not support getting arguments to main. |
|
Valid values : |
0 - argc/argv to main is supported |
1 - argc/argv to main is not supported |
|
Note : |
This flag only matters if MULTITHREAD has been defined to a value |
greater then 1. |
*/ |
#ifndef MAIN_HAS_NOARGC |
#define MAIN_HAS_NOARGC 1 |
#ifndef MAIN_HAS_NOARGC |
#define MAIN_HAS_NOARGC 0 |
#endif |
|
/* Configuration : MAIN_HAS_NORETURN |
Needed if platform does not support returning a value from main. |
|
Valid values : |
0 - main returns an int, and return value will be 0. |
1 - platform does not support returning a value from main |
Needed if platform does not support returning a value from main. |
|
Valid values : |
0 - main returns an int, and return value will be 0. |
1 - platform does not support returning a value from main |
*/ |
#ifndef MAIN_HAS_NORETURN |
#define MAIN_HAS_NORETURN 0 |
190,25 → 196,31
#endif |
|
/* Variable : default_num_contexts |
Not used for this simple port, must contain the value 1. |
Not used for this simple port, must contain the value 1. |
*/ |
extern ee_u32 default_num_contexts; |
|
typedef struct CORE_PORTABLE_S { |
ee_u8 portable_id; |
/* core_portable: per-port state handed to portable_init()/portable_fini(). */
typedef struct CORE_PORTABLE_S |
{ |
/* set to 1 by portable_init() and to 0 by portable_fini() in core_portme.c */
ee_u8 portable_id; |
} core_portable; |
|
/* target specific init/fini */ |
void portable_init(core_portable *p, int *argc, char *argv[]); |
#ifndef RUN_COREMARK |
void |
__attribute__((__noreturn__)) |
portable_init(core_portable *p, int *argc, char *argv[]); |
#else |
void |
portable_init(core_portable *p, int *argc, char *argv[]); |
#endif |
void portable_fini(core_portable *p); |
|
// special printf |
int ee_printf(const char *fmt, ...); |
|
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) |
#if (TOTAL_DATA_SIZE==1200) |
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \ |
&& !defined(VALIDATION_RUN) |
#if (TOTAL_DATA_SIZE == 1200) |
#define PROFILE_RUN 1 |
#elif (TOTAL_DATA_SIZE==2000) |
#elif (TOTAL_DATA_SIZE == 2000) |
#define PERFORMANCE_RUN 1 |
#else |
#define VALIDATION_RUN 1 |
215,4 → 227,6
#endif |
#endif |
|
int ee_printf(const char *fmt, ...); |
|
#endif /* CORE_PORTME_H */ |
/sw/example/coremark/core_state.c
18,260 → 18,313
|
#include "coremark.h" |
/* local functions */ |
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count); |
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count); |
|
/* |
Topic: Description |
Simple state machines like this one are used in many embedded products. |
|
For more complex state machines, sometimes a state transition table implementation is used instead, |
trading speed of direct coding for ease of maintenance. |
|
Since the main goal of using a state machine in CoreMark is to exercise the switch/if behaviour, |
we are using a small moore machine. |
|
In particular, this machine tests type of string input, |
trying to determine whether the input is a number or something else. |
(see core_state.png). |
Simple state machines like this one are used in many embedded products. |
|
For more complex state machines, sometimes a state transition table |
implementation is used instead, trading speed of direct coding for ease of |
maintenance. |
|
Since the main goal of using a state machine in CoreMark is to exercise |
the switch/if behaviour, we are using a small moore machine. |
|
In particular, this machine tests type of string input, |
trying to determine whether the input is a number or something else. |
(see core_state.png). |
*/ |
|
/* Function: core_bench_state |
Benchmark function |
Benchmark function |
|
Go over the input twice, once direct, and once after introducing some corruption. |
Go over the input twice, once direct, and once after introducing some |
corruption. |
*/ |
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, |
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc) |
ee_u16 |
core_bench_state(ee_u32 blksize, |
ee_u8 *memblock, |
ee_s16 seed1, |
ee_s16 seed2, |
ee_s16 step, |
ee_u16 crc) |
{ |
ee_u32 final_counts[NUM_CORE_STATES]; |
ee_u32 track_counts[NUM_CORE_STATES]; |
ee_u8 *p=memblock; |
ee_u32 i; |
ee_u32 final_counts[NUM_CORE_STATES]; |
ee_u32 track_counts[NUM_CORE_STATES]; |
ee_u8 *p = memblock; |
ee_u32 i; |
|
|
#if CORE_DEBUG |
ee_printf("State Bench: %d,%d,%d,%04x\n",seed1,seed2,step,crc); |
ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc); |
#endif |
for (i=0; i<NUM_CORE_STATES; i++) { |
final_counts[i]=track_counts[i]=0; |
} |
/* run the state machine over the input */ |
while (*p!=0) { |
enum CORE_STATE fstate=core_state_transition(&p,track_counts); |
final_counts[fstate]++; |
for (i = 0; i < NUM_CORE_STATES; i++) |
{ |
final_counts[i] = track_counts[i] = 0; |
} |
/* run the state machine over the input */ |
while (*p != 0) |
{ |
enum CORE_STATE fstate = core_state_transition(&p, track_counts); |
final_counts[fstate]++; |
#if CORE_DEBUG |
ee_printf("%d,",fstate); |
} |
ee_printf("\n"); |
ee_printf("%d,", fstate); |
} |
ee_printf("\n"); |
#else |
} |
} |
#endif |
p=memblock; |
while (p < (memblock+blksize)) { /* insert some corruption */ |
if (*p!=',') |
*p^=(ee_u8)seed1; |
p+=step; |
} |
p=memblock; |
/* run the state machine over the input again */ |
while (*p!=0) { |
enum CORE_STATE fstate=core_state_transition(&p,track_counts); |
final_counts[fstate]++; |
p = memblock; |
while (p < (memblock + blksize)) |
{ /* insert some corruption */ |
if (*p != ',') |
*p ^= (ee_u8)seed1; |
p += step; |
} |
p = memblock; |
/* run the state machine over the input again */ |
while (*p != 0) |
{ |
enum CORE_STATE fstate = core_state_transition(&p, track_counts); |
final_counts[fstate]++; |
#if CORE_DEBUG |
ee_printf("%d,",fstate); |
} |
ee_printf("\n"); |
ee_printf("%d,", fstate); |
} |
ee_printf("\n"); |
#else |
} |
} |
#endif |
p=memblock; |
while (p < (memblock+blksize)) { /* undo corruption is seed1 and seed2 are equal */ |
if (*p!=',') |
*p^=(ee_u8)seed2; |
p+=step; |
} |
/* end timing */ |
for (i=0; i<NUM_CORE_STATES; i++) { |
crc=crcu32(final_counts[i],crc); |
crc=crcu32(track_counts[i],crc); |
} |
return crc; |
p = memblock; |
while (p < (memblock + blksize)) |
{ /* undo corruption is seed1 and seed2 are equal */ |
if (*p != ',') |
*p ^= (ee_u8)seed2; |
p += step; |
} |
/* end timing */ |
for (i = 0; i < NUM_CORE_STATES; i++) |
{ |
crc = crcu32(final_counts[i], crc); |
crc = crcu32(track_counts[i], crc); |
} |
return crc; |
} |
|
/* Default initialization patterns */ |
static ee_u8 *intpat[4] ={(ee_u8 *)"5012",(ee_u8 *)"1234",(ee_u8 *)"-874",(ee_u8 *)"+122"}; |
static ee_u8 *floatpat[4]={(ee_u8 *)"35.54400",(ee_u8 *)".1234500",(ee_u8 *)"-110.700",(ee_u8 *)"+0.64400"}; |
static ee_u8 *scipat[4] ={(ee_u8 *)"5.500e+3",(ee_u8 *)"-.123e-2",(ee_u8 *)"-87e+832",(ee_u8 *)"+0.6e-12"}; |
static ee_u8 *errpat[4] ={(ee_u8 *)"T0.3e-1F",(ee_u8 *)"-T.T++Tq",(ee_u8 *)"1T3.4e4z",(ee_u8 *)"34.0e-T^"}; |
static ee_u8 *intpat[4] |
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" }; |
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400", |
(ee_u8 *)".1234500", |
(ee_u8 *)"-110.700", |
(ee_u8 *)"+0.64400" }; |
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3", |
(ee_u8 *)"-.123e-2", |
(ee_u8 *)"-87e+832", |
(ee_u8 *)"+0.6e-12" }; |
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F", |
(ee_u8 *)"-T.T++Tq", |
(ee_u8 *)"1T3.4e4z", |
(ee_u8 *)"34.0e-T^" }; |
|
/* Function: core_init_state |
Initialize the input data for the state machine. |
Initialize the input data for the state machine. |
|
Populate the input with several predetermined strings, interspersed. |
Actual patterns chosen depend on the seed parameter. |
|
Note: |
The seed parameter MUST be supplied from a source that cannot be determined at compile time |
Populate the input with several predetermined strings, interspersed. |
Actual patterns chosen depend on the seed parameter. |
|
Note: |
The seed parameter MUST be supplied from a source that cannot be |
determined at compile time |
*/ |
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) { |
ee_u32 total=0,next=0,i; |
ee_u8 *buf=0; |
void |
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) |
{ |
ee_u32 total = 0, next = 0, i; |
ee_u8 *buf = 0; |
#if CORE_DEBUG |
ee_u8 *start=p; |
ee_printf("State: %d,%d\n",size,seed); |
ee_u8 *start = p; |
ee_printf("State: %d,%d\n", size, seed); |
#endif |
size--; |
next=0; |
while ((total+next+1)<size) { |
if (next>0) { |
for(i=0;i<next;i++) |
*(p+total+i)=buf[i]; |
*(p+total+i)=','; |
total+=next+1; |
} |
seed++; |
switch (seed & 0x7) { |
case 0: /* int */ |
case 1: /* int */ |
case 2: /* int */ |
buf=intpat[(seed>>3) & 0x3]; |
next=4; |
break; |
case 3: /* float */ |
case 4: /* float */ |
buf=floatpat[(seed>>3) & 0x3]; |
next=8; |
break; |
case 5: /* scientific */ |
case 6: /* scientific */ |
buf=scipat[(seed>>3) & 0x3]; |
next=8; |
break; |
case 7: /* invalid */ |
buf=errpat[(seed>>3) & 0x3]; |
next=8; |
break; |
default: /* Never happen, just to make some compilers happy */ |
break; |
} |
} |
size++; |
while (total<size) { /* fill the rest with 0 */ |
*(p+total)=0; |
total++; |
} |
size--; |
next = 0; |
while ((total + next + 1) < size) |
{ |
if (next > 0) |
{ |
for (i = 0; i < next; i++) |
*(p + total + i) = buf[i]; |
*(p + total + i) = ','; |
total += next + 1; |
} |
seed++; |
switch (seed & 0x7) |
{ |
case 0: /* int */ |
case 1: /* int */ |
case 2: /* int */ |
buf = intpat[(seed >> 3) & 0x3]; |
next = 4; |
break; |
case 3: /* float */ |
case 4: /* float */ |
buf = floatpat[(seed >> 3) & 0x3]; |
next = 8; |
break; |
case 5: /* scientific */ |
case 6: /* scientific */ |
buf = scipat[(seed >> 3) & 0x3]; |
next = 8; |
break; |
case 7: /* invalid */ |
buf = errpat[(seed >> 3) & 0x3]; |
next = 8; |
break; |
default: /* Never happen, just to make some compilers happy */ |
break; |
} |
} |
size++; |
while (total < size) |
{ /* fill the rest with 0 */ |
*(p + total) = 0; |
total++; |
} |
#if CORE_DEBUG |
ee_printf("State Input: %s\n",start); |
ee_printf("State Input: %s\n", start); |
#endif |
} |
|
static ee_u8 ee_isdigit(ee_u8 c) { |
ee_u8 retval; |
retval = ((c>='0') & (c<='9')) ? 1 : 0; |
return retval; |
/* ee_isdigit: return 1 when c is one of the characters '0'..'9', else 0.
   The two range comparisons are joined with bitwise & rather than &&, so
   both are always evaluated (no short-circuit). */
static ee_u8 |
ee_isdigit(ee_u8 c) |
{ |
ee_u8 retval; |
retval = ((c >= '0') & (c <= '9')) ? 1 : 0; |
return retval; |
} |
|
/* Function: core_state_transition |
Actual state machine. |
Actual state machine. |
|
The state machine will continue scanning until either: |
1 - an invalid input is detected. |
2 - a valid number has been detected. |
|
The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid). |
The state machine will continue scanning until either: |
1 - an invalid input is detected. |
2 - a valid number has been detected. |
|
The input pointer is updated to point to the end of the token, and the |
end state is returned (either specific format determined or invalid). |
*/ |
|
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) { |
ee_u8 *str=*instr; |
ee_u8 NEXT_SYMBOL; |
enum CORE_STATE state=CORE_START; |
for( ; *str && state != CORE_INVALID; str++ ) { |
NEXT_SYMBOL = *str; |
if (NEXT_SYMBOL==',') /* end of this input */ { |
str++; |
break; |
} |
switch(state) { |
case CORE_START: |
if(ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_INT; |
} |
else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { |
state = CORE_S1; |
} |
else if( NEXT_SYMBOL == '.' ) { |
state = CORE_FLOAT; |
} |
else { |
state = CORE_INVALID; |
transition_count[CORE_INVALID]++; |
} |
transition_count[CORE_START]++; |
break; |
case CORE_S1: |
if(ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_INT; |
transition_count[CORE_S1]++; |
} |
else if( NEXT_SYMBOL == '.' ) { |
state = CORE_FLOAT; |
transition_count[CORE_S1]++; |
} |
else { |
state = CORE_INVALID; |
transition_count[CORE_S1]++; |
} |
break; |
case CORE_INT: |
if( NEXT_SYMBOL == '.' ) { |
state = CORE_FLOAT; |
transition_count[CORE_INT]++; |
} |
else if(!ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_INVALID; |
transition_count[CORE_INT]++; |
} |
break; |
case CORE_FLOAT: |
if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) { |
state = CORE_S2; |
transition_count[CORE_FLOAT]++; |
} |
else if(!ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_INVALID; |
transition_count[CORE_FLOAT]++; |
} |
break; |
case CORE_S2: |
if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { |
state = CORE_EXPONENT; |
transition_count[CORE_S2]++; |
} |
else { |
state = CORE_INVALID; |
transition_count[CORE_S2]++; |
} |
break; |
case CORE_EXPONENT: |
if(ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_SCIENTIFIC; |
transition_count[CORE_EXPONENT]++; |
} |
else { |
state = CORE_INVALID; |
transition_count[CORE_EXPONENT]++; |
} |
break; |
case CORE_SCIENTIFIC: |
if(!ee_isdigit(NEXT_SYMBOL)) { |
state = CORE_INVALID; |
transition_count[CORE_INVALID]++; |
} |
break; |
default: |
break; |
} |
} |
*instr=str; |
return state; |
enum CORE_STATE |
core_state_transition(ee_u8 **instr, ee_u32 *transition_count) |
{ |
ee_u8 * str = *instr; |
ee_u8 NEXT_SYMBOL; |
enum CORE_STATE state = CORE_START; |
for (; *str && state != CORE_INVALID; str++) |
{ |
NEXT_SYMBOL = *str; |
if (NEXT_SYMBOL == ',') /* end of this input */ |
{ |
str++; |
break; |
} |
switch (state) |
{ |
case CORE_START: |
if (ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_INT; |
} |
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-') |
{ |
state = CORE_S1; |
} |
else if (NEXT_SYMBOL == '.') |
{ |
state = CORE_FLOAT; |
} |
else |
{ |
state = CORE_INVALID; |
transition_count[CORE_INVALID]++; |
} |
transition_count[CORE_START]++; |
break; |
case CORE_S1: |
if (ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_INT; |
transition_count[CORE_S1]++; |
} |
else if (NEXT_SYMBOL == '.') |
{ |
state = CORE_FLOAT; |
transition_count[CORE_S1]++; |
} |
else |
{ |
state = CORE_INVALID; |
transition_count[CORE_S1]++; |
} |
break; |
case CORE_INT: |
if (NEXT_SYMBOL == '.') |
{ |
state = CORE_FLOAT; |
transition_count[CORE_INT]++; |
} |
else if (!ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_INVALID; |
transition_count[CORE_INT]++; |
} |
break; |
case CORE_FLOAT: |
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e') |
{ |
state = CORE_S2; |
transition_count[CORE_FLOAT]++; |
} |
else if (!ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_INVALID; |
transition_count[CORE_FLOAT]++; |
} |
break; |
case CORE_S2: |
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-') |
{ |
state = CORE_EXPONENT; |
transition_count[CORE_S2]++; |
} |
else |
{ |
state = CORE_INVALID; |
transition_count[CORE_S2]++; |
} |
break; |
case CORE_EXPONENT: |
if (ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_SCIENTIFIC; |
transition_count[CORE_EXPONENT]++; |
} |
else |
{ |
state = CORE_INVALID; |
transition_count[CORE_EXPONENT]++; |
} |
break; |
case CORE_SCIENTIFIC: |
if (!ee_isdigit(NEXT_SYMBOL)) |
{ |
state = CORE_INVALID; |
transition_count[CORE_INVALID]++; |
} |
break; |
default: |
break; |
} |
} |
*instr = str; |
return state; |
} |
/sw/example/coremark/core_util.c
18,193 → 18,232
|
#include "coremark.h" |
/* Function: get_seed |
Get a value that cannot be determined at compile time. |
Get a value that cannot be determined at compile time. |
|
Since different embedded systems and compilers are used, 3 different methods are provided: |
1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that |
reads the value of a volatile variable from memory at run time. |
Please note, if using this method, you would need to modify core_portme.c to generate training profile. |
2 - Command line arguments. This is the preferred method if command line arguments are supported. |
3 - System function. If none of the first 2 methods is available on the platform, |
a system function which is not a stub can be used. |
|
e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions. |
Since different embedded systems and compilers are used, 3 different |
methods are provided: 1 - Using a volatile variable. This method is only |
valid if the compiler is forced to generate code that reads the value of a |
volatile variable from memory at run time. Please note, if using this method, |
you would need to modify core_portme.c to generate training profile. 2 - |
Command line arguments. This is the preferred method if command line |
arguments are supported. 3 - System function. If none of the first 2 methods |
is available on the platform, a system function which is not a stub can be |
used. |
|
e.g. read the value on GPIO pins connected to switches, or invoke |
special simulator functions. |
*/ |
#if (SEED_METHOD==SEED_VOLATILE) |
extern volatile ee_s32 seed1_volatile; |
extern volatile ee_s32 seed2_volatile; |
extern volatile ee_s32 seed3_volatile; |
extern volatile ee_s32 seed4_volatile; |
extern volatile ee_s32 seed5_volatile; |
ee_s32 get_seed_32(int i) { |
ee_s32 retval; |
switch (i) { |
case 1: |
retval=seed1_volatile; |
break; |
case 2: |
retval=seed2_volatile; |
break; |
case 3: |
retval=seed3_volatile; |
break; |
case 4: |
retval=seed4_volatile; |
break; |
case 5: |
retval=seed5_volatile; |
break; |
default: |
retval=0; |
break; |
} |
return retval; |
} |
#elif (SEED_METHOD==SEED_ARG) |
ee_s32 parseval(char *valstring) { |
ee_s32 retval=0; |
ee_s32 neg=1; |
int hexmode=0; |
if (*valstring == '-') { |
neg=-1; |
valstring++; |
} |
if ((valstring[0] == '0') && (valstring[1] == 'x')) { |
hexmode=1; |
valstring+=2; |
} |
/* first look for digits */ |
if (hexmode) { |
while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f'))) { |
ee_s32 digit=*valstring-'0'; |
if (digit>9) |
digit=10+*valstring-'a'; |
retval*=16; |
retval+=digit; |
valstring++; |
} |
} else { |
while ((*valstring >= '0') && (*valstring <= '9')) { |
ee_s32 digit=*valstring-'0'; |
retval*=10; |
retval+=digit; |
valstring++; |
} |
} |
/* now add qualifiers */ |
if (*valstring=='K') |
retval*=1024; |
if (*valstring=='M') |
retval*=1024*1024; |
#if (SEED_METHOD == SEED_VOLATILE) |
extern volatile ee_s32 seed1_volatile; |
extern volatile ee_s32 seed2_volatile; |
extern volatile ee_s32 seed3_volatile; |
extern volatile ee_s32 seed4_volatile; |
extern volatile ee_s32 seed5_volatile; |
ee_s32 |
get_seed_32(int i) |
{ |
ee_s32 retval; |
switch (i) |
{ |
case 1: |
retval = seed1_volatile; |
break; |
case 2: |
retval = seed2_volatile; |
break; |
case 3: |
retval = seed3_volatile; |
break; |
case 4: |
retval = seed4_volatile; |
break; |
case 5: |
retval = seed5_volatile; |
break; |
default: |
retval = 0; |
break; |
} |
return retval; |
} |
#elif (SEED_METHOD == SEED_ARG) |
ee_s32 |
parseval(char *valstring) |
{ |
ee_s32 retval = 0; |
ee_s32 neg = 1; |
int hexmode = 0; |
if (*valstring == '-') |
{ |
neg = -1; |
valstring++; |
} |
if ((valstring[0] == '0') && (valstring[1] == 'x')) |
{ |
hexmode = 1; |
valstring += 2; |
} |
/* first look for digits */ |
if (hexmode) |
{ |
while (((*valstring >= '0') && (*valstring <= '9')) |
|| ((*valstring >= 'a') && (*valstring <= 'f'))) |
{ |
ee_s32 digit = *valstring - '0'; |
if (digit > 9) |
digit = 10 + *valstring - 'a'; |
retval *= 16; |
retval += digit; |
valstring++; |
} |
} |
else |
{ |
while ((*valstring >= '0') && (*valstring <= '9')) |
{ |
ee_s32 digit = *valstring - '0'; |
retval *= 10; |
retval += digit; |
valstring++; |
} |
} |
/* now add qualifiers */ |
if (*valstring == 'K') |
retval *= 1024; |
if (*valstring == 'M') |
retval *= 1024 * 1024; |
|
retval*=neg; |
return retval; |
retval *= neg; |
return retval; |
} |
|
ee_s32 get_seed_args(int i, int argc, char *argv[]) { |
if (argc>i) |
return parseval(argv[i]); |
return 0; |
ee_s32 |
get_seed_args(int i, int argc, char *argv[]) |
{ |
if (argc > i) |
return parseval(argv[i]); |
return 0; |
} |
|
#elif (SEED_METHOD==SEED_FUNC) |
/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */ |
ee_s32 get_seed_32(int i) { |
ee_s32 retval; |
switch (i) { |
case 1: |
retval=portme_sys1(); |
break; |
case 2: |
retval=portme_sys2(); |
break; |
case 3: |
retval=portme_sys3(); |
break; |
case 4: |
retval=portme_sys4(); |
break; |
case 5: |
retval=portme_sys5(); |
break; |
default: |
retval=0; |
break; |
} |
return retval; |
#elif (SEED_METHOD == SEED_FUNC) |
/* If using OS based function, you must define and implement the functions below |
* in core_portme.h and core_portme.c ! */ |
ee_s32 |
get_seed_32(int i) |
{ |
ee_s32 retval; |
switch (i) |
{ |
case 1: |
retval = portme_sys1(); |
break; |
case 2: |
retval = portme_sys2(); |
break; |
case 3: |
retval = portme_sys3(); |
break; |
case 4: |
retval = portme_sys4(); |
break; |
case 5: |
retval = portme_sys5(); |
break; |
default: |
retval = 0; |
break; |
} |
return retval; |
} |
#endif |
|
/* Function: crc* |
Service functions to calculate 16b CRC code. |
Service functions to calculate 16b CRC code. |
|
*/ |
ee_u16 crcu8(ee_u8 data, ee_u16 crc ) |
ee_u16 |
crcu8(ee_u8 data, ee_u16 crc) |
{ |
ee_u8 i=0,x16=0,carry=0; |
ee_u8 i = 0, x16 = 0, carry = 0; |
|
for (i = 0; i < 8; i++) |
for (i = 0; i < 8; i++) |
{ |
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); |
data >>= 1; |
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); |
data >>= 1; |
|
if (x16 == 1) |
{ |
crc ^= 0x4002; |
carry = 1; |
} |
else |
carry = 0; |
crc >>= 1; |
if (carry) |
crc |= 0x8000; |
else |
crc &= 0x7fff; |
if (x16 == 1) |
{ |
crc ^= 0x4002; |
carry = 1; |
} |
else |
carry = 0; |
crc >>= 1; |
if (carry) |
crc |= 0x8000; |
else |
crc &= 0x7fff; |
} |
return crc; |
} |
ee_u16 crcu16(ee_u16 newval, ee_u16 crc) { |
crc=crcu8( (ee_u8) (newval) ,crc); |
crc=crcu8( (ee_u8) ((newval)>>8) ,crc); |
return crc; |
return crc; |
} |
ee_u16 crcu32(ee_u32 newval, ee_u16 crc) { |
crc=crc16((ee_s16) newval ,crc); |
crc=crc16((ee_s16) (newval>>16) ,crc); |
return crc; |
ee_u16 |
crcu16(ee_u16 newval, ee_u16 crc) |
{ |
crc = crcu8((ee_u8)(newval), crc); |
crc = crcu8((ee_u8)((newval) >> 8), crc); |
return crc; |
} |
ee_u16 crc16(ee_s16 newval, ee_u16 crc) { |
return crcu16((ee_u16)newval, crc); |
ee_u16 |
crcu32(ee_u32 newval, ee_u16 crc) |
{ |
crc = crc16((ee_s16)newval, crc); |
crc = crc16((ee_s16)(newval >> 16), crc); |
return crc; |
} |
ee_u16 |
crc16(ee_s16 newval, ee_u16 crc) |
{ |
return crcu16((ee_u16)newval, crc); |
} |
|
ee_u8 check_data_types() { |
ee_u8 retval=0; |
if (sizeof(ee_u8) != 1) { |
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_u16) != 2) { |
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_s16) != 2) { |
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_s32) != 4) { |
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_u32) != 4) { |
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_ptr_int) != sizeof(int *)) { |
ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); |
retval++; |
} |
if (retval>0) { |
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); |
} |
return retval; |
ee_u8 |
check_data_types() |
{ |
ee_u8 retval = 0; |
if (sizeof(ee_u8) != 1) |
{ |
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_u16) != 2) |
{ |
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_s16) != 2) |
{ |
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_s32) != 4) |
{ |
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_u32) != 4) |
{ |
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); |
retval++; |
} |
if (sizeof(ee_ptr_int) != sizeof(int *)) |
{ |
ee_printf( |
"ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); |
retval++; |
} |
if (retval > 0) |
{ |
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); |
} |
return retval; |
} |
/sw/example/coremark/coremark.h
17,23 → 17,23
*/ |
|
/* Topic: Description |
This file contains declarations of the various benchmark functions. |
This file contains declarations of the various benchmark functions. |
*/ |
|
/* Configuration: TOTAL_DATA_SIZE |
Define total size for data algorithms will operate on |
Define total size for data algorithms will operate on |
*/ |
#ifndef TOTAL_DATA_SIZE |
#define TOTAL_DATA_SIZE 2*1000 |
#ifndef TOTAL_DATA_SIZE |
#define TOTAL_DATA_SIZE 2 * 1000 |
#endif |
|
#define SEED_ARG 0 |
#define SEED_FUNC 1 |
#define SEED_ARG 0 |
#define SEED_FUNC 1 |
#define SEED_VOLATILE 2 |
|
#define MEM_STATIC 0 |
#define MEM_MALLOC 1 |
#define MEM_STACK 2 |
#define MEM_STACK 2 |
|
#include "core_portme.h" |
|
48,8 → 48,8
void *iterate(void *pres); |
|
/* Typedef: secs_ret |
For machines that have floating point support, get number of seconds as a double. |
Otherwise an unsigned int. |
For machines that have floating point support, get number of seconds as |
a double. Otherwise an unsigned int. |
*/ |
#if HAS_FLOAT |
typedef double secs_ret; |
58,17 → 58,17
#endif |
|
#if MAIN_HAS_NORETURN |
#define MAIN_RETURN_VAL |
#define MAIN_RETURN_VAL |
#define MAIN_RETURN_TYPE void |
#else |
#define MAIN_RETURN_VAL 0 |
#define MAIN_RETURN_VAL 0 |
#define MAIN_RETURN_TYPE int |
#endif |
#endif |
|
void start_time(void); |
void stop_time(void); |
void start_time(void); |
void stop_time(void); |
CORE_TICKS get_time(void); |
secs_ret time_in_secs(CORE_TICKS ticks); |
secs_ret time_in_secs(CORE_TICKS ticks); |
|
/* Misc useful functions */ |
ee_u16 crcu8(ee_u8 data, ee_u16 crc); |
75,30 → 75,31
ee_u16 crc16(ee_s16 newval, ee_u16 crc); |
ee_u16 crcu16(ee_u16 newval, ee_u16 crc); |
ee_u16 crcu32(ee_u32 newval, ee_u16 crc); |
ee_u8 check_data_types(); |
void *portable_malloc(ee_size_t size); |
void portable_free(void *p); |
ee_u8 check_data_types(void); |
void * portable_malloc(ee_size_t size); |
void portable_free(void *p); |
ee_s32 parseval(char *valstring); |
|
/* Algorithm IDS */ |
#define ID_LIST (1<<0) |
#define ID_MATRIX (1<<1) |
#define ID_STATE (1<<2) |
#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE) |
#define NUM_ALGORITHMS 3 |
#define ID_LIST (1 << 0) |
#define ID_MATRIX (1 << 1) |
#define ID_STATE (1 << 2) |
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE) |
#define NUM_ALGORITHMS 3 |
|
/* list data structures */ |
typedef struct list_data_s { |
ee_s16 data16; |
ee_s16 idx; |
typedef struct list_data_s |
{ |
ee_s16 data16; |
ee_s16 idx; |
} list_data; |
|
typedef struct list_head_s { |
struct list_head_s *next; |
struct list_data_s *info; |
typedef struct list_head_s |
{ |
struct list_head_s *next; |
struct list_data_s *info; |
} list_head; |
|
|
/*matrix benchmark related stuff */ |
#define MATDAT_INT 1 |
#if MATDAT_INT |
109,52 → 110,54
typedef ee_f32 MATRES; |
#endif |
|
typedef struct MAT_PARAMS_S { |
int N; |
MATDAT *A; |
MATDAT *B; |
MATRES *C; |
typedef struct MAT_PARAMS_S |
{ |
int N; |
MATDAT *A; |
MATDAT *B; |
MATRES *C; |
} mat_params; |
|
/* state machine related stuff */ |
/* List of all the possible states for the FSM */ |
typedef enum CORE_STATE { |
CORE_START=0, |
CORE_INVALID, |
CORE_S1, |
CORE_S2, |
CORE_INT, |
CORE_FLOAT, |
CORE_EXPONENT, |
CORE_SCIENTIFIC, |
NUM_CORE_STATES |
} core_state_e ; |
typedef enum CORE_STATE |
{ |
CORE_START = 0, |
CORE_INVALID, |
CORE_S1, |
CORE_S2, |
CORE_INT, |
CORE_FLOAT, |
CORE_EXPONENT, |
CORE_SCIENTIFIC, |
NUM_CORE_STATES |
} core_state_e; |
|
|
/* Helper structure to hold results */ |
typedef struct RESULTS_S { |
/* inputs */ |
ee_s16 seed1; /* Initializing seed */ |
ee_s16 seed2; /* Initializing seed */ |
ee_s16 seed3; /* Initializing seed */ |
void *memblock[4]; /* Pointer to safe memory location */ |
ee_u32 size; /* Size of the data */ |
ee_u32 iterations; /* Number of iterations to execute */ |
ee_u32 execs; /* Bitmask of operations to execute */ |
struct list_head_s *list; |
mat_params mat; |
/* outputs */ |
ee_u16 crc; |
ee_u16 crclist; |
ee_u16 crcmatrix; |
ee_u16 crcstate; |
ee_s16 err; |
/* multithread specific */ |
core_portable port; |
typedef struct RESULTS_S |
{ |
/* inputs */ |
ee_s16 seed1; /* Initializing seed */ |
ee_s16 seed2; /* Initializing seed */ |
ee_s16 seed3; /* Initializing seed */ |
void * memblock[4]; /* Pointer to safe memory location */ |
ee_u32 size; /* Size of the data */ |
ee_u32 iterations; /* Number of iterations to execute */ |
ee_u32 execs; /* Bitmask of operations to execute */ |
struct list_head_s *list; |
mat_params mat; |
/* outputs */ |
ee_u16 crc; |
ee_u16 crclist; |
ee_u16 crcmatrix; |
ee_u16 crcstate; |
ee_s16 err; |
/* multithread specific */ |
core_portable port; |
} core_results; |
|
/* Multicore execution handling */ |
#if (MULTITHREAD>1) |
#if (MULTITHREAD > 1) |
ee_u8 core_start_parallel(core_results *res); |
ee_u8 core_stop_parallel(core_results *res); |
#endif |
161,14 → 164,20
|
/* list benchmark functions */ |
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); |
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); |
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); |
|
/* state benchmark functions */ |
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); |
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, |
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc); |
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); |
ee_u16 core_bench_state(ee_u32 blksize, |
ee_u8 *memblock, |
ee_s16 seed1, |
ee_s16 seed2, |
ee_s16 step, |
ee_u16 crc); |
|
/* matrix benchmark functions */ |
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p); |
ee_u32 core_init_matrix(ee_u32 blksize, |
void * memblk, |
ee_s32 seed, |
mat_params *p); |
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); |
|
/sw/example/coremark/cvt.c
0,0 → 1,127
/* |
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) |
|
Licensed under the Apache License, Version 2.0 (the "License"); |
you may not use this file except in compliance with the License. |
You may obtain a copy of the License at |
|
http://www.apache.org/licenses/LICENSE-2.0 |
|
Unless required by applicable law or agreed to in writing, software |
distributed under the License is distributed on an "AS IS" BASIS, |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
See the License for the specific language governing permissions and |
limitations under the License. |
*/ |
#include <math.h> |
/* Buffer size, in bytes, that cvt() assumes for every conversion buffer. */
#define CVTBUFSIZE 80 |
/* Shared static result buffer passed to cvt() by ecvt() and fcvt(); every
   call through those two wrappers overwrites the previous result. */
static char CVTBUF[CVTBUFSIZE]; |
|
/* cvt: convert the magnitude of arg into a string of decimal digits in buf.
 *
 * The string holds digits only (no sign character, no decimal point).
 *
 * arg     - value to convert; its sign is reported through *sign.
 * ndigits - requested digit count, clamped to the range 0 .. CVTBUFSIZE - 2.
 * decpt   - out: position of the decimal point relative to the start of the
 *           digit string (number of integer digits, or a negative count of
 *           leading fractional zeros when the integer part is zero).
 * sign    - out: 1 when arg is negative, 0 otherwise.
 * buf     - output buffer; indexed up to buf[CVTBUFSIZE - 1].
 * eflag   - non-zero: ndigits counts digits from the start of the string;
 *           zero: the rounding position is additionally offset by *decpt,
 *           so ndigits counts digits after the decimal point.
 *
 * Returns buf.
 *
 * NOTE(review): the integer-digit loop writes backwards from
 * &buf[CVTBUFSIZE] without a lower bound check, so it appears to assume the
 * integer part has at most CVTBUFSIZE digits and is finite - confirm that
 * callers never pass larger values, infinities or NaN.
 */
static char * |
cvt(double arg, int ndigits, int *decpt, int *sign, char *buf, int eflag) |
{ |
int r2; |
double fi, fj; |
char * p, *p1; |
|
/* Clamp the requested digit count to what fits in the buffer. */
if (ndigits < 0) |
ndigits = 0; |
if (ndigits >= CVTBUFSIZE - 1) |
ndigits = CVTBUFSIZE - 2; |
r2 = 0; |
*sign = 0; |
p = &buf[0]; |
/* Work on the magnitude; remember the sign for the caller. */
if (arg < 0) |
{ |
*sign = 1; |
arg = -arg; |
} |
/* Split into integer part (fi) and fractional part (arg). */
arg = modf(arg, &fi); |
/* This assignment is repeated inside the branch below and overwritten
   before any other use. */
p1 = &buf[CVTBUFSIZE]; |
|
if (fi != 0) |
{ |
/* Emit the integer digits least-significant first, filling the buffer
   from its end towards its start; r2 counts the digits produced. */
p1 = &buf[CVTBUFSIZE]; |
while (fi != 0) |
{ |
fj = modf(fi / 10, &fi); |
/* The .03 bias presumably guards against the truncation yielding a
   digit one too small due to rounding error - TODO confirm. */
*--p1 = (int)((fj + .03) * 10) + '0'; |
r2++; |
} |
/* Move the digits to the front of the buffer in most-significant-first
   order. */
while (p1 < &buf[CVTBUFSIZE]) |
*p++ = *p1++; |
} |
else if (arg > 0) |
{ |
/* Pure fraction: scale by 10 while the result stays below 1, counting
   each skipped leading zero as a negative decimal-point offset. */
while ((fj = arg * 10) < 1) |
{ |
arg = fj; |
r2--; |
} |
} |
/* p1 marks the rounding position: one past the last digit to keep. */
p1 = &buf[ndigits]; |
if (eflag == 0) |
p1 += r2; |
*decpt = r2; |
/* Rounding position lies before the buffer start: return an empty string. */
if (p1 < &buf[0]) |
{ |
buf[0] = '\0'; |
return buf; |
} |
/* Generate fractional digits up to and including the rounding position,
   stopping at the end of the buffer. */
while (p <= p1 && p < &buf[CVTBUFSIZE]) |
{ |
arg *= 10; |
arg = modf(arg, &fj); |
*p++ = (int)fj + '0'; |
} |
/* Rounding position is at or past the buffer end: terminate in the last
   byte and return without rounding. */
if (p1 >= &buf[CVTBUFSIZE]) |
{ |
buf[CVTBUFSIZE - 1] = '\0'; |
return buf; |
} |
/* Round in place: add 5 to the digit at the rounding position and propagate
   any carry towards the front. p remembers where the string will end. */
p = p1; |
*p1 += 5; |
while (*p1 > '9') |
{ |
*p1 = '0'; |
if (p1 > buf) |
++*--p1; |
else |
{ |
/* Carry out of the first digit: the string becomes "1" followed by
   zeros and the decimal point moves one place to the right. */
*p1 = '1'; |
(*decpt)++; |
if (eflag == 0) |
{ |
/* With eflag == 0 the string grows by one digit. */
if (p > buf) |
*p = '0'; |
p++; |
} |
} |
} |
/* Terminate at the rounding position, dropping the digit used for rounding. */
*p = '\0'; |
return buf; |
} |
|
char * |
ecvt(double arg, int ndigits, int *decpt, int *sign) |
{ |
return cvt(arg, ndigits, decpt, sign, CVTBUF, 1); |
} |
|
/* ecvtbuf: same conversion as ecvt(), but the digits are written to the
 * caller-supplied buf instead of the shared static buffer.
 * cvt() indexes buf up to buf[CVTBUFSIZE - 1], so buf needs at least
 * CVTBUFSIZE bytes. Returns buf.
 */
char *
ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    const int total_digit_mode = 1; /* passed as cvt()'s eflag */
    char *    digits_out;

    digits_out = cvt(arg, ndigits, decpt, sign, buf, total_digit_mode);
    return digits_out;
}
|
char * |
fcvt(double arg, int ndigits, int *decpt, int *sign) |
{ |
return cvt(arg, ndigits, decpt, sign, CVTBUF, 0); |
} |
|
/* fcvtbuf: same conversion as fcvt(), but the digits are written to the
 * caller-supplied buf instead of the shared static buffer.
 * cvt() indexes buf up to buf[CVTBUFSIZE - 1], so buf needs at least
 * CVTBUFSIZE bytes. Returns buf.
 */
char *
fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf)
{
    const int fraction_digit_mode = 0; /* passed as cvt()'s eflag */
    char *    digits_out;

    digits_out = cvt(arg, ndigits, decpt, sign, buf, fraction_digit_mode);
    return digits_out;
}
/sw/example/coremark/ee_printf.c
1,634 → 1,710
/* File : barebones/ee_printf.c |
This file contains an implementation of ee_printf that only requires a method to output a char to a UART without pulling in library code. |
/* |
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) |
|
This code is based on a file that contains the following: |
Copyright (C) 2002 Michael Ringgaard. All rights reserved. |
Licensed under the Apache License, Version 2.0 (the "License"); |
you may not use this file except in compliance with the License. |
You may obtain a copy of the License at |
|
Redistribution and use in source and binary forms, with or without |
modification, are permitted provided that the following conditions |
are met: |
http://www.apache.org/licenses/LICENSE-2.0 |
|
1. Redistributions of source code must retain the above copyright |
notice, this list of conditions and the following disclaimer. |
2. Redistributions in binary form must reproduce the above copyright |
notice, this list of conditions and the following disclaimer in the |
documentation and/or other materials provided with the distribution. |
3. Neither the name of the project nor the names of its contributors |
may be used to endorse or promote products derived from this software |
without specific prior written permission. |
Unless required by applicable law or agreed to in writing, software |
distributed under the License is distributed on an "AS IS" BASIS, |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
See the License for the specific language governing permissions and |
limitations under the License. |
*/ |
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
SUCH DAMAGE. |
/* Modified for the NEORV32 Processor - by Stephan Nolting */ |
|
*/ |
#include <coremark.h> |
#include <stdarg.h> |
|
#include <core_portme.h> |
#include <neorv32.h> |
#include <stdarg.h> |
#include <stdbool.h> |
#include <string.h> |
|
#define ZEROPAD (1<<0) /* Pad with zero */ |
#define SIGN (1<<1) /* Unsigned/signed long */ |
#define PLUS (1<<2) /* Show plus */ |
#define SPACE (1<<3) /* Spacer */ |
#define LEFT (1<<4) /* Left justified */ |
#define HEX_PREP (1<<5) /* 0x */ |
#define UPPERCASE (1<<6) /* 'ABCDEF' */ |
#define ZEROPAD (1 << 0) /* Pad with zero */ |
#define SIGN (1 << 1) /* Unsigned/signed long */ |
#define PLUS (1 << 2) /* Show plus */ |
#define SPACE (1 << 3) /* Spacer */ |
#define LEFT (1 << 4) /* Left justified */ |
#define HEX_PREP (1 << 5) /* 0x */ |
#define UPPERCASE (1 << 6) /* 'ABCDEF' */ |
|
#define is_digit(c) ((c) >= '0' && (c) <= '9') |
|
/* |
Serial initialization and new line replacement is a direct copy from mbed_retarget.cpp |
If the static modifier were to be removed, this part of the code would not be necessary. |
*/ |
//#include "hal/serial_api.h" |
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz"; |
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
static ee_size_t strnlen(const char *s, ee_size_t count); |
|
#if DEVICE_SERIAL |
static serial_t stdio_uart = { 0 }; |
#if MBED_CONF_PLATFORM_STDIO_CONVERT_NEWLINES |
static char mbed_stdio_out_prev = 0; |
#endif |
#endif |
|
/* module variable for keeping track of initialization */ |
static bool not_initialized = true; |
|
static void init_serial() |
static ee_size_t |
strnlen(const char *s, ee_size_t count) |
{ |
if (not_initialized) |
{ |
not_initialized = false; |
|
#if DEVICE_SERIAL |
// serial_init(&stdio_uart, STDIO_UART_TX, STDIO_UART_RX); |
#if MBED_CONF_PLATFORM_STDIO_BAUD_RATE |
// serial_baud(&stdio_uart, MBED_CONF_PLATFORM_STDIO_BAUD_RATE); |
#endif |
#endif |
} |
const char *sc; |
for (sc = s; *sc != '\0' && count--; ++sc) |
; |
return sc - s; |
} |
|
#define MBED_INITIALIZE_PRINT(x) { init_serial(); } |
#define MBED_PRINT_CHARACTER(x) { serial_putc(&stdio_uart, x); } |
|
static char *lower_digits = "0123456789abcdefghijklmnopqrstuvwxyz"; |
static char *upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
|
static int ee_skip_atoi(const char **s) |
static int |
skip_atoi(const char **s) |
{ |
int i = 0; |
while (is_digit(**s)) i = i*10 + *((*s)++) - '0'; |
return i; |
int i = 0; |
while (is_digit(**s)) |
i = i * 10 + *((*s)++) - '0'; |
return i; |
} |
|
static char *ee_number(char *str, long num, int base, int size, int precision, int type) |
static char * |
number(char *str, long num, int base, int size, int precision, int type) |
{ |
char c, sign, tmp[66]; |
char *dig = lower_digits; |
int i; |
char c, sign, tmp[66]; |
char *dig = digits; |
int i; |
|
if (type & UPPERCASE) dig = upper_digits; |
if (type & LEFT) type &= ~ZEROPAD; |
if (base < 2 || base > 36) return 0; |
if (type & UPPERCASE) |
dig = upper_digits; |
if (type & LEFT) |
type &= ~ZEROPAD; |
if (base < 2 || base > 36) |
return 0; |
|
c = (type & ZEROPAD) ? '0' : ' '; |
sign = 0; |
if (type & SIGN) |
{ |
if (num < 0) |
c = (type & ZEROPAD) ? '0' : ' '; |
sign = 0; |
if (type & SIGN) |
{ |
sign = '-'; |
num = -num; |
size--; |
if (num < 0) |
{ |
sign = '-'; |
num = -num; |
size--; |
} |
else if (type & PLUS) |
{ |
sign = '+'; |
size--; |
} |
else if (type & SPACE) |
{ |
sign = ' '; |
size--; |
} |
} |
else if (type & PLUS) |
|
if (type & HEX_PREP) |
{ |
sign = '+'; |
size--; |
if (base == 16) |
size -= 2; |
else if (base == 8) |
size--; |
} |
else if (type & SPACE) |
{ |
sign = ' '; |
size--; |
} |
} |
|
if (type & HEX_PREP) |
{ |
if (base == 16) |
size -= 2; |
else if (base == 8) |
size--; |
} |
i = 0; |
|
i = 0; |
|
if (num == 0) |
tmp[i++] = '0'; |
else |
{ |
while (num != 0) |
if (num == 0) |
tmp[i++] = '0'; |
else |
{ |
tmp[i++] = dig[((unsigned long) num) % (unsigned) base]; |
num = ((unsigned long) num) / (unsigned) base; |
while (num != 0) |
{ |
tmp[i++] = dig[((unsigned long)num) % (unsigned)base]; |
num = ((unsigned long)num) / (unsigned)base; |
} |
} |
} |
|
if (i > precision) precision = i; |
size -= precision; |
if (!(type & (ZEROPAD | LEFT))) while (size-- > 0) *str++ = ' '; |
if (sign) *str++ = sign; |
if (i > precision) |
precision = i; |
size -= precision; |
if (!(type & (ZEROPAD | LEFT))) |
while (size-- > 0) |
*str++ = ' '; |
if (sign) |
*str++ = sign; |
|
if (type & HEX_PREP) |
{ |
if (base == 8) |
*str++ = '0'; |
else if (base == 16) |
if (type & HEX_PREP) |
{ |
*str++ = '0'; |
*str++ = lower_digits[33]; |
if (base == 8) |
*str++ = '0'; |
else if (base == 16) |
{ |
*str++ = '0'; |
*str++ = digits[33]; |
} |
} |
} |
|
if (!(type & LEFT)) while (size-- > 0) *str++ = c; |
while (i < precision--) *str++ = '0'; |
while (i-- > 0) *str++ = tmp[i]; |
while (size-- > 0) *str++ = ' '; |
if (!(type & LEFT)) |
while (size-- > 0) |
*str++ = c; |
while (i < precision--) |
*str++ = '0'; |
while (i-- > 0) |
*str++ = tmp[i]; |
while (size-- > 0) |
*str++ = ' '; |
|
return str; |
return str; |
} |
|
static char *eaddr(char *str, unsigned char *addr, int size, int precision, int type) |
static char * |
eaddr(char *str, unsigned char *addr, int size, int precision, int type) |
{ |
char tmp[24]; |
char *dig = lower_digits; |
int i, len; |
char tmp[24]; |
char *dig = digits; |
int i, len; |
|
if (type & UPPERCASE) dig = upper_digits; |
len = 0; |
for (i = 0; i < 6; i++) |
{ |
if (i != 0) tmp[len++] = ':'; |
tmp[len++] = dig[addr[i] >> 4]; |
tmp[len++] = dig[addr[i] & 0x0F]; |
} |
if (type & UPPERCASE) |
dig = upper_digits; |
len = 0; |
for (i = 0; i < 6; i++) |
{ |
if (i != 0) |
tmp[len++] = ':'; |
tmp[len++] = dig[addr[i] >> 4]; |
tmp[len++] = dig[addr[i] & 0x0F]; |
} |
|
if (!(type & LEFT)) while (len < size--) *str++ = ' '; |
for (i = 0; i < len; ++i) *str++ = tmp[i]; |
while (len < size--) *str++ = ' '; |
if (!(type & LEFT)) |
while (len < size--) |
*str++ = ' '; |
for (i = 0; i < len; ++i) |
*str++ = tmp[i]; |
while (len < size--) |
*str++ = ' '; |
|
return str; |
return str; |
} |
|
static char *iaddr(char *str, unsigned char *addr, int size, int precision, int type) |
static char * |
iaddr(char *str, unsigned char *addr, int size, int precision, int type) |
{ |
char tmp[24]; |
int i, n, len; |
char tmp[24]; |
int i, n, len; |
|
len = 0; |
for (i = 0; i < 4; i++) |
{ |
if (i != 0) tmp[len++] = '.'; |
n = addr[i]; |
|
if (n == 0) |
tmp[len++] = lower_digits[0]; |
else |
len = 0; |
for (i = 0; i < 4; i++) |
{ |
if (n >= 100) |
{ |
tmp[len++] = lower_digits[n / 100]; |
n = n % 100; |
tmp[len++] = lower_digits[n / 10]; |
n = n % 10; |
} |
else if (n >= 10) |
{ |
tmp[len++] = lower_digits[n / 10]; |
n = n % 10; |
} |
if (i != 0) |
tmp[len++] = '.'; |
n = addr[i]; |
|
tmp[len++] = lower_digits[n]; |
if (n == 0) |
tmp[len++] = digits[0]; |
else |
{ |
if (n >= 100) |
{ |
tmp[len++] = digits[n / 100]; |
n = n % 100; |
tmp[len++] = digits[n / 10]; |
n = n % 10; |
} |
else if (n >= 10) |
{ |
tmp[len++] = digits[n / 10]; |
n = n % 10; |
} |
|
tmp[len++] = digits[n]; |
} |
} |
} |
|
if (!(type & LEFT)) while (len < size--) *str++ = ' '; |
for (i = 0; i < len; ++i) *str++ = tmp[i]; |
while (len < size--) *str++ = ' '; |
if (!(type & LEFT)) |
while (len < size--) |
*str++ = ' '; |
for (i = 0; i < len; ++i) |
*str++ = tmp[i]; |
while (len < size--) |
*str++ = ' '; |
|
return str; |
return str; |
} |
|
#if defined(HAS_FLOAT) && HAS_FLOAT == 1 |
#if HAS_FLOAT |
|
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); |
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf); |
static void ee_bufcpy(char *d, char *s, int count); |
|
void ee_bufcpy(char *pd, char *ps, int count) { |
char *pe=ps+count; |
while (ps!=pe) |
*pd++=*ps++; |
void |
ee_bufcpy(char *pd, char *ps, int count) |
{ |
char *pe = ps + count; |
while (ps != pe) |
*pd++ = *ps++; |
} |
|
static void parse_float(double value, char *buffer, char fmt, int precision) |
static void |
parse_float(double value, char *buffer, char fmt, int precision) |
{ |
int decpt, sign, exp, pos; |
char *fdigits = NULL; |
char cvtbuf[80]; |
int capexp = 0; |
int magnitude; |
int decpt, sign, exp, pos; |
char *digits = NULL; |
char cvtbuf[80]; |
int capexp = 0; |
int magnitude; |
|
if (fmt == 'G' || fmt == 'E') |
{ |
capexp = 1; |
fmt += 'a' - 'A'; |
} |
|
if (fmt == 'g') |
{ |
fdigits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf); |
magnitude = decpt - 1; |
if (magnitude < -4 || magnitude > precision - 1) |
if (fmt == 'G' || fmt == 'E') |
{ |
fmt = 'e'; |
precision -= 1; |
capexp = 1; |
fmt += 'a' - 'A'; |
} |
else |
|
if (fmt == 'g') |
{ |
fmt = 'f'; |
precision -= decpt; |
digits = ecvtbuf(value, precision, &decpt, &sign, cvtbuf); |
magnitude = decpt - 1; |
if (magnitude < -4 || magnitude > precision - 1) |
{ |
fmt = 'e'; |
precision -= 1; |
} |
else |
{ |
fmt = 'f'; |
precision -= decpt; |
} |
} |
} |
|
if (fmt == 'e') |
{ |
fdigits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf); |
if (fmt == 'e') |
{ |
digits = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf); |
|
if (sign) *buffer++ = '-'; |
*buffer++ = *fdigits; |
if (precision > 0) *buffer++ = '.'; |
ee_bufcpy(buffer, fdigits + 1, precision); |
buffer += precision; |
*buffer++ = capexp ? 'E' : 'e'; |
if (sign) |
*buffer++ = '-'; |
*buffer++ = *digits; |
if (precision > 0) |
*buffer++ = '.'; |
ee_bufcpy(buffer, digits + 1, precision); |
buffer += precision; |
*buffer++ = capexp ? 'E' : 'e'; |
|
if (decpt == 0) |
if (decpt == 0) |
{ |
if (value == 0.0) |
exp = 0; |
else |
exp = -1; |
} |
else |
exp = decpt - 1; |
|
if (exp < 0) |
{ |
*buffer++ = '-'; |
exp = -exp; |
} |
else |
*buffer++ = '+'; |
|
buffer[2] = (exp % 10) + '0'; |
exp = exp / 10; |
buffer[1] = (exp % 10) + '0'; |
exp = exp / 10; |
buffer[0] = (exp % 10) + '0'; |
buffer += 3; |
} |
else if (fmt == 'f') |
{ |
if (value == 0.0) |
exp = 0; |
else |
exp = -1; |
digits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf); |
if (sign) |
*buffer++ = '-'; |
if (*digits) |
{ |
if (decpt <= 0) |
{ |
*buffer++ = '0'; |
*buffer++ = '.'; |
for (pos = 0; pos < -decpt; pos++) |
*buffer++ = '0'; |
while (*digits) |
*buffer++ = *digits++; |
} |
else |
{ |
pos = 0; |
while (*digits) |
{ |
if (pos++ == decpt) |
*buffer++ = '.'; |
*buffer++ = *digits++; |
} |
} |
} |
else |
{ |
*buffer++ = '0'; |
if (precision > 0) |
{ |
*buffer++ = '.'; |
for (pos = 0; pos < precision; pos++) |
*buffer++ = '0'; |
} |
} |
} |
else |
exp = decpt - 1; |
|
if (exp < 0) |
*buffer = '\0'; |
} |
|
static void |
decimal_point(char *buffer) |
{ |
while (*buffer) |
{ |
*buffer++ = '-'; |
exp = -exp; |
if (*buffer == '.') |
return; |
if (*buffer == 'e' || *buffer == 'E') |
break; |
buffer++; |
} |
else |
*buffer++ = '+'; |
|
buffer[2] = (exp % 10) + '0'; |
exp = exp / 10; |
buffer[1] = (exp % 10) + '0'; |
exp = exp / 10; |
buffer[0] = (exp % 10) + '0'; |
buffer += 3; |
} |
else if (fmt == 'f') |
{ |
fdigits = fcvtbuf(value, precision, &decpt, &sign, cvtbuf); |
if (sign) *buffer++ = '-'; |
if (*fdigits) |
if (*buffer) |
{ |
if (decpt <= 0) |
{ |
*buffer++ = '0'; |
*buffer++ = '.'; |
for (pos = 0; pos < -decpt; pos++) *buffer++ = '0'; |
while (*fdigits) *buffer++ = *fdigits++; |
} |
else |
{ |
pos = 0; |
while (*fdigits) |
int n = strnlen(buffer, 256); |
while (n > 0) |
{ |
if (pos++ == decpt) *buffer++ = '.'; |
*buffer++ = *fdigits++; |
buffer[n + 1] = buffer[n]; |
n--; |
} |
} |
|
*buffer = '.'; |
} |
else |
{ |
*buffer++ = '0'; |
if (precision > 0) |
{ |
*buffer++ = '.'; |
for (pos = 0; pos < precision; pos++) *buffer++ = '0'; |
} |
*buffer = '\0'; |
} |
} |
|
*buffer = '\0'; |
} |
|
static void decimal_point(char *buffer) |
static void |
cropzeros(char *buffer) |
{ |
while (*buffer) |
{ |
if (*buffer == '.') return; |
if (*buffer == 'e' || *buffer == 'E') break; |
buffer++; |
} |
char *stop; |
|
if (*buffer) |
{ |
int n = strnlen(buffer,256); |
while (n > 0) |
while (*buffer && *buffer != '.') |
buffer++; |
if (*buffer++) |
{ |
buffer[n + 1] = buffer[n]; |
n--; |
while (*buffer && *buffer != 'e' && *buffer != 'E') |
buffer++; |
stop = buffer--; |
while (*buffer == '0') |
buffer--; |
if (*buffer == '.') |
buffer--; |
while (buffer != stop) |
*++buffer = 0; |
} |
|
*buffer = '.'; |
} |
else |
{ |
*buffer++ = '.'; |
*buffer = '\0'; |
} |
} |
|
static void cropzeros(char *buffer) |
static char * |
flt(char *str, double num, int size, int precision, char fmt, int flags) |
{ |
char *stop; |
char tmp[80]; |
char c, sign; |
int n, i; |
|
while (*buffer && *buffer != '.') buffer++; |
if (*buffer++) |
{ |
while (*buffer && *buffer != 'e' && *buffer != 'E') buffer++; |
stop = buffer--; |
while (*buffer == '0') buffer--; |
if (*buffer == '.') buffer--; |
while (buffer!=stop) |
*++buffer=0; |
} |
} |
// Left align means no zero padding |
if (flags & LEFT) |
flags &= ~ZEROPAD; |
|
static char *flt(char *str, double num, int size, int precision, char fmt, int flags) |
{ |
char tmp[80]; |
char c, sign; |
int n, i; |
|
// Left align means no zero padding |
if (flags & LEFT) flags &= ~ZEROPAD; |
|
// Determine padding and sign char |
c = (flags & ZEROPAD) ? '0' : ' '; |
sign = 0; |
if (flags & SIGN) |
{ |
if (num < 0.0) |
// Determine padding and sign char |
c = (flags & ZEROPAD) ? '0' : ' '; |
sign = 0; |
if (flags & SIGN) |
{ |
sign = '-'; |
num = -num; |
size--; |
if (num < 0.0) |
{ |
sign = '-'; |
num = -num; |
size--; |
} |
else if (flags & PLUS) |
{ |
sign = '+'; |
size--; |
} |
else if (flags & SPACE) |
{ |
sign = ' '; |
size--; |
} |
} |
else if (flags & PLUS) |
{ |
sign = '+'; |
size--; |
} |
else if (flags & SPACE) |
{ |
sign = ' '; |
size--; |
} |
} |
|
// Compute the precision value |
if (precision < 0) |
precision = 6; // Default precision: 6 |
// Compute the precision value |
if (precision < 0) |
precision = 6; // Default precision: 6 |
|
// Convert floating point number to text |
parse_float(num, tmp, fmt, precision); |
// Convert floating point number to text |
parse_float(num, tmp, fmt, precision); |
|
if ((flags & HEX_PREP) && precision == 0) decimal_point(tmp); |
if (fmt == 'g' && !(flags & HEX_PREP)) cropzeros(tmp); |
if ((flags & HEX_PREP) && precision == 0) |
decimal_point(tmp); |
if (fmt == 'g' && !(flags & HEX_PREP)) |
cropzeros(tmp); |
|
n = strnlen(tmp,256); |
n = strnlen(tmp, 256); |
|
// Output number with alignment and padding |
size -= n; |
if (!(flags & (ZEROPAD | LEFT))) while (size-- > 0) *str++ = ' '; |
if (sign) *str++ = sign; |
if (!(flags & LEFT)) while (size-- > 0) *str++ = c; |
for (i = 0; i < n; i++) *str++ = tmp[i]; |
while (size-- > 0) *str++ = ' '; |
// Output number with alignment and padding |
size -= n; |
if (!(flags & (ZEROPAD | LEFT))) |
while (size-- > 0) |
*str++ = ' '; |
if (sign) |
*str++ = sign; |
if (!(flags & LEFT)) |
while (size-- > 0) |
*str++ = c; |
for (i = 0; i < n; i++) |
*str++ = tmp[i]; |
while (size-- > 0) |
*str++ = ' '; |
|
return str; |
return str; |
} |
|
#endif |
|
static int ee_vsprintf(char *buf, const char *fmt, va_list args) |
static int |
ee_vsprintf(char *buf, const char *fmt, va_list args) |
{ |
int len; |
unsigned long num; |
int i, base; |
char *str; |
char *s; |
int len; |
unsigned long num; |
int i, base; |
char * str; |
char * s; |
|
int flags; // Flags to number() |
int flags; // Flags to number() |
|
int field_width; // Width of output field |
int precision; // Min. # of digits for integers; max number of chars for from string |
int qualifier; // 'h', 'l', or 'L' for integer fields |
int field_width; // Width of output field |
int precision; // Min. # of digits for integers; max number of chars for |
// from string |
int qualifier; // 'h', 'l', or 'L' for integer fields |
|
for (str = buf; *fmt; fmt++) |
{ |
if (*fmt != '%') |
for (str = buf; *fmt; fmt++) |
{ |
*str++ = *fmt; |
continue; |
} |
if (*fmt != '%') |
{ |
*str++ = *fmt; |
continue; |
} |
|
// Process flags |
flags = 0; |
repeat: |
fmt++; // This also skips first '%' |
switch (*fmt) |
{ |
case '-': flags |= LEFT; goto repeat; |
case '+': flags |= PLUS; goto repeat; |
case ' ': flags |= SPACE; goto repeat; |
case '#': flags |= HEX_PREP; goto repeat; |
case '0': flags |= ZEROPAD; goto repeat; |
} |
// Process flags |
flags = 0; |
repeat: |
fmt++; // This also skips first '%' |
switch (*fmt) |
{ |
case '-': |
flags |= LEFT; |
goto repeat; |
case '+': |
flags |= PLUS; |
goto repeat; |
case ' ': |
flags |= SPACE; |
goto repeat; |
case '#': |
flags |= HEX_PREP; |
goto repeat; |
case '0': |
flags |= ZEROPAD; |
goto repeat; |
} |
|
// Get field width |
field_width = -1; |
if (is_digit(*fmt)) |
field_width = ee_skip_atoi(&fmt); |
else if (*fmt == '*') |
{ |
fmt++; |
field_width = va_arg(args, int); |
if (field_width < 0) |
{ |
field_width = -field_width; |
flags |= LEFT; |
} |
} |
// Get field width |
field_width = -1; |
if (is_digit(*fmt)) |
field_width = skip_atoi(&fmt); |
else if (*fmt == '*') |
{ |
fmt++; |
field_width = va_arg(args, int); |
if (field_width < 0) |
{ |
field_width = -field_width; |
flags |= LEFT; |
} |
} |
|
// Get the precision |
precision = -1; |
if (*fmt == '.') |
{ |
++fmt; |
if (is_digit(*fmt)) |
precision = ee_skip_atoi(&fmt); |
else if (*fmt == '*') |
{ |
++fmt; |
precision = va_arg(args, int); |
} |
if (precision < 0) precision = 0; |
} |
// Get the precision |
precision = -1; |
if (*fmt == '.') |
{ |
++fmt; |
if (is_digit(*fmt)) |
precision = skip_atoi(&fmt); |
else if (*fmt == '*') |
{ |
++fmt; |
precision = va_arg(args, int); |
} |
if (precision < 0) |
precision = 0; |
} |
|
// Get the conversion qualifier |
qualifier = -1; |
if (*fmt == 'l' || *fmt == 'L') |
{ |
qualifier = *fmt; |
fmt++; |
} |
// Get the conversion qualifier |
qualifier = -1; |
if (*fmt == 'l' || *fmt == 'L') |
{ |
qualifier = *fmt; |
fmt++; |
} |
|
// Default base |
base = 10; |
// Default base |
base = 10; |
|
switch (*fmt) |
{ |
case 'c': |
if (!(flags & LEFT)) while (--field_width > 0) *str++ = ' '; |
*str++ = (unsigned char) va_arg(args, int); |
while (--field_width > 0) *str++ = ' '; |
continue; |
switch (*fmt) |
{ |
case 'c': |
if (!(flags & LEFT)) |
while (--field_width > 0) |
*str++ = ' '; |
*str++ = (unsigned char)va_arg(args, int); |
while (--field_width > 0) |
*str++ = ' '; |
continue; |
|
case 's': |
s = va_arg(args, char *); |
if (!s) s = "<NULL>"; |
len = strnlen(s, precision); |
if (!(flags & LEFT)) while (len < field_width--) *str++ = ' '; |
for (i = 0; i < len; ++i) *str++ = *s++; |
while (len < field_width--) *str++ = ' '; |
continue; |
case 's': |
s = va_arg(args, char *); |
if (!s) |
s = "<NULL>"; |
len = strnlen(s, precision); |
if (!(flags & LEFT)) |
while (len < field_width--) |
*str++ = ' '; |
for (i = 0; i < len; ++i) |
*str++ = *s++; |
while (len < field_width--) |
*str++ = ' '; |
continue; |
|
case 'p': |
if (field_width == -1) |
{ |
field_width = 2 * sizeof(void *); |
flags |= ZEROPAD; |
} |
str = ee_number(str, (unsigned long) va_arg(args, void *), 16, field_width, precision, flags); |
continue; |
case 'p': |
if (field_width == -1) |
{ |
field_width = 2 * sizeof(void *); |
flags |= ZEROPAD; |
} |
str = number(str, |
(unsigned long)va_arg(args, void *), |
16, |
field_width, |
precision, |
flags); |
continue; |
|
case 'A': |
flags |= UPPERCASE; |
case 'A': |
flags |= UPPERCASE; |
|
case 'a': |
if (qualifier == 'l') |
str = eaddr(str, va_arg(args, unsigned char *), field_width, precision, flags); |
else |
str = iaddr(str, va_arg(args, unsigned char *), field_width, precision, flags); |
continue; |
case 'a': |
if (qualifier == 'l') |
str = eaddr(str, |
va_arg(args, unsigned char *), |
field_width, |
precision, |
flags); |
else |
str = iaddr(str, |
va_arg(args, unsigned char *), |
field_width, |
precision, |
flags); |
continue; |
|
// Integer number formats - set up the flags and "break" |
case 'o': |
base = 8; |
break; |
// Integer number formats - set up the flags and "break" |
case 'o': |
base = 8; |
break; |
|
case 'X': |
flags |= UPPERCASE; |
case 'X': |
flags |= UPPERCASE; |
|
case 'x': |
base = 16; |
break; |
case 'x': |
base = 16; |
break; |
|
case 'd': |
case 'i': |
flags |= SIGN; |
case 'd': |
case 'i': |
flags |= SIGN; |
|
case 'u': |
break; |
case 'u': |
break; |
|
#if defined(HAS_FLOAT) && HAS_FLOAT == 1 |
#if HAS_FLOAT |
|
case 'f': |
str = flt(str, va_arg(args, double), field_width, precision, *fmt, flags | SIGN); |
continue; |
case 'f': |
str = flt(str, |
va_arg(args, double), |
field_width, |
precision, |
*fmt, |
flags | SIGN); |
continue; |
|
#endif |
|
default: |
if (*fmt != '%') *str++ = '%'; |
if (*fmt) |
*str++ = *fmt; |
default: |
if (*fmt != '%') |
*str++ = '%'; |
if (*fmt) |
*str++ = *fmt; |
else |
--fmt; |
continue; |
} |
|
if (qualifier == 'l') |
num = va_arg(args, unsigned long); |
else if (flags & SIGN) |
num = va_arg(args, int); |
else |
--fmt; |
continue; |
num = va_arg(args, unsigned int); |
|
str = number(str, num, base, field_width, precision, flags); |
} |
|
if (qualifier == 'l') |
num = va_arg(args, unsigned long); |
else if (flags & SIGN) |
num = va_arg(args, int); |
else |
num = va_arg(args, unsigned int); |
*str = '\0'; |
return str - buf; |
} |
|
str = ee_number(str, num, base, field_width, precision, flags); |
} |
void |
uart_send_char(char c) |
{ |
//#error "You must implement the method uart_send_char to use this file!\n"; |
/* Output of a char to a UART usually follows the following model: |
Wait until UART is ready |
Write char to UART |
Wait until UART is done |
|
*str = '\0'; |
return str - buf; |
} |
Or in code: |
while (*UART_CONTROL_ADDRESS != UART_READY); |
*UART_DATA_ADDRESS = c; |
while (*UART_CONTROL_ADDRESS != UART_READY); |
|
void uart_send_char(char c) { |
// this is Mbed OS putc to standard uart |
//MBED_PRINT_CHARACTER(c); |
if (c == '\n') |
Check the UART sample code on your platform or the board |
documentation. |
*/ |
|
/* NEORV32-specific */ |
if (c == '\n') { |
neorv32_uart_putc('\r'); |
} |
neorv32_uart_putc(c); |
} |
|
int ee_printf(const char *fmt, ...) |
int |
ee_printf(const char *fmt, ...) |
{ |
MBED_INITIALIZE_PRINT(); |
char buf[256], *p; |
va_list args; |
int n = 0; |
|
char buf[15*80],*p; |
va_list args; |
int n=0; |
va_start(args, fmt); |
ee_vsprintf(buf, fmt, args); |
va_end(args); |
p = buf; |
while (*p) |
{ |
uart_send_char(*p); |
n++; |
p++; |
} |
|
va_start(args, fmt); |
ee_vsprintf(buf, fmt, args); |
va_end(args); |
p=buf; |
while (*p) { |
uart_send_char(*p); |
n++; |
p++; |
} |
|
return n; |
return n; |
} |
/sw/example/cpu_test/main.c
82,10 → 82,23
* This program uses mostly synthetic cases to trigger all implemented exceptions. |
* Each exception is captured and evaluated for correct detection. |
* |
* @note Application has to be compiled with <USER_FLAGS+=-DRUN_CPUTEST> |
* |
* @return Irrelevant. |
**************************************************************************/ |
int main() { |
|
// Disable cpu_test compilation by default |
#ifndef RUN_CPUTEST |
#warning cpu_test HAS NOT BEEN COMPILED! Use >>make USER_FLAGS+=-DRUN_CPUTEST clean_all exe<< to compile it. |
|
// inform the user if you are actually executing this |
neorv32_uart_printf("ERROR! cpu_test has not been compiled. Use >>make USER_FLAGS+=-DRUN_CPUTEST clean_all exe<< to compile it.\n"); |
|
return 0; |
#endif |
|
|
register uint32_t tmp_a, tmp_b, tmp_c; |
uint32_t i, j; |
volatile uint32_t dummy_dst __attribute__((unused)); |
296,22 → 309,27
// Bus timeout latency estimation |
// ---------------------------------------------------------- |
neorv32_cpu_csr_write(CSR_MCAUSE, 0); |
neorv32_uart_printf("[%i] Estimate bus time-out latency: ", cnt_test); |
neorv32_uart_printf("[%i] Estimating bus time-out latency: ", cnt_test); |
cnt_test++; |
|
// start timing |
tmp_a = neorv32_cpu_csr_read(CSR_CYCLE); |
|
// this will timeout |
// this store access will timeout |
MMR_UNREACHABLE = 0; |
|
tmp_a = neorv32_cpu_csr_read(CSR_CYCLE) - tmp_a; |
|
// wait for timeout |
while (neorv32_cpu_csr_read(CSR_MCAUSE) == 0); |
// make sure there was a time-out |
if (neorv32_cpu_csr_read(CSR_MCAUSE) == TRAP_CODE_S_ACCESS) { |
neorv32_uart_printf("~%u cycles ", tmp_a/4); // divide by average CPI |
test_ok(); |
} |
else { |
test_fail(); |
} |
|
tmp_a = tmp_a / 4; // divide by average CPI |
neorv32_uart_printf("~%u cycles\n", tmp_a); |
|
|
// ---------------------------------------------------------- |
// External memory interface test |
// ---------------------------------------------------------- |
/CHANGELOG.md
14,7 → 14,9
|
| Date (*dd.mm.yyyy*) | Version | Comment | |
|:----------:|:-------:|:--------| |
| 11.11.2020 | 1.4.7.0 | Further optimized pipeline front-end: Jumps and branches are one cycle faster (+5% coremark performance); updated synthesis results; updated performance results; added `hello_world` example program | |
| 20.11.2020 | 1.4.7.2 | :warning: fixed bug in CPU bus unit that caused a memory exception after reset in some cases; added second simulated external (Wishbone) memory to testbench (one memory for simulating an external IMEM, one memory for simulating external memory-mapped IO); external bus interface (`wishbone`) now makes sure that a canceled bus transfer is really understood by the accessed peripheral | |
| 20.11.2020 | 1.4.7.1 | Removed legacy (and unused) "update_enable signal" from IMEM | |
| 11.11.2020 | [**:rocket:1.4.7.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.7.0) | Further optimized pipeline front-end: Jumps and branches are one cycle faster (+5% coremark performance); updated synthesis results; updated performance results; added `hello_world` example program | |
| 07.11.2020 | 1.4.6.7 | Updated bootloader (size optimization) and changed processor version output; added project logo; minor data sheet edits | |
| 03.11.2020 | 1.4.6.6 | Removed SPI module's *buggy* "LSB-first mode", SPI module now always sends data MSB-first; removed SPI.CTRL `SPI_CT_DIR` bit; modified bit order in SPI CTRL register; updated SPI SW library | |
| 02.11.2020 | 1.4.6.5 | :warning: Fixed bug in CPU's illegal instruction detection logic; CPU rtl code optimizations - further reduced hardware footprint; rtl code clean-ups | |
49,7 → 51,7
| 11.09.2020 | 1.4.0.4 | Reworked `TRNG` architecture and interface; added text regarding fast interrupt channels usage for the NEORV32 processor | |
| 02.09.2020 | 1.4.0.2 | :warning: Fixed bugs in external memory interface; added option to define latency of simulated external memory in testbench; hardware configuration sanity checks will now only appear once in console; added more details to data sheet section 3.3. Address Space; fixed typos in MEM_*_BASE and MEM_*_SIZE generic names | |
| 01.09.2020 | 1.4.0.1 | Using registers above `x15` when the `E` extension is enabled will now correctly cause an illegal instruction exception | |
| 29.08.2020 | [**:rocket:1.4.0.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.0.0) | Rearranged and reworked this document; added FreeRTOS port, demo & short referencing chapter; removed bootloader-specific linker scripts – main linker script is used for both, applications and bootloader; bootloader can now have `.data` and `.bss` sections; improved IMEM and BOOTROM memory initialization – faster synthesis; image generator now constrains init array size to actual executable size; peripheral/IO devices can only be written in full word mode (= 32-bit); GPIO ports are now 32-bit wide | |
| 29.08.2020 | [**:rocket:1.4.0.0**](https://github.com/stnolting/neorv32/releases/tag/v1.4.0.0) | Rearranged and reworked data sheet; added FreeRTOS port, demo & short referencing chapter; removed bootloader-specific linker scripts – main linker script is used for both, applications and bootloader; bootloader can now have `.data` and `.bss` sections; improved IMEM and BOOTROM memory initialization – faster synthesis; image generator now constrains init array size to actual executable size; peripheral/IO devices can only be written in full word mode (= 32-bit); GPIO ports are now 32-bit wide | |
| 23.08.2020 | 1.3.7.3 | Added custom `mzext` CSR to check for available Z* CPU extensions; multiplier's FAST_MUL mode is one cycle faster now; updated performance data | |
| 20.08.2020 | 1.3.7.2 | Removed bootloader-specific crt0 – bootloader now uses std crt0; makefiles now also support asm and cpp files; made linker scripts more general; renamed makefile "compile" (which is still available for compatibility) target into "exe" | |
| 14.08.2020 | [**:rocket:1.3.7.0**](https://github.com/stnolting/neorv32/releases/tag/v1.3.7.0) | Simplified CPU fetch engine; added configurable CPU instruction prefetch buffer (ipb) FIFO; optimized CPU execute engine; updated performance data | |
/README.md
91,17 → 91,17
* Use LaTeX for data sheet |
* More support for FreeRTOS |
* Further size and performance optimization |
* Add a cache for the external memory interface |
* Synthesis results (+ wrappers?) for more/specific platforms |
* Maybe port additional RTOSs (like [Zephyr](https://github.com/zephyrproject-rtos/zephyr) or [RIOT](https://www.riot-os.org)) |
* Implement further CPU extensions: |
* Bitmanipulation operations (`B`) - when they are *official* |
* Floating-point instructions (`F`) |
* ... |
* Implement further RISC-V (or custom?) CPU extensions (like floating-point operations ('F')) |
* ... |
|
#### Work-in-progress |
|
* A cache for the external memory/bus interface (also providing burst mode?) |
* RISC-V `B` extension ([bitmanipulation](https://github.com/riscv/riscv-bitmanip)) |
|
|
## Features |
|
The full-blown data sheet of the NEORV32 Processor and CPU is available as pdf file: |
180,13 → 180,13
* Machine CSRs: `mstatus` `misa`(read-only!) `mie` `mtvec` `mscratch` `mepc` `mcause` `mtval` `mip` `mvendorid` [`marchid`](https://github.com/riscv/riscv-isa-manual/blob/master/marchid.md) `mimpid` `mhartid` `mzext`(custom) |
* Supported exceptions and interrupts: |
* Misaligned instruction address |
* Instruction access fault |
* Instruction access fault (via unacknowledged bus access after timeout) |
* Illegal instruction |
* Breakpoint (via `ebreak` instruction) |
* Load address misaligned |
* Load access fault |
* Load access fault (via unacknowledged bus access after timeout) |
* Store address misaligned |
* Store access fault |
* Store access fault (via unacknowledged bus access after timeout) |
* Environment call from M-mode (via `ecall` instruction) |
* Machine timer interrupt `mti` (via processor's MTIME unit) |
* Machine software interrupt `msi` (via external signal) |
299,11 → 299,11
|
~~~ |
**Configuration** |
Hardware: 32kB IMEM, 16kB DMEM, 100MHz clock |
CoreMark: 2000 iterations, MEM_METHOD is MEM_STACK |
Compiler: RISCV32-GCC 10.1.0 (rv32i toolchain) |
Flags: default, see makefile |
Peripherals: UART for printing the results |
Hardware: 32kB IMEM, 16kB DMEM, 100MHz clock |
CoreMark: 2000 iterations, MEM_METHOD is MEM_STACK |
Compiler: RISCV32-GCC 10.1.0 (rv32i toolchain) |
Compiler flags: default, see makefile |
Peripherals: UART for printing the results |
~~~ |
|
| CPU | Executable Size | Optimization | CoreMark Score | CoreMarks/MHz | |