/*
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Original Author: Shay Gal-on
*/

/* Modified for the NEORV32 Processor - by Stephan Nolting */

#include "coremark.h"
#include "core_portme.h"

/*
 * Benchmark seed values.
 *
 * seed1..seed3 get a different set of initial values for each run type
 * (VALIDATION_RUN, PERFORMANCE_RUN, PROFILE_RUN). All seeds are declared
 * volatile, so each read of them is a real memory access.
 */
#if VALIDATION_RUN
volatile ee_s32 seed1_volatile = 0x3415;
volatile ee_s32 seed2_volatile = 0x3415;
volatile ee_s32 seed3_volatile = 0x66;
#endif /* VALIDATION_RUN */

#if PERFORMANCE_RUN
volatile ee_s32 seed1_volatile = 0x0;
volatile ee_s32 seed2_volatile = 0x0;
volatile ee_s32 seed3_volatile = 0x66;
#endif /* PERFORMANCE_RUN */

#if PROFILE_RUN
volatile ee_s32 seed1_volatile = 0x8;
volatile ee_s32 seed2_volatile = 0x8;
volatile ee_s32 seed3_volatile = 0x8;
#endif /* PROFILE_RUN */

/* seed4 carries the configured iteration count; seed5 starts at zero. */
volatile ee_s32 seed4_volatile = ITERATIONS;
volatile ee_s32 seed5_volatile = 0;

/* Porting : Timing functions
|
42 |
38 |
zero_gravi |
How to capture time and convert to seconds must be ported to whatever is
|
43 |
|
|
supported by the platform. e.g. Read value from on board RTC, read value from
|
44 |
|
|
cpu clock cycles performance counter etc. Sample implementation for standard
|
45 |
|
|
time.h and windows.h definitions included.
|
46 |
2 |
zero_gravi |
*/
|
47 |
38 |
zero_gravi |
CORETIMETYPE
|
48 |
|
|
barebones_clock()
|
49 |
|
|
{
|
50 |
|
|
/*
|
51 |
|
|
#error \
|
52 |
|
|
"You must implement a method to measure time in barebones_clock()! This function should return current time.\n"
|
53 |
|
|
*/
|
54 |
|
|
return 0;
|
55 |
|
|
}
|
56 |
2 |
zero_gravi |
/* Define : TIMER_RES_DIVIDER
|
57 |
38 |
zero_gravi |
Divider to trade off timer resolution and total time that can be
|
58 |
|
|
measured.
|
59 |
2 |
zero_gravi |
|
60 |
38 |
zero_gravi |
Use lower values to increase resolution, but make sure that overflow
|
61 |
|
|
does not occur. If there are issues with the return value overflowing,
|
62 |
|
|
increase this value.
|
63 |
|
|
*/
|
64 |
|
|
#define GETMYTIME(_t) (*_t = (CORETIMETYPE)neorv32_cpu_get_cycle())
|
65 |
|
|
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
66 |
|
|
#define TIMER_RES_DIVIDER 1
|
67 |
2 |
zero_gravi |
#define SAMPLE_TIME_IMPLEMENTATION 1
|
68 |
38 |
zero_gravi |
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
69 |
2 |
zero_gravi |
|
70 |
|
|
/** Define Host specific (POSIX), or target specific global time variables. */
|
71 |
38 |
zero_gravi |
static CORETIMETYPE start_time_val, stop_time_val;
|
72 |
2 |
zero_gravi |
|
73 |
|
|
/* Function : start_time
|
74 |
38 |
zero_gravi |
This function will be called right before starting the timed portion of
|
75 |
|
|
the benchmark.
|
76 |
2 |
zero_gravi |
|
77 |
38 |
zero_gravi |
Implementation may be capturing a system timer (as implemented in the
|
78 |
|
|
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
79 |
|
|
cycles to 0.
|
80 |
2 |
zero_gravi |
*/
|
81 |
38 |
zero_gravi |
void
|
82 |
|
|
start_time(void)
|
83 |
|
|
{
|
84 |
42 |
zero_gravi |
neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, 0); // start all counters
|
85 |
38 |
zero_gravi |
GETMYTIME(&start_time_val);
|
86 |
2 |
zero_gravi |
}
|
87 |
|
|
/* Function : stop_time
|
88 |
38 |
zero_gravi |
This function will be called right after ending the timed portion of the
|
89 |
|
|
benchmark.
|
90 |
2 |
zero_gravi |
|
91 |
38 |
zero_gravi |
Implementation may be capturing a system timer (as implemented in the
|
92 |
|
|
example code) or other system parameters - e.g. reading the current value of
|
93 |
|
|
cpu cycles counter.
|
94 |
2 |
zero_gravi |
*/
|
95 |
38 |
zero_gravi |
void
|
96 |
|
|
stop_time(void)
|
97 |
|
|
{
|
98 |
42 |
zero_gravi |
neorv32_cpu_csr_write(CSR_MCOUNTINHIBIT, -1); // stop all counters
|
99 |
38 |
zero_gravi |
GETMYTIME(&stop_time_val);
|
100 |
2 |
zero_gravi |
}
|
101 |
|
|
/* Function : get_time
|
102 |
38 |
zero_gravi |
Return an abstract "ticks" number that signifies time on the system.
|
103 |
|
|
|
104 |
|
|
Actual value returned may be cpu cycles, milliseconds or any other
|
105 |
|
|
value, as long as it can be converted to seconds by <time_in_secs>. This
|
106 |
|
|
methodology is taken to accomodate any hardware or simulated platform. The
|
107 |
|
|
sample implementation returns millisecs by default, and the resolution is
|
108 |
|
|
controlled by <TIMER_RES_DIVIDER>
|
109 |
2 |
zero_gravi |
*/
|
110 |
38 |
zero_gravi |
CORE_TICKS
|
111 |
|
|
get_time(void)
|
112 |
|
|
{
|
113 |
|
|
CORE_TICKS elapsed
|
114 |
|
|
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
115 |
|
|
return elapsed;
|
116 |
2 |
zero_gravi |
}
|
117 |
|
|
/* Function : time_in_secs
|
118 |
38 |
zero_gravi |
Convert the value returned by get_time to seconds.
|
119 |
2 |
zero_gravi |
|
120 |
38 |
zero_gravi |
The <secs_ret> type is used to accomodate systems with no support for
|
121 |
|
|
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
122 |
|
|
macro above.
|
123 |
2 |
zero_gravi |
*/
|
124 |
38 |
zero_gravi |
secs_ret
|
125 |
|
|
time_in_secs(CORE_TICKS ticks)
|
126 |
|
|
{
|
127 |
|
|
/* NEORV32-specific */
|
128 |
42 |
zero_gravi |
secs_ret retval = (secs_ret)(((CORE_TICKS)ticks) / ((CORE_TICKS)SYSINFO_CLK));
|
129 |
38 |
zero_gravi |
return retval;
|
130 |
2 |
zero_gravi |
}
|
131 |
|
|
|
132 |
38 |
zero_gravi |
/* Default number of benchmark contexts for this port. */
ee_u32 default_num_contexts = 1;

/* Number of available hardware performance monitors.
   Zero until portable_init() stores the value reported by
   neorv32_cpu_hpm_get_counters(). */
uint32_t num_hpm_cnts_global = 0;

/* Function : portable_init
|
139 |
38 |
zero_gravi |
Target specific initialization code
|
140 |
|
|
Test for some common mistakes.
|
141 |
2 |
zero_gravi |
*/
|
142 |
38 |
zero_gravi |
#ifndef RUN_COREMARK
|
143 |
|
|
void
|
144 |
|
|
__attribute__((__noreturn__))
|
145 |
|
|
portable_init(core_portable *p, int *argc, char *argv[])
|
146 |
|
|
#else
|
147 |
|
|
void
|
148 |
|
|
portable_init(core_portable *p, int *argc, char *argv[])
|
149 |
|
|
#endif
|
150 |
2 |
zero_gravi |
{
|
151 |
38 |
zero_gravi |
/* NEORV32-specific */
|
152 |
|
|
neorv32_cpu_dint(); // no interrupt, thanks
|
153 |
51 |
zero_gravi |
neorv32_rte_setup(); // capture all exceptions and give debug information, ho hw flow control
|
154 |
|
|
neorv32_uart_setup(BAUD_RATE, PARITY_NONE, FLOW_CONTROL_NONE);
|
155 |
2 |
zero_gravi |
|
156 |
|
|
|
157 |
38 |
zero_gravi |
// Disable coremark compilation by default
|
158 |
|
|
#ifndef RUN_COREMARK
|
159 |
|
|
#warning COREMARK HAS NOT BEEN COMPILED! Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it.
|
160 |
2 |
zero_gravi |
|
161 |
38 |
zero_gravi |
// inform the user if you are actually executing this
|
162 |
|
|
neorv32_uart_printf("ERROR! CoreMark has not been compiled. Use >>make USER_FLAGS+=-DRUN_COREMARK clean_all exe<< to compile it.\n");
|
163 |
|
|
|
164 |
|
|
while(1);
|
165 |
|
|
#endif
|
166 |
|
|
|
167 |
44 |
zero_gravi |
// check available hardware extensions and compare with compiler flags
|
168 |
|
|
neorv32_rte_check_isa(0); // silent = 0 -> show message if isa mismatch
|
169 |
|
|
|
170 |
42 |
zero_gravi |
num_hpm_cnts_global = neorv32_cpu_hpm_get_counters();
|
171 |
38 |
zero_gravi |
|
172 |
42 |
zero_gravi |
// try to setup as many HPMs as possible
|
173 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER3, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT3, 1 << HPMCNT_EVENT_CIR);
|
174 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER4, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT4, 1 << HPMCNT_EVENT_WAIT_IF);
|
175 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER5, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT5, 1 << HPMCNT_EVENT_WAIT_II);
|
176 |
45 |
zero_gravi |
neorv32_cpu_csr_write(CSR_MHPMCOUNTER6, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT6, 1 << HPMCNT_EVENT_WAIT_MC);
|
177 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER7, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT7, 1 << HPMCNT_EVENT_LOAD);
|
178 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER8, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT8, 1 << HPMCNT_EVENT_STORE);
|
179 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER9, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT9, 1 << HPMCNT_EVENT_WAIT_LS);
|
180 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER10, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT10, 1 << HPMCNT_EVENT_JUMP);
|
181 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER11, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT11, 1 << HPMCNT_EVENT_BRANCH);
|
182 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER12, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT12, 1 << HPMCNT_EVENT_TBRANCH);
|
183 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER13, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT13, 1 << HPMCNT_EVENT_TRAP);
|
184 |
|
|
neorv32_cpu_csr_write(CSR_MHPMCOUNTER14, 0); neorv32_cpu_csr_write(CSR_MHPMEVENT14, 1 << HPMCNT_EVENT_ILLEGAL);
|
185 |
42 |
zero_gravi |
|
186 |
12 |
zero_gravi |
neorv32_uart_printf("NEORV32: Processor running at %u Hz\n", (uint32_t)SYSINFO_CLK);
|
187 |
2 |
zero_gravi |
neorv32_uart_printf("NEORV32: Executing coremark (%u iterations). This may take some time...\n\n", (uint32_t)ITERATIONS);
|
188 |
|
|
|
189 |
42 |
zero_gravi |
// clear cycle counter
|
190 |
|
|
neorv32_cpu_set_mcycle(0);
|
191 |
|
|
neorv32_cpu_csr_write(CSR_MCOUNTEREN, -1); // enable access to all counters
|
192 |
|
|
|
193 |
38 |
zero_gravi |
/*
|
194 |
|
|
#error \
|
195 |
|
|
"Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
|
196 |
|
|
*/
|
197 |
|
|
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
198 |
|
|
{
|
199 |
|
|
ee_printf(
|
200 |
|
|
"ERROR! Please define ee_ptr_int to a type that holds a "
|
201 |
|
|
"pointer!\n");
|
202 |
|
|
}
|
203 |
|
|
if (sizeof(ee_u32) != 4)
|
204 |
|
|
{
|
205 |
|
|
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
206 |
|
|
}
|
207 |
|
|
p->portable_id = 1;
|
208 |
|
|
|
209 |
|
|
#ifndef RUN_COREMARK
|
210 |
|
|
while(1);
|
211 |
|
|
#endif
|
212 |
2 |
zero_gravi |
}
|
213 |
42 |
zero_gravi |
|
214 |
|
|
|
215 |
2 |
zero_gravi |
/* Function : portable_fini
|
216 |
38 |
zero_gravi |
Target specific final code
|
217 |
2 |
zero_gravi |
*/
|
218 |
38 |
zero_gravi |
void
|
219 |
|
|
portable_fini(core_portable *p)
|
220 |
2 |
zero_gravi |
{
|
221 |
38 |
zero_gravi |
p->portable_id = 0;
|
222 |
2 |
zero_gravi |
|
223 |
42 |
zero_gravi |
/* NEORV32-specific */
|
224 |
38 |
zero_gravi |
|
225 |
2 |
zero_gravi |
// show executed instructions, required cycles and resulting average CPI
|
226 |
|
|
union {
|
227 |
|
|
uint64_t uint64;
|
228 |
|
|
uint32_t uint32[sizeof(uint64_t)/2];
|
229 |
|
|
} exe_instructions, exe_time;
|
230 |
|
|
|
231 |
38 |
zero_gravi |
exe_time.uint64 = (uint64_t)get_time();
|
232 |
12 |
zero_gravi |
exe_instructions.uint64 = neorv32_cpu_get_instret();
|
233 |
2 |
zero_gravi |
|
234 |
45 |
zero_gravi |
neorv32_uart_printf("\nNEORV32: All reported numbers only show the integer part.\n\n");
|
235 |
22 |
zero_gravi |
|
236 |
42 |
zero_gravi |
neorv32_uart_printf("NEORV32: HPM results\n");
|
237 |
|
|
if (num_hpm_cnts_global == 0) {neorv32_uart_printf("no HPMs available\n"); }
|
238 |
|
|
if (num_hpm_cnts_global > 0) {neorv32_uart_printf("# Retired compr. instructions: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); }
|
239 |
|
|
if (num_hpm_cnts_global > 1) {neorv32_uart_printf("# I-fetch wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); }
|
240 |
|
|
if (num_hpm_cnts_global > 2) {neorv32_uart_printf("# I-issue wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); }
|
241 |
45 |
zero_gravi |
if (num_hpm_cnts_global > 3) {neorv32_uart_printf("# Multi-cycle ALU wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); }
|
242 |
|
|
if (num_hpm_cnts_global > 4) {neorv32_uart_printf("# Load operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); }
|
243 |
|
|
if (num_hpm_cnts_global > 5) {neorv32_uart_printf("# Store operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); }
|
244 |
|
|
if (num_hpm_cnts_global > 6) {neorv32_uart_printf("# Load/store wait cycles: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); }
|
245 |
|
|
if (num_hpm_cnts_global > 7) {neorv32_uart_printf("# Unconditional jumps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); }
|
246 |
|
|
if (num_hpm_cnts_global > 8) {neorv32_uart_printf("# Conditional branches (all): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); }
|
247 |
|
|
if (num_hpm_cnts_global > 9) {neorv32_uart_printf("# Conditional branches (taken): %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER12)); }
|
248 |
|
|
if (num_hpm_cnts_global > 10) {neorv32_uart_printf("# Entered traps: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER13)); }
|
249 |
|
|
if (num_hpm_cnts_global > 11) {neorv32_uart_printf("# Illegal operations: %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER14)); }
|
250 |
42 |
zero_gravi |
neorv32_uart_printf("\n");
|
251 |
|
|
|
252 |
22 |
zero_gravi |
neorv32_uart_printf("NEORV32: Executed instructions 0x%x_%x\n", (uint32_t)exe_instructions.uint32[1], (uint32_t)exe_instructions.uint32[0]);
|
253 |
12 |
zero_gravi |
neorv32_uart_printf("NEORV32: CoreMark core clock cycles 0x%x_%x\n", (uint32_t)exe_time.uint32[1], (uint32_t)exe_time.uint32[0]);
|
254 |
2 |
zero_gravi |
|
255 |
38 |
zero_gravi |
uint64_t average_cpi_int = exe_time.uint64 / exe_instructions.uint64;
|
256 |
|
|
neorv32_uart_printf("NEORV32: Average CPI (integer part only): %u cycles/instruction\n", (uint32_t)average_cpi_int);
|
257 |
|
|
|
258 |
2 |
zero_gravi |
}
|