1 |
2 |
zero_gravi |
/*
|
2 |
|
|
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
3 |
|
|
|
4 |
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
|
|
you may not use this file except in compliance with the License.
|
6 |
|
|
You may obtain a copy of the License at
|
7 |
|
|
|
8 |
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
9 |
|
|
|
10 |
|
|
Unless required by applicable law or agreed to in writing, software
|
11 |
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
|
|
See the License for the specific language governing permissions and
|
14 |
|
|
limitations under the License.
|
15 |
|
|
|
16 |
|
|
Original Author: Shay Gal-on
|
17 |
|
|
*/
|
18 |
|
|
|
19 |
|
|
/* File: core_main.c
|
20 |
|
|
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
|
21 |
|
|
*/
|
22 |
|
|
#include "coremark.h"
|
23 |
|
|
|
24 |
|
|
/* Function: iterate
|
25 |
|
|
Run the benchmark for a specified number of iterations.
|
26 |
|
|
|
27 |
|
|
Operation:
|
28 |
|
|
For each type of benchmarked algorithm:
|
29 |
|
|
a - Initialize the data block for the algorithm.
|
30 |
|
|
b - Execute the algorithm N times.
|
31 |
|
|
|
32 |
|
|
Returns:
|
33 |
|
|
NULL.
|
34 |
|
|
*/
|
35 |
|
|
static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1};
|
36 |
|
|
static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747};
|
37 |
|
|
static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84};
|
38 |
|
|
void *iterate(void *pres) {
|
39 |
|
|
ee_u32 i;
|
40 |
|
|
ee_u16 crc;
|
41 |
|
|
core_results *res=(core_results *)pres;
|
42 |
|
|
ee_u32 iterations=res->iterations;
|
43 |
|
|
res->crc=0;
|
44 |
|
|
res->crclist=0;
|
45 |
|
|
res->crcmatrix=0;
|
46 |
|
|
res->crcstate=0;
|
47 |
|
|
|
48 |
|
|
for (i=0; i<iterations; i++) {
|
49 |
|
|
crc=core_bench_list(res,1);
|
50 |
|
|
res->crc=crcu16(crc,res->crc);
|
51 |
|
|
crc=core_bench_list(res,-1);
|
52 |
|
|
res->crc=crcu16(crc,res->crc);
|
53 |
|
|
if (i==0) res->crclist=res->crc;
|
54 |
|
|
}
|
55 |
|
|
return NULL;
|
56 |
|
|
}
|
57 |
|
|
|
58 |
|
|
#if (SEED_METHOD==SEED_ARG)
|
59 |
|
|
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
60 |
|
|
#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv)
|
61 |
|
|
#define get_seed_32(x) get_seed_args(x,argc,argv)
|
62 |
|
|
#else /* via function or volatile */
|
63 |
|
|
ee_s32 get_seed_32(int i);
|
64 |
|
|
#define get_seed(x) (ee_s16)get_seed_32(x)
|
65 |
|
|
#endif
|
66 |
|
|
|
67 |
|
|
#if (MEM_METHOD==MEM_STATIC)
|
68 |
|
|
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
69 |
|
|
#endif
|
70 |
|
|
char *mem_name[3] = {"Static","Heap","Stack"};
|
71 |
|
|
/* Function: main
|
72 |
|
|
Main entry routine for the benchmark.
|
73 |
|
|
This function is responsible for the following steps:
|
74 |
|
|
|
75 |
|
|
1 - Initialize input seeds from a source that cannot be determined at compile time.
|
76 |
|
|
2 - Initialize memory block for use.
|
77 |
|
|
3 - Run and time the benchmark.
|
78 |
|
|
4 - Report results, testing the validity of the output if the seeds are known.
|
79 |
|
|
|
80 |
|
|
Arguments:
|
81 |
|
|
1 - first seed : Any value
|
82 |
|
|
2 - second seed : Must be identical to first for iterations to be identical
|
83 |
|
|
3 - third seed : Any value, should be at least an order of magnitude less than the input size, but bigger than 32.
|
84 |
|
|
4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
|
85 |
|
|
|
86 |
|
|
*/
|
87 |
|
|
|
88 |
|
|
#if MAIN_HAS_NOARGC
|
89 |
|
|
MAIN_RETURN_TYPE main(void) {
|
90 |
|
|
int argc=0;
|
91 |
|
|
char *argv[1];
|
92 |
|
|
#else
|
93 |
|
|
MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
94 |
|
|
#endif
|
95 |
|
|
|
96 |
|
|
// -----------------------------------------------
|
97 |
|
|
// -----------------------------------------------
|
98 |
|
|
// Disable coremark compilation by default
|
99 |
|
|
#ifndef RUN_COREMARK
|
100 |
|
|
#warning COREMARK HAS NOT BEEN COMPILED! Use >>make clean compile USER_FLAGS+=-DRUN_COREMARK<< to compile it.
|
101 |
|
|
return 0;
|
102 |
|
|
#endif
|
103 |
|
|
// -----------------------------------------------
|
104 |
|
|
// -----------------------------------------------
|
105 |
|
|
|
106 |
|
|
|
107 |
|
|
ee_u16 i,j=0,num_algorithms=0;
|
108 |
|
|
ee_s16 known_id=-1,total_errors=0;
|
109 |
|
|
ee_u16 seedcrc=0;
|
110 |
|
|
CORE_TICKS total_time;
|
111 |
|
|
core_results results[MULTITHREAD];
|
112 |
|
|
#if (MEM_METHOD==MEM_STACK)
|
113 |
|
|
ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD];
|
114 |
|
|
#endif
|
115 |
|
|
/* first call any initializations needed */
|
116 |
|
|
portable_init(&(results[0].port), &argc, argv);
|
117 |
|
|
/* First some checks to make sure benchmark will run ok */
|
118 |
|
|
if (sizeof(struct list_head_s)>128) {
|
119 |
|
|
ee_printf("list_head structure too big for comparable data!\n");
|
120 |
|
|
return MAIN_RETURN_VAL;
|
121 |
|
|
}
|
122 |
|
|
results[0].seed1=get_seed(1);
|
123 |
|
|
results[0].seed2=get_seed(2);
|
124 |
|
|
results[0].seed3=get_seed(3);
|
125 |
|
|
results[0].iterations=get_seed_32(4);
|
126 |
|
|
#if CORE_DEBUG
|
127 |
|
|
results[0].iterations=1;
|
128 |
|
|
#endif
|
129 |
|
|
results[0].execs=get_seed_32(5);
|
130 |
|
|
if (results[0].execs==0) { /* if not supplied, execute all algorithms */
|
131 |
|
|
results[0].execs=ALL_ALGORITHMS_MASK;
|
132 |
|
|
}
|
133 |
|
|
/* put in some default values based on one seed only for easy testing */
|
134 |
|
|
if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */
|
135 |
|
|
results[0].seed1=0;
|
136 |
|
|
results[0].seed2=0;
|
137 |
|
|
results[0].seed3=0x66;
|
138 |
|
|
}
|
139 |
|
|
if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */
|
140 |
|
|
results[0].seed1=0x3415;
|
141 |
|
|
results[0].seed2=0x3415;
|
142 |
|
|
results[0].seed3=0x66;
|
143 |
|
|
}
|
144 |
|
|
#if (MEM_METHOD==MEM_STATIC)
|
145 |
|
|
results[0].memblock[0]=(void *)static_memblk;
|
146 |
|
|
results[0].size=TOTAL_DATA_SIZE;
|
147 |
|
|
results[0].err=0;
|
148 |
|
|
#if (MULTITHREAD>1)
|
149 |
|
|
#error "Cannot use a static data area with multiple contexts!"
|
150 |
|
|
#endif
|
151 |
|
|
#elif (MEM_METHOD==MEM_MALLOC)
|
152 |
|
|
for (i=0 ; i<MULTITHREAD; i++) {
|
153 |
|
|
ee_s32 malloc_override=get_seed(7);
|
154 |
|
|
if (malloc_override != 0)
|
155 |
|
|
results[i].size=malloc_override;
|
156 |
|
|
else
|
157 |
|
|
results[i].size=TOTAL_DATA_SIZE;
|
158 |
|
|
results[i].memblock[0]=portable_malloc(results[i].size);
|
159 |
|
|
results[i].seed1=results[0].seed1;
|
160 |
|
|
results[i].seed2=results[0].seed2;
|
161 |
|
|
results[i].seed3=results[0].seed3;
|
162 |
|
|
results[i].err=0;
|
163 |
|
|
results[i].execs=results[0].execs;
|
164 |
|
|
}
|
165 |
|
|
#elif (MEM_METHOD==MEM_STACK)
|
166 |
|
|
for (i=0 ; i<MULTITHREAD; i++) {
|
167 |
|
|
results[i].memblock[0]=stack_memblock+i*TOTAL_DATA_SIZE;
|
168 |
|
|
results[i].size=TOTAL_DATA_SIZE;
|
169 |
|
|
results[i].seed1=results[0].seed1;
|
170 |
|
|
results[i].seed2=results[0].seed2;
|
171 |
|
|
results[i].seed3=results[0].seed3;
|
172 |
|
|
results[i].err=0;
|
173 |
|
|
results[i].execs=results[0].execs;
|
174 |
|
|
}
|
175 |
|
|
#else
|
176 |
|
|
#error "Please define a way to initialize a memory block."
|
177 |
|
|
#endif
|
178 |
|
|
/* Data init */
|
179 |
|
|
/* Find out how space much we have based on number of algorithms */
|
180 |
|
|
for (i=0; i<NUM_ALGORITHMS; i++) {
|
181 |
|
|
if ((1<<(ee_u32)i) & results[0].execs)
|
182 |
|
|
num_algorithms++;
|
183 |
|
|
}
|
184 |
|
|
for (i=0 ; i<MULTITHREAD; i++)
|
185 |
|
|
results[i].size=results[i].size/num_algorithms;
|
186 |
|
|
/* Assign pointers */
|
187 |
|
|
for (i=0; i<NUM_ALGORITHMS; i++) {
|
188 |
|
|
ee_u32 ctx;
|
189 |
|
|
if ((1<<(ee_u32)i) & results[0].execs) {
|
190 |
|
|
for (ctx=0 ; ctx<MULTITHREAD; ctx++)
|
191 |
|
|
results[ctx].memblock[i+1]=(char *)(results[ctx].memblock[0])+results[0].size*j;
|
192 |
|
|
j++;
|
193 |
|
|
}
|
194 |
|
|
}
|
195 |
|
|
/* call inits */
|
196 |
|
|
for (i=0 ; i<MULTITHREAD; i++) {
|
197 |
|
|
if (results[i].execs & ID_LIST) {
|
198 |
|
|
results[i].list=core_list_init(results[0].size,results[i].memblock[1],results[i].seed1);
|
199 |
|
|
}
|
200 |
|
|
if (results[i].execs & ID_MATRIX) {
|
201 |
|
|
core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat) );
|
202 |
|
|
}
|
203 |
|
|
if (results[i].execs & ID_STATE) {
|
204 |
|
|
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
|
205 |
|
|
}
|
206 |
|
|
}
|
207 |
|
|
|
208 |
|
|
/* automatically determine number of iterations if not set */
|
209 |
|
|
if (results[0].iterations==0) {
|
210 |
|
|
secs_ret secs_passed=0;
|
211 |
|
|
ee_u32 divisor;
|
212 |
|
|
results[0].iterations=1;
|
213 |
|
|
while (secs_passed < (secs_ret)1) {
|
214 |
|
|
results[0].iterations*=10;
|
215 |
|
|
start_time();
|
216 |
|
|
iterate(&results[0]);
|
217 |
|
|
stop_time();
|
218 |
|
|
secs_passed=time_in_secs(get_time());
|
219 |
|
|
}
|
220 |
|
|
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
|
221 |
|
|
divisor=(ee_u32)secs_passed;
|
222 |
|
|
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
|
223 |
|
|
divisor=1;
|
224 |
|
|
results[0].iterations*=1+10/divisor;
|
225 |
|
|
}
|
226 |
|
|
/* perform actual benchmark */
|
227 |
|
|
start_time();
|
228 |
|
|
#if (MULTITHREAD>1)
|
229 |
|
|
if (default_num_contexts>MULTITHREAD) {
|
230 |
|
|
default_num_contexts=MULTITHREAD;
|
231 |
|
|
}
|
232 |
|
|
for (i=0 ; i<default_num_contexts; i++) {
|
233 |
|
|
results[i].iterations=results[0].iterations;
|
234 |
|
|
results[i].execs=results[0].execs;
|
235 |
|
|
core_start_parallel(&results[i]);
|
236 |
|
|
}
|
237 |
|
|
for (i=0 ; i<default_num_contexts; i++) {
|
238 |
|
|
core_stop_parallel(&results[i]);
|
239 |
|
|
}
|
240 |
|
|
#else
|
241 |
|
|
iterate(&results[0]);
|
242 |
|
|
#endif
|
243 |
|
|
stop_time();
|
244 |
|
|
total_time=get_time();
|
245 |
|
|
/* get a function of the input to report */
|
246 |
|
|
seedcrc=crc16(results[0].seed1,seedcrc);
|
247 |
|
|
seedcrc=crc16(results[0].seed2,seedcrc);
|
248 |
|
|
seedcrc=crc16(results[0].seed3,seedcrc);
|
249 |
|
|
seedcrc=crc16(results[0].size,seedcrc);
|
250 |
|
|
|
251 |
|
|
switch (seedcrc) { /* test known output for common seeds */
|
252 |
|
|
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
253 |
|
|
known_id=0;
|
254 |
|
|
ee_printf("6k performance run parameters for coremark.\n");
|
255 |
|
|
break;
|
256 |
|
|
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */
|
257 |
|
|
known_id=1;
|
258 |
|
|
ee_printf("6k validation run parameters for coremark.\n");
|
259 |
|
|
break;
|
260 |
|
|
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */
|
261 |
|
|
known_id=2;
|
262 |
|
|
ee_printf("Profile generation run parameters for coremark.\n");
|
263 |
|
|
break;
|
264 |
|
|
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
265 |
|
|
known_id=3;
|
266 |
|
|
ee_printf("2K performance run parameters for coremark.\n");
|
267 |
|
|
break;
|
268 |
|
|
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */
|
269 |
|
|
known_id=4;
|
270 |
|
|
ee_printf("2K validation run parameters for coremark.\n");
|
271 |
|
|
break;
|
272 |
|
|
default:
|
273 |
|
|
total_errors=-1;
|
274 |
|
|
break;
|
275 |
|
|
}
|
276 |
|
|
if (known_id>=0) {
|
277 |
|
|
for (i=0 ; i<default_num_contexts; i++) {
|
278 |
|
|
results[i].err=0;
|
279 |
|
|
if ((results[i].execs & ID_LIST) &&
|
280 |
|
|
(results[i].crclist!=list_known_crc[known_id])) {
|
281 |
|
|
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",i,results[i].crclist,list_known_crc[known_id]);
|
282 |
|
|
results[i].err++;
|
283 |
|
|
}
|
284 |
|
|
if ((results[i].execs & ID_MATRIX) &&
|
285 |
|
|
(results[i].crcmatrix!=matrix_known_crc[known_id])) {
|
286 |
|
|
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",i,results[i].crcmatrix,matrix_known_crc[known_id]);
|
287 |
|
|
results[i].err++;
|
288 |
|
|
}
|
289 |
|
|
if ((results[i].execs & ID_STATE) &&
|
290 |
|
|
(results[i].crcstate!=state_known_crc[known_id])) {
|
291 |
|
|
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",i,results[i].crcstate,state_known_crc[known_id]);
|
292 |
|
|
results[i].err++;
|
293 |
|
|
}
|
294 |
|
|
total_errors+=results[i].err;
|
295 |
|
|
}
|
296 |
|
|
}
|
297 |
|
|
total_errors+=check_data_types();
|
298 |
|
|
/* and report results */
|
299 |
|
|
ee_printf("CoreMark Size : %lu\n", (long unsigned) results[0].size);
|
300 |
|
|
ee_printf("Total ticks : see NEORV32 output below\n", (long unsigned) total_time); /* NEORV32 modified due to overflow in %lu in ee_printf */
|
301 |
|
|
#if HAS_FLOAT
|
302 |
|
|
ee_printf("Total time (secs): %f\n",time_in_secs(total_time));
|
303 |
|
|
if (time_in_secs(total_time) > 0)
|
304 |
|
|
ee_printf("Iterations/Sec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
|
305 |
|
|
#else
|
306 |
|
|
ee_printf("Total time (secs): %d\n",time_in_secs(total_time));
|
307 |
|
|
if (time_in_secs(total_time) > 0)
|
308 |
|
|
ee_printf("Iterations/Sec : %d\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
|
309 |
|
|
#endif
|
310 |
|
|
if (time_in_secs(total_time) < 10) {
|
311 |
|
|
ee_printf("ERROR! Must execute for at least 10 secs for a valid result!\n");
|
312 |
|
|
total_errors++;
|
313 |
|
|
}
|
314 |
|
|
|
315 |
|
|
ee_printf("Iterations : %lu\n", (long unsigned) default_num_contexts*results[0].iterations);
|
316 |
|
|
ee_printf("Compiler version : %s\n",COMPILER_VERSION);
|
317 |
|
|
ee_printf("Compiler flags : %s\n",COMPILER_FLAGS);
|
318 |
|
|
#if (MULTITHREAD>1)
|
319 |
|
|
ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts);
|
320 |
|
|
#endif
|
321 |
|
|
ee_printf("Memory location : %s\n",MEM_LOCATION);
|
322 |
|
|
/* output for verification */
|
323 |
|
|
ee_printf("seedcrc : 0x%04x\n",seedcrc);
|
324 |
|
|
if (results[0].execs & ID_LIST)
|
325 |
|
|
for (i=0 ; i<default_num_contexts; i++)
|
326 |
|
|
ee_printf("[%d]crclist : 0x%04x\n",i,results[i].crclist);
|
327 |
|
|
if (results[0].execs & ID_MATRIX)
|
328 |
|
|
for (i=0 ; i<default_num_contexts; i++)
|
329 |
|
|
ee_printf("[%d]crcmatrix : 0x%04x\n",i,results[i].crcmatrix);
|
330 |
|
|
if (results[0].execs & ID_STATE)
|
331 |
|
|
for (i=0 ; i<default_num_contexts; i++)
|
332 |
|
|
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
|
333 |
|
|
for (i=0 ; i<default_num_contexts; i++)
|
334 |
|
|
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
|
335 |
|
|
if (total_errors==0) {
|
336 |
|
|
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
|
337 |
|
|
#if HAS_FLOAT
|
338 |
|
|
if (known_id==3) {
|
339 |
|
|
ee_printf("CoreMark 1.0 : %f / %s %s",default_num_contexts*results[0].iterations/time_in_secs(total_time),COMPILER_VERSION,COMPILER_FLAGS);
|
340 |
|
|
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
341 |
|
|
ee_printf(" / %s",MEM_LOCATION);
|
342 |
|
|
#else
|
343 |
|
|
ee_printf(" / %s",mem_name[MEM_METHOD]);
|
344 |
|
|
#endif
|
345 |
|
|
|
346 |
|
|
#if (MULTITHREAD>1)
|
347 |
|
|
ee_printf(" / %d:%s",default_num_contexts,PARALLEL_METHOD);
|
348 |
|
|
#endif
|
349 |
|
|
ee_printf("\n");
|
350 |
|
|
}
|
351 |
|
|
#endif
|
352 |
|
|
}
|
353 |
|
|
if (total_errors>0)
|
354 |
|
|
ee_printf("Errors detected\n");
|
355 |
|
|
if (total_errors<0)
|
356 |
|
|
ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n");
|
357 |
|
|
|
358 |
|
|
#if (MEM_METHOD==MEM_MALLOC)
|
359 |
|
|
for (i=0 ; i<MULTITHREAD; i++)
|
360 |
|
|
portable_free(results[i].memblock[0]);
|
361 |
|
|
#endif
|
362 |
|
|
/* And last call any target specific code for finalizing */
|
363 |
|
|
portable_fini(&(results[0].port));
|
364 |
|
|
|
365 |
|
|
return MAIN_RETURN_VAL;
|
366 |
|
|
}
|
367 |
|
|
|
368 |
|
|
|