/* cuc.c -- OpenRISC Custom Unit Compiler
 *
 * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
 *
 * This file is part of OpenRISC 1000 Architectural Simulator.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/* Main file, including code optimization and command prompt */
|
|
|
#include <stdio.h>
|
#include <stdio.h>
|
#include <stdlib.h>
|
#include <stdlib.h>
|
#include <stdarg.h>
|
#include <stdarg.h>
|
#include <assert.h>
|
#include <assert.h>
|
#include <ctype.h>
|
#include <ctype.h>
|
#include <string.h>
|
#include <string.h>
|
#include <unistd.h>
|
#include <unistd.h>
|
|
|
|
#include "config.h"
|
|
|
|
#ifdef HAVE_INTTYPES_H
|
|
#include <inttypes.h>
|
|
#endif
|
|
|
|
#include "port.h"
|
|
#include "arch.h"
|
#include "abstract.h"
|
#include "abstract.h"
|
#include "sim-config.h"
|
#include "sim-config.h"
|
#include "cuc.h"
|
#include "cuc.h"
|
#include "insn.h"
|
#include "insn.h"
|
#include "profiler.h"
|
#include "profiler.h"
|
#include "opcode/or32.h"
|
#include "opcode/or32.h"
|
#include "parse.h"
|
#include "parse.h"
|
#include "debug.h"
|
#include "debug.h"
|
|
|
/* Stream for CUC log output (presumably what the log () macro writes to --
   it is not opened anywhere in this part of the file). */
FILE *flog;

/* CUC debug verbosity: the code below compares it against increasing
   thresholds to decide which intermediate dumps to print; 0 disables them. */
int cuc_debug = 0;
|
|
|
/* Last used registers by software convention */
|
/* Last used registers by software convention */
|
/* Note that r11 is caller saved register, and we can destroy it.
|
/* Note that r11 is caller saved register, and we can destroy it.
|
Due to CUC architecture we must always return something, even garbage (so that
|
Due to CUC architecture we must always return something, even garbage (so that
|
caller knows, we are finished, when we send acknowledge).
|
caller knows, we are finished, when we send acknowledge).
|
In case r11 was not used (trivial register assignment) we will remove it later,
|
In case r11 was not used (trivial register assignment) we will remove it later,
|
but if we assigned a value to it, it must not be removed, so caller_saved[11] = 0 */
|
but if we assigned a value to it, it must not be removed, so caller_saved[11] = 0 */
|
const int caller_saved[MAX_REGS] = {
|
const int caller_saved[MAX_REGS] = {
|
0, 0, 0, 1, 1, 1, 1, 1,
|
0, 0, 0, 1, 1, 1, 1, 1,
|
1, 1, 0, 0, 0, 1, 0, 1,
|
1, 1, 0, 0, 0, 1, 0, 1,
|
0, 1, 0, 1, 0, 1, 0, 1,
|
0, 1, 0, 1, 0, 1, 0, 1,
|
0, 1, 0, 1, 0, 1, 0, 1,
|
0, 1, 0, 1, 0, 1, 0, 1,
|
1, 1};
|
1, 1};
|
|
|
/* Returns floor (log2 (x)), i.e. the index of the highest set bit of X.
   By convention log2_int (0) is 0 -- not by the book, but practical. */
/* Call this log2_int, because there is a library function named log2 */
int log2_int (unsigned long x)
{
  int c = 0;

  /* X is unsigned, so there is no negative case to guard against. */
  if (!x) return 0;
  while (x != 1) {
    x >>= 1;
    c++;
  }
  return c;
}
|
|
|
/* Does all known instruction optimizations */
|
/* Does all known instruction optimizations */
|
void cuc_optimize (cuc_func *func)
|
void cuc_optimize (cuc_func *func)
|
{
|
{
|
int modified = 0;
|
int modified = 0;
|
int first = 1;
|
int first = 1;
|
log ("Optimizing.\n");
|
log ("Optimizing.\n");
|
do {
|
do {
|
modified = 0;
|
modified = 0;
|
clean_deps (func);
|
clean_deps (func);
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_CLEAN_DEPS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_CLEAN_DEPS");
|
if (optimize_cmovs (func)) {
|
if (optimize_cmovs (func)) {
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_CMOVS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_CMOVS");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (cuc_debug) cuc_check (func);
|
if (cuc_debug) cuc_check (func);
|
if (optimize_tree (func)) {
|
if (optimize_tree (func)) {
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_TREE1");
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_TREE1");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (remove_nops (func)) {
|
if (remove_nops (func)) {
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (cuc_debug) cuc_check (func);
|
if (cuc_debug) cuc_check (func);
|
if (remove_dead (func)) {
|
if (remove_dead (func)) {
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (cuc_debug) cuc_check (func);
|
if (cuc_debug) cuc_check (func);
|
if (cse (func)) {
|
if (cse (func)) {
|
log ("Common subexpression elimination.\n");
|
log ("Common subexpression elimination.\n");
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (first) {
|
if (first) {
|
insert_conditional_facts (func);
|
insert_conditional_facts (func);
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_COND_FACT");
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_COND_FACT");
|
if (cuc_debug) cuc_check (func);
|
if (cuc_debug) cuc_check (func);
|
first = 0;
|
first = 0;
|
}
|
}
|
if (optimize_bb (func)) {
|
if (optimize_bb (func)) {
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (cuc_debug) cuc_check (func);
|
if (cuc_debug) cuc_check (func);
|
if (remove_nops (func)) {
|
if (remove_nops (func)) {
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (remove_dead_bb (func)) {
|
if (remove_dead_bb (func)) {
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (remove_trivial_regs (func)) {
|
if (remove_trivial_regs (func)) {
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
if (remove_nops (func)) {
|
if (remove_nops (func)) {
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
add_memory_dep (func, func->memory_order);
|
add_memory_dep (func, func->memory_order);
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
add_data_dep (func);
|
add_data_dep (func);
|
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
if (schedule_memory (func, func->memory_order)) {
|
if (schedule_memory (func, func->memory_order)) {
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
modified = 1;
|
modified = 1;
|
}
|
}
|
} while (modified);
|
} while (modified);
|
set_io (func);
|
set_io (func);
|
#if 0
|
#if 0
|
detect_max_values (func);
|
detect_max_values (func);
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_MAX_VALUES");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_MAX_VALUES");
|
#endif
|
#endif
|
}
|
}
|
|
|
/* Pre/unrolls basic block and optimizes it */
|
/* Pre/unrolls basic block and optimizes it */
|
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
|
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
|
{
|
{
|
cuc_func *func;
|
cuc_func *func;
|
cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
|
cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
|
log ("BB%i unroll %i times preroll %i times\n", b, j, i);
|
log ("BB%i unroll %i times preroll %i times\n", b, j, i);
|
func = preunroll_loop (f, b, i, j, bb_filename);
|
func = preunroll_loop (f, b, i, j, bb_filename);
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL");
|
cuc_optimize (func);
|
cuc_optimize (func);
|
analyse_timings (func, timings);
|
analyse_timings (func, timings);
|
|
|
cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
|
cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
|
timings->new_time, func->orig_time, timings->size);
|
timings->new_time, func->orig_time, timings->size);
|
log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
|
log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
|
timings->new_time, func->orig_time, timings->size);
|
timings->new_time, func->orig_time, timings->size);
|
//output_verilog (func, argv[1]);
|
//output_verilog (func, argv[1]);
|
free_func (func);
|
free_func (func);
|
timings->b = b;
|
timings->b = b;
|
timings->unroll = j;
|
timings->unroll = j;
|
timings->preroll = i;
|
timings->preroll = i;
|
timings->nshared = 0;
|
timings->nshared = 0;
|
return timings;
|
return timings;
|
}
|
}
|
|
|
/* Simple comparison function */
|
/* Simple comparison function */
|
int tim_comp (cuc_timings *a, cuc_timings *b)
|
int tim_comp (cuc_timings *a, cuc_timings *b)
|
{
|
{
|
if (a->new_time < b->new_time) return -1;
|
if (a->new_time < b->new_time) return -1;
|
else if (a->new_time > b->new_time) return 1;
|
else if (a->new_time > b->new_time) return 1;
|
else return 0;
|
else return 0;
|
}
|
}
|
|
|
/* Analyses function; done when cuc command is entered in (sim) prompt */
|
/* Analyses function; done when cuc command is entered in (sim) prompt */
|
cuc_func *analyse_function (char *module_name, long orig_time,
|
cuc_func *analyse_function (char *module_name, long orig_time,
|
unsigned long start_addr, unsigned long end_addr,
|
unsigned long start_addr, unsigned long end_addr,
|
int memory_order, int num_runs)
|
int memory_order, int num_runs)
|
{
|
{
|
cuc_timings timings;
|
cuc_timings timings;
|
cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
|
cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
|
cuc_func *saved;
|
cuc_func *saved;
|
int b, i, j;
|
int b, i, j;
|
char tmp1[256];
|
char tmp1[256];
|
char tmp2[256];
|
char tmp2[256];
|
|
|
func->orig_time = orig_time;
|
func->orig_time = orig_time;
|
func->start_addr = start_addr;
|
func->start_addr = start_addr;
|
func->end_addr = end_addr;
|
func->end_addr = end_addr;
|
func->memory_order = memory_order;
|
func->memory_order = memory_order;
|
func->nfdeps = 0;
|
func->nfdeps = 0;
|
func->fdeps = NULL;
|
func->fdeps = NULL;
|
func->num_runs = num_runs;
|
func->num_runs = num_runs;
|
|
|
sprintf (tmp1, "%s.bin", module_name);
|
sprintf (tmp1, "%s.bin", module_name);
|
cucdebug (2, "Loading %s.bin\n", module_name);
|
cucdebug (2, "Loading %s.bin\n", module_name);
|
if (cuc_load (tmp1)) {
|
if (cuc_load (tmp1)) {
|
free (func);
|
free (func);
|
return NULL;
|
return NULL;
|
}
|
}
|
|
|
log ("Detecting basic blocks\n");
|
log ("Detecting basic blocks\n");
|
detect_bb (func);
|
detect_bb (func);
|
if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
|
if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
|
|
|
//sprintf (tmp1, "%s.bin.mp", module_name);
|
//sprintf (tmp1, "%s.bin.mp", module_name);
|
sprintf (tmp2, "%s.bin.bb", module_name);
|
sprintf (tmp2, "%s.bin.bb", module_name);
|
generate_bb_seq (func, config.sim.mprof_fn, tmp2);
|
generate_bb_seq (func, config.sim.mprof_fn, tmp2);
|
log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]);
|
log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]);
|
log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]);
|
log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]);
|
|
|
build_bb (func);
|
build_bb (func);
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
|
reg_dep (func);
|
reg_dep (func);
|
|
|
log ("Detecting dependencies\n");
|
log ("Detecting dependencies\n");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
|
cuc_optimize (func);
|
cuc_optimize (func);
|
|
|
#if 0
|
#if 0
|
csm (func);
|
csm (func);
|
#endif
|
#endif
|
assert (saved = dup_func (func));
|
assert (saved = dup_func (func));
|
|
|
timings.preroll = timings.unroll = 1;
|
timings.preroll = timings.unroll = 1;
|
timings.nshared = 0;
|
timings.nshared = 0;
|
|
|
add_latches (func);
|
add_latches (func);
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
|
analyse_timings (func, &timings);
|
analyse_timings (func, &timings);
|
|
|
free_func (func);
|
free_func (func);
|
log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
|
log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
|
timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
|
timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
|
saved->timings = timings;
|
saved->timings = timings;
|
|
|
#if 1
|
#if 1
|
/* detect and unroll simple loops */
|
/* detect and unroll simple loops */
|
for (b = 0; b < saved->num_bb; b++) {
|
for (b = 0; b < saved->num_bb; b++) {
|
cuc_timings t[MAX_UNROLL * MAX_PREROLL];
|
cuc_timings t[MAX_UNROLL * MAX_PREROLL];
|
cuc_timings *ut;
|
cuc_timings *ut;
|
cuc_timings *cut = &t[0];
|
cuc_timings *cut = &t[0];
|
int nt = 1;
|
int nt = 1;
|
double csize;
|
double csize;
|
saved->bb[b].selected_tim = -1;
|
saved->bb[b].selected_tim = -1;
|
|
|
/* Is it a loop? */
|
/* Is it a loop? */
|
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
|
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
|
log ("Found loop at BB%x. Trying to unroll.\n", b);
|
log ("Found loop at BB%x. Trying to unroll.\n", b);
|
t[0] = timings;
|
t[0] = timings;
|
t[0].b = b;
|
t[0].b = b;
|
t[0].preroll = 1;
|
t[0].preroll = 1;
|
t[0].unroll = 1;
|
t[0].unroll = 1;
|
t[0].nshared = 0;
|
t[0].nshared = 0;
|
|
|
sprintf (tmp1, "%s.bin.bb", module_name);
|
sprintf (tmp1, "%s.bin.bb", module_name);
|
i = 1;
|
i = 1;
|
do {
|
do {
|
cuc_timings *pt;
|
cuc_timings *pt;
|
cuc_timings *cpt = cut;
|
cuc_timings *cpt = cut;
|
j = 1;
|
j = 1;
|
|
|
do {
|
do {
|
pt = cpt;
|
pt = cpt;
|
cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
|
cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
|
} while (j <= MAX_PREROLL && pt->new_time > cpt->new_time);
|
} while (j <= MAX_PREROLL && pt->new_time > cpt->new_time);
|
i++;
|
i++;
|
ut = cut;
|
ut = cut;
|
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
|
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
|
} while (i <= MAX_UNROLL && ut->new_time > cut->new_time);
|
} while (i <= MAX_UNROLL && ut->new_time > cut->new_time);
|
|
|
/* Sort the timings */
|
/* Sort the timings */
|
#if 0
|
#if 0
|
if (cuc_debug >= 3)
|
if (cuc_debug >= 3)
|
for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n",
|
for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
|
#endif
|
#endif
|
|
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
|
|
/* Delete timings, that have worst time and bigger size than other */
|
/* Delete timings, that have worst time and bigger size than other */
|
j = 1;
|
j = 1;
|
csize = t[0].size;
|
csize = t[0].size;
|
for (i = 1; i < nt; i++)
|
for (i = 1; i < nt; i++)
|
if (t[i].size < csize) t[j++] = t[i];
|
if (t[i].size < csize) t[j++] = t[i];
|
nt = j;
|
nt = j;
|
|
|
cucdebug (1, "Available options\n");
|
cucdebug (1, "Available options\n");
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
/* Add results from CSM */
|
/* Add results from CSM */
|
j = nt;
|
j = nt;
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
int i1;
|
int i1;
|
for (i1 = 0; i1 < nt; i1++) {
|
for (i1 = 0; i1 < nt; i1++) {
|
t[j] = t[i1];
|
t[j] = t[i1];
|
t[j].size += saved->bb[b].tim[i].size - timings.size;
|
t[j].size += saved->bb[b].tim[i].size - timings.size;
|
t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
|
t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
|
t[j].nshared = saved->bb[b].tim[i].nshared;
|
t[j].nshared = saved->bb[b].tim[i].nshared;
|
t[j].shared = saved->bb[b].tim[i].shared;
|
t[j].shared = saved->bb[b].tim[i].shared;
|
if (++j >= MAX_UNROLL * MAX_PREROLL) goto full;
|
if (++j >= MAX_UNROLL * MAX_PREROLL) goto full;
|
}
|
}
|
}
|
}
|
|
|
full:
|
full:
|
nt = j;
|
nt = j;
|
|
|
cucdebug (1, "Available options:\n");
|
cucdebug (1, "Available options:\n");
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
|
|
/* Sort again with new timings added */
|
/* Sort again with new timings added */
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
|
|
/* Delete timings, that have worst time and bigger size than other */
|
/* Delete timings, that have worst time and bigger size than other */
|
j = 1;
|
j = 1;
|
csize = t[0].size;
|
csize = t[0].size;
|
for (i = 1; i < nt; i++)
|
for (i = 1; i < nt; i++)
|
if (t[i].size < csize) t[j++] = t[i];
|
if (t[i].size < csize) t[j++] = t[i];
|
nt = j;
|
nt = j;
|
|
|
cucdebug (1, "Available options:\n");
|
cucdebug (1, "Available options:\n");
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
|
|
if (saved->bb[b].ntim) free (saved->bb[b].tim);
|
if (saved->bb[b].ntim) free (saved->bb[b].tim);
|
saved->bb[b].ntim = nt;
|
saved->bb[b].ntim = nt;
|
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
|
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
|
|
|
/* Copy options in reverse order -- smallest first */
|
/* Copy options in reverse order -- smallest first */
|
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
|
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
|
|
|
log ("Available options:\n");
|
log ("Available options:\n");
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
|
log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
|
saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
|
saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
|
saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
|
saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
|
}
|
}
|
}
|
}
|
#endif
|
#endif
|
return saved;
|
return saved;
|
}
|
}
|
|
|
/* Utility option formatting functions */

/* Letter used for each option index of a basic block: index 0 is '?',
   1..26 are 'a'..'z' and 27..52 are 'A'..'Z'. */
static const char *option_char = "?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Formats the option name "<bb_no><letter>" into S and returns S.

   When BB_NO is negative no number is written; the option letter is appended
   to the string already held in S, which must therefore be NUL-terminated.
   F_OPT selects the letter from option_char and must lie in
   0 .. strlen (option_char); the upper bound itself selects the terminating
   NUL, i.e. appends nothing.  S needs room for the number, one letter and
   the terminator. */
/*static */char *gen_option (char *s, int bb_no, int f_opt)
{
  size_t len;

  assert (f_opt >= 0 && (size_t) f_opt <= strlen (option_char));

  if (bb_no >= 0) {
    int n = sprintf (s, "%i", bb_no);
    assert (n >= 0);
    len = (size_t) n;
  } else {
    len = strlen (s);
  }

  /* Append by hand: sprintf (s, "%s%c", s, ...) would read and write the same
     buffer, which is undefined behaviour. */
  s[len] = option_char[f_opt];
  s[len + 1] = '\0';
  return s;
}
|
|
|
/* Prints the option name for basic block BB_NO and option F_OPT (as built by
   gen_option ()), right-aligned in a field of at least three characters. */
/*static */void print_option (int bb_no, int f_opt)
{
  /* Large enough for any int, the option letter and the terminator.  Start
     with an empty string because gen_option () appends to the buffer when
     BB_NO is negative. */
  char tmp[16];

  tmp[0] = '\0';
  PRINTF ("%3s", gen_option (tmp, bb_no, f_opt));
}
|
|
|
/* Writes into S a comma separated list of the options selected in F: one
   gen_option () name for every basic block whose selected_tim is not negative.
   S becomes the empty string when nothing is selected.  The caller must
   provide a buffer large enough for the whole list.  Returns S. */
static char *format_func_options (char *s, cuc_func *f)
{
  int b, first = 1;
  char *end = s;   /* write position: the current end of the string */

  *s = '\0';
  for (b = 0; b < f->num_bb; b++) {
    char tmp[16];

    if (f->bb[b].selected_tim < 0) continue;

    /* Append at the end instead of passing S as both source and destination
       of sprintf (), which is undefined behaviour. */
    tmp[0] = '\0';
    end += sprintf (end, "%s%s", first ? "" : ",",
                    gen_option (tmp, b, f->bb[b].selected_tim));
    first = 0;
  }
  return s;
}
|
|
|
/* Prints the option table of function F: a header with the name of profiled
   function FUNC_NO, the base timings, and then one row for every additional
   option of every basic block, giving its time and size relative to the base
   option. */
static void options_cmd (int func_no, cuc_func *f)
{
  int b, i;
  char tmp[30];
  char *name = prof_func[func_no].name;

  PRINTF ("-----------------------------------------------------------------------------\n");
  PRINTF ("|%-28s|pre/unrolled|shared| time | gates |old_time|\n",
          strstrip (tmp, name, 28));
  PRINTF ("| BASE |%4i / %4i | %4i |%8i|%8.f|%8i|\n", 1, 1, 0,
          f->timings.new_time, f->timings.size, f->orig_time);

  for (b = 0; b < f->num_bb; b++) {
    /* Print out results */
    for (i = 1; i < f->bb[b].ntim; i++) { /* First one is base option */
      cuc_timings *opt = &f->bb[b].tim[i];
      int time = opt->new_time - f->timings.new_time;
      double size = opt->size - f->timings.size;

      PRINTF ("| ");
      print_option (b, i);
      PRINTF (" |%4i / %4i | %4i |%+8i|%+8.f| |\n",
              opt->preroll, opt->unroll, opt->nshared,
              time, size);
    }
  }
}
|
|
|
/* Generates a function, based on specified parameters */
|
/* Generates a function, based on specified parameters */
|
cuc_func *generate_function (cuc_func *rf, char *name, char *cut_filename)
|
cuc_func *generate_function (cuc_func *rf, char *name, char *cut_filename)
|
{
|
{
|
int b;
|
int b;
|
char tmp[256];
|
char tmp[256];
|
cuc_timings tt;
|
cuc_timings tt;
|
cuc_func *f;
|
cuc_func *f;
|
assert (f = dup_func (rf));
|
assert (f = dup_func (rf));
|
|
|
if (cuc_debug >= 2) print_cuc_bb (f, "BEFORE_GENERATE");
|
if (cuc_debug >= 2) print_cuc_bb (f, "BEFORE_GENERATE");
|
log ("Generating function %s.\n", name);
|
log ("Generating function %s.\n", name);
|
PRINTF ("Generating function %s.\n", name);
|
PRINTF ("Generating function %s.\n", name);
|
|
|
format_func_options (tmp, rf);
|
format_func_options (tmp, rf);
|
if (strlen (tmp)) PRINTF ("Applying options: %s\n", tmp);
|
if (strlen (tmp)) PRINTF ("Applying options: %s\n", tmp);
|
else PRINTF ("Using basic options.\n");
|
else PRINTF ("Using basic options.\n");
|
|
|
/* Generate function as specified by options */
|
/* Generate function as specified by options */
|
for (b = 0; b < f->num_bb; b++) {
|
for (b = 0; b < f->num_bb; b++) {
|
cuc_timings *st;
|
cuc_timings *st;
|
if (rf->bb[b].selected_tim < 0) continue;
|
if (rf->bb[b].selected_tim < 0) continue;
|
st = &rf->bb[b].tim[rf->bb[b].selected_tim];
|
st = &rf->bb[b].tim[rf->bb[b].selected_tim];
|
sprintf (tmp, "%s.bin.bb", name);
|
sprintf (tmp, "%s.bin.bb", name);
|
preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll);
|
preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll);
|
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL");
|
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL");
|
}
|
}
|
for (b = 0; b < f->num_bb; b++) {
|
for (b = 0; b < f->num_bb; b++) {
|
cuc_timings *st;
|
cuc_timings *st;
|
if (rf->bb[b].selected_tim < 0) continue;
|
if (rf->bb[b].selected_tim < 0) continue;
|
st = &rf->bb[b].tim[rf->bb[b].selected_tim];
|
st = &rf->bb[b].tim[rf->bb[b].selected_tim];
|
if (!st->nshared) continue;
|
if (!st->nshared) continue;
|
assert (0);
|
assert (0);
|
//csm_gen (f, rf, st->nshared, st->shared);
|
//csm_gen (f, rf, st->nshared, st->shared);
|
}
|
}
|
add_latches (f);
|
add_latches (f);
|
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES");
|
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES");
|
analyse_timings (f, &tt);
|
analyse_timings (f, &tt);
|
|
|
sprintf (tmp, "%s%s", cut_filename, name);
|
sprintf (tmp, "%s%s", cut_filename, name);
|
output_verilog (f, tmp, name);
|
output_verilog (f, tmp, name);
|
return f;
|
return f;
|
}
|
}
|
|
|
/* Calculates required time, based on selected options */
|
/* Calculates required time, based on selected options */
|
int calc_cycles (cuc_func *f)
|
int calc_cycles (cuc_func *f)
|
{
|
{
|
int b, ntime = f->timings.new_time;
|
int b, ntime = f->timings.new_time;
|
for (b = 0; b < f->num_bb; b++)
|
for (b = 0; b < f->num_bb; b++)
|
if (f->bb[b].selected_tim >= 0) {
|
if (f->bb[b].selected_tim >= 0) {
|
assert (f->bb[b].selected_tim < f->bb[b].ntim);
|
assert (f->bb[b].selected_tim < f->bb[b].ntim);
|
ntime += f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time;
|
ntime += f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time;
|
}
|
}
|
return ntime;
|
return ntime;
|
}
|
}
|
|
|
/* Calculates required size, based on selected options */
|
/* Calculates required size, based on selected options */
|
double calc_size (cuc_func *f)
|
double calc_size (cuc_func *f)
|
{
|
{
|
int b;
|
int b;
|
double size = f->timings.size;
|
double size = f->timings.size;
|
for (b = 0; b < f->num_bb; b++)
|
for (b = 0; b < f->num_bb; b++)
|
if (f->bb[b].selected_tim >= 0) {
|
if (f->bb[b].selected_tim >= 0) {
|
assert (f->bb[b].selected_tim < f->bb[b].ntim);
|
assert (f->bb[b].selected_tim < f->bb[b].ntim);
|
size += f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size;
|
size += f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size;
|
}
|
}
|
return size;
|
return size;
|
}
|
}
|
|
|
/* Dumps specified function to file (hex).

   Starting at START_ADDR, writes one 32-bit instruction word per line to
   OUT_FN as eight hex digits.  The dump ends with the instruction that
   follows the first "l.jr" (presumably its delay slot).  Returns the address
   of the last word written.  Aborts through assert () when OUT_FN cannot be
   opened or a word does not decode to a known instruction. */
unsigned long extract_function (char *out_fn, unsigned long start_addr)
{
  FILE *fo;
  unsigned long a = start_addr;
  int x = 0;   /* 0 before l.jr, 1 at l.jr, 2 at the instruction after it */

  /* Keep the assignment outside assert () so it survives NDEBUG builds. */
  fo = fopen (out_fn, "wt+");
  assert (fo != NULL);

  do {
    unsigned long d = evalsim_mem32 (a);
    int index = insn_decode (d);
    assert (index >= 0);
    if (x) x++;
    if (strcmp (insn_name (index), "l.jr") == 0) x = 1;
    a += 4;
    fprintf (fo, "%08lx\n", d);
  } while (x < 2);

  fclose (fo);
  return a - 4;
}
|
|
|
/* Analysed functions; a NULL entry means there is no (usable) function in that
   slot.  set_func_deps () pairs index f with prof_func[f]. */
static cuc_func *func[MAX_FUNCS];
/* Per-function table parallel to func[]; it is not used in this part of the
   file. */
static int func_v[MAX_FUNCS];
|
|
|
/* Detects function dependencies and removes */
|
/* Detects function dependencies and removes */
|
static void set_func_deps ()
|
static void set_func_deps ()
|
{
|
{
|
int f, b, i, j;
|
int f, b, i, j;
|
restart:
|
restart:
|
for (f = 0; f < prof_nfuncs - 1; f++) if (func[f]) {
|
for (f = 0; f < prof_nfuncs - 1; f++) if (func[f]) {
|
int fused[MAX_FUNCS] = {0};
|
int fused[MAX_FUNCS] = {0};
|
int c;
|
int c;
|
for (b = 0; b < func[f]->num_bb; b++)
|
for (b = 0; b < func[f]->num_bb; b++)
|
for (i = 0; i < func[f]->bb[b].ninsn; i++) {
|
for (i = 0; i < func[f]->bb[b].ninsn; i++) {
|
cuc_insn *ii = &func[f]->bb[b].insn[i];
|
cuc_insn *ii = &func[f]->bb[b].insn[i];
|
if (ii->index == II_CALL) {
|
if (ii->index == II_CALL) {
|
assert (ii->opt[0] == OPT_CONST);
|
assert (ii->opt[0] == OPT_CONST);
|
for (j = 0; j < prof_nfuncs - 1; j++)
|
for (j = 0; j < prof_nfuncs - 1; j++)
|
if (func[j] && func[j]->start_addr == ii->op[0]) break;
|
if (func[j] && func[j]->start_addr == ii->op[0]) break;
|
if (j >= prof_nfuncs - 1) {
|
if (j >= prof_nfuncs - 1) {
|
log ("%s is calling unknown function, address %08lx\n",
|
log ("%s is calling unknown function, address %08lx\n",
|
prof_func[f].name, ii->op[0]);
|
prof_func[f].name, ii->op[0]);
|
debug (1, "%s is calling unknown function, address %08x\n",
|
debug (1, "%s is calling unknown function, address %08lx\n",
|
prof_func[f].name, ii->op[0]);
|
prof_func[f].name, ii->op[0]);
|
free_func (func[f]);
|
free_func (func[f]);
|
func[f] = NULL;
|
func[f] = NULL;
|
goto restart;
|
goto restart;
|
} else if (f == j) {
|
} else if (f == j) {
|
log ("%s is recursive, ignoring\n", prof_func[f].name);
|
log ("%s is recursive, ignoring\n", prof_func[f].name);
|
debug (1, "%s is recursive, ignoring\n", prof_func[f].name);
|
debug (1, "%s is recursive, ignoring\n", prof_func[f].name);
|
free_func (func[f]);
|
free_func (func[f]);
|
func[f] = NULL;
|
func[f] = NULL;
|
goto restart;
|
goto restart;
|
} else fused[j]++;
|
} else fused[j]++;
|
}
|
}
|
}
|
}
|
for (i = 0; i < MAX_FUNCS; i++) if (fused[i]) c++;
|
for (i = 0; i < MAX_FUNCS; i++) if (fused[i]) c++;
|
if (func[f]->nfdeps) free (func[f]->fdeps);
|
if (func[f]->nfdeps) free (func[f]->fdeps);
|
func[f]->nfdeps = c;
|
func[f]->nfdeps = c;
|
func[f]->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * c);
|
func[f]->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * c);
|
for (i = 0, j = 0; i < MAX_FUNCS; i++)
|
for (i = 0, j = 0; i < MAX_FUNCS; i++)
|
if (fused[i]) func[f]->fdeps[j++] = func[i];
|
if (fused[i]) func[f]->fdeps[j++] = func[i];
|
}
|
}
|
|
|
/* Detect loops */
|
/* Detect loops */
|
{
|
{
|
int change;
|
int change;
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f]) func[f]->tmp = 0;
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f]) func[f]->tmp = 0;
|
do {
|
do {
|
change = 0;
|
change = 0;
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
|
int o = 1;
|
int o = 1;
|
for (i = 0; i < func[f]->nfdeps; i++)
|
for (i = 0; i < func[f]->nfdeps; i++)
|
if (!func[f]->fdeps[i]->tmp) {o = 0; break;}
|
if (!func[f]->fdeps[i]->tmp) {o = 0; break;}
|
if (o) {
|
if (o) {
|
func[f]->tmp = 1;
|
func[f]->tmp = 1;
|
change = 1;
|
change = 1;
|
}
|
}
|
}
|
}
|
} while (change);
|
} while (change);
|
|
|
change = 0;
|
change = 0;
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
|
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
|
free_func (func[f]);
|
free_func (func[f]);
|
func[f] = NULL;
|
func[f] = NULL;
|
change = 1;
|
change = 1;
|
}
|
}
|
if (change) goto restart;
|
if (change) goto restart;
|
}
|
}
|
}
|
}
|
|
|
void main_cuc (char *filename)
|
void main_cuc (char *filename)
|
{
|
{
|
int i, j;
|
int i, j;
|
char tmp1[256];
|
char tmp1[256];
|
char filename_cut[256];
|
char filename_cut[256];
|
#if 0 /* Select prefix, based on binary program name */
|
#if 0 /* Select prefix, based on binary program name */
|
for (i = 0; i < sizeof (filename_cut); i++) {
|
for (i = 0; i < sizeof (filename_cut); i++) {
|
if (isalpha(filename[i])) filename_cut[i] = filename[i];
|
if (isalpha(filename[i])) filename_cut[i] = filename[i];
|
else {
|
else {
|
filename_cut[i] = '\0';
|
filename_cut[i] = '\0';
|
break;
|
break;
|
}
|
}
|
}
|
}
|
#else
|
#else
|
strcpy (filename_cut, "cu");
|
strcpy (filename_cut, "cu");
|
#endif
|
#endif
|
|
|
PRINTF ("Entering OpenRISC Custom Unit Compiler command prompt\n");
|
PRINTF ("Entering OpenRISC Custom Unit Compiler command prompt\n");
|
PRINTF ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
|
PRINTF ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
|
sprintf (tmp1, "%s.log", filename_cut);
|
sprintf (tmp1, "%s.log", filename_cut);
|
PRINTF ("Analyzing. (log file \"%s\").\n", tmp1);
|
PRINTF ("Analyzing. (log file \"%s\").\n", tmp1);
|
assert (flog = fopen (tmp1, "wt+"));
|
assert (flog = fopen (tmp1, "wt+"));
|
|
|
/* Loads in the specified timings table */
|
/* Loads in the specified timings table */
|
PRINTF ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn,
|
PRINTF ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn,
|
generate_time_pretty (tmp1, config.sim.clkcycle_ps));
|
generate_time_pretty (tmp1, config.sim.clkcycle_ps));
|
load_timing_table (config.cuc.timings_fn);
|
load_timing_table (config.cuc.timings_fn);
|
runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps;
|
runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps;
|
PRINTF ("Multicycle logic %s, bursts %s, %s memory order.\n",
|
PRINTF ("Multicycle logic %s, bursts %s, %s memory order.\n",
|
config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF",
|
config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF",
|
config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" :
|
config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" :
|
config.cuc.memory_order == MO_STRONG ? "strong" : "exact");
|
config.cuc.memory_order == MO_STRONG ? "strong" : "exact");
|
|
|
prof_set (1, 0);
|
prof_set (1, 0);
|
assert (prof_acquire (config.sim.prof_fn) == 0);
|
assert (prof_acquire (config.sim.prof_fn) == 0);
|
|
|
if (config.cuc.calling_convention)
|
if (config.cuc.calling_convention)
|
PRINTF ("Assuming OpenRISC standard calling convention.\n");
|
PRINTF ("Assuming OpenRISC standard calling convention.\n");
|
|
|
/* Try all functions except "total" */
|
/* Try all functions except "total" */
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
long orig_time;
|
long orig_time;
|
unsigned long start_addr, end_addr;
|
unsigned long start_addr, end_addr;
|
orig_time = prof_func[i].cum_cycles;
|
orig_time = prof_func[i].cum_cycles;
|
start_addr = prof_func[i].addr;
|
start_addr = prof_func[i].addr;
|
|
|
/* Extract the function from the binary */
|
/* Extract the function from the binary */
|
sprintf (tmp1, "%s.bin", prof_func[i].name);
|
sprintf (tmp1, "%s.bin", prof_func[i].name);
|
end_addr = extract_function (tmp1, start_addr);
|
end_addr = extract_function (tmp1, start_addr);
|
|
|
log ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, start_addr,
|
log ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, start_addr,
|
end_addr);
|
end_addr);
|
PRINTF ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name,
|
PRINTF ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name,
|
start_addr, end_addr);
|
start_addr, end_addr);
|
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr,
|
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr,
|
end_addr, config.cuc.memory_order, prof_func[i].calls);
|
end_addr, config.cuc.memory_order, prof_func[i].calls);
|
func_v[i] = 0;
|
func_v[i] = 0;
|
}
|
}
|
set_func_deps ();
|
set_func_deps ();
|
|
|
while (1) {
|
while (1) {
|
char *s;
|
char *s;
|
wait_command:
|
wait_command:
|
PRINTF ("(cuc) ");
|
PRINTF ("(cuc) ");
|
fflush (stdout);
|
fflush (stdout);
|
wait_command_empty:
|
wait_command_empty:
|
s = fgets(tmp1, sizeof tmp1, stdin);
|
s = fgets(tmp1, sizeof tmp1, stdin);
|
usleep (100);
|
usleep (100);
|
if (!s) goto wait_command_empty;
|
if (!s) goto wait_command_empty;
|
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
|
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
|
*s = '\0';
|
*s = '\0';
|
|
|
/* quit command */
|
/* quit command */
|
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
|
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
|
/* Delete temporary files */
|
/* Delete temporary files */
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
sprintf (tmp1, "%s.bin", prof_func[i].name);
|
sprintf (tmp1, "%s.bin", prof_func[i].name);
|
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
|
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
|
sprintf (tmp1, "%s.bin.bb", prof_func[i].name);
|
sprintf (tmp1, "%s.bin.bb", prof_func[i].name);
|
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
|
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
|
}
|
}
|
break;
|
break;
|
|
|
/* profile command */
|
/* profile command */
|
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
|
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
|
int ntime = 0;
|
int ntime = 0;
|
int size = 0;
|
int size = 0;
|
PRINTF ("-----------------------------------------------------------------------------\n");
|
PRINTF ("-----------------------------------------------------------------------------\n");
|
PRINTF ("|function name |calls|avg cycles |old%%| max. f. | impr. f.| options |\n");
|
PRINTF ("|function name |calls|avg cycles |old%%| max. f. | impr. f.| options |\n");
|
PRINTF ("|--------------------+-----+------------+----+----------|---------+---------|\n");
|
PRINTF ("|--------------------+-----+------------+----+----------|---------+---------|\n");
|
for (j = 0; j < prof_nfuncs; j++) {
|
for (j = 0; j < prof_nfuncs; j++) {
|
int bestcyc = 0, besti = 0;
|
int bestcyc = 0, besti = 0;
|
char tmp[100];
|
char tmp[100];
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
if (prof_func[i].cum_cycles > bestcyc) {
|
if (prof_func[i].cum_cycles > bestcyc) {
|
bestcyc = prof_func[i].cum_cycles;
|
bestcyc = prof_func[i].cum_cycles;
|
besti = i;
|
besti = i;
|
}
|
}
|
i = besti;
|
i = besti;
|
PRINTF ("|%-20s|%5li|%12.1f|%3.0f%%| ",
|
PRINTF ("|%-20s|%5li|%12.1f|%3.0f%%| ",
|
strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls,
|
strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls,
|
((double)prof_func[i].cum_cycles / prof_func[i].calls),
|
((double)prof_func[i].cum_cycles / prof_func[i].calls),
|
(100. * prof_func[i].cum_cycles / prof_cycles));
|
(100. * prof_func[i].cum_cycles / prof_cycles));
|
if (func[i]) {
|
if (func[i]) {
|
double f = 1.0;
|
double f = 1.0;
|
if (func_v[i]) {
|
if (func_v[i]) {
|
int nt = calc_cycles (func[i]);
|
int nt = calc_cycles (func[i]);
|
int s = calc_size (func[i]);
|
int s = calc_size (func[i]);
|
f = 1. * func[i]->orig_time / nt;
|
f = 1. * func[i]->orig_time / nt;
|
ntime += nt;
|
ntime += nt;
|
size += s;
|
size += s;
|
} else ntime += prof_func[i].cum_cycles;
|
} else ntime += prof_func[i].cum_cycles;
|
PRINTF ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles
|
PRINTF ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles
|
/ func[i]->timings.new_time, f, format_func_options (tmp, func[i]));
|
/ func[i]->timings.new_time, f, format_func_options (tmp, func[i]));
|
} else {
|
} else {
|
PRINTF (" N/A | N/A | N/A |\n");
|
PRINTF (" N/A | N/A | N/A |\n");
|
ntime += prof_func[i].cum_cycles;
|
ntime += prof_func[i].cum_cycles;
|
}
|
}
|
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
|
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
|
}
|
}
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
|
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
|
PRINTF ("-----------------------------------------------------------------------------\n");
|
PRINTF ("-----------------------------------------------------------------------------\n");
|
PRINTF ("Total %i cycles (was %i), total added gates = %i. Speed factor %.1f\n",
|
PRINTF ("Total %i cycles (was %i), total added gates = %i. Speed factor %.1f\n",
|
ntime, prof_cycles, size, 1. * prof_cycles / ntime);
|
ntime, prof_cycles, size, 1. * prof_cycles / ntime);
|
|
|
/* debug command */
|
/* debug command */
|
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
|
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
|
sscanf (tmp1, "%*s %i", &cuc_debug);
|
sscanf (tmp1, "%*s %i", &cuc_debug);
|
if (cuc_debug < 0) cuc_debug = 0;
|
if (cuc_debug < 0) cuc_debug = 0;
|
if (cuc_debug > 9) cuc_debug = 9;
|
if (cuc_debug > 9) cuc_debug = 9;
|
|
|
/* generate command */
|
/* generate command */
|
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
|
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
|
/* check for function dependencies */
|
/* check for function dependencies */
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
if (func[i]) func[i]->tmp = func_v[i];
|
if (func[i]) func[i]->tmp = func_v[i];
|
for (i = 0; i < prof_nfuncs; i++) if (func[i])
|
for (i = 0; i < prof_nfuncs; i++) if (func[i])
|
for (j = 0; j < func[i]->nfdeps; j++)
|
for (j = 0; j < func[i]->nfdeps; j++)
|
if (!func[i]->fdeps[j] || !func[i]->fdeps[j]->tmp) {
|
if (!func[i]->fdeps[j] || !func[i]->fdeps[j]->tmp) {
|
PRINTF ("Function %s must be selected for translation (required by %s)\n",
|
PRINTF ("Function %s must be selected for translation (required by %s)\n",
|
prof_func[j].name, prof_func[i].name);
|
prof_func[j].name, prof_func[i].name);
|
goto wait_command;
|
goto wait_command;
|
}
|
}
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name, filename_cut);
|
if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name, filename_cut);
|
generate_main (prof_nfuncs, func, filename_cut);
|
generate_main (prof_nfuncs, func, filename_cut);
|
|
|
/* list command */
|
/* list command */
|
} else if (strcmp (tmp1, "l") == 0 || strcmp (tmp1, "list") == 0) {
|
} else if (strcmp (tmp1, "l") == 0 || strcmp (tmp1, "list") == 0) {
|
/* check for function dependencies */
|
/* check for function dependencies */
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
if (func_v[i]) {
|
if (func_v[i]) {
|
PRINTF ("%s\n", prof_func[j].name);
|
PRINTF ("%s\n", prof_func[j].name);
|
}
|
}
|
|
|
/* selectall command */
|
/* selectall command */
|
} else if (strcmp (tmp1, "sa") == 0 || strcmp (tmp1, "selectall") == 0) {
|
} else if (strcmp (tmp1, "sa") == 0 || strcmp (tmp1, "selectall") == 0) {
|
int f;
|
int f;
|
for (f = 0; f < prof_nfuncs; f++) if (func[f]) {
|
for (f = 0; f < prof_nfuncs; f++) if (func[f]) {
|
func_v[f] = 1;
|
func_v[f] = 1;
|
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
|
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
|
}
|
}
|
|
|
/* select command */
|
/* select command */
|
} else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) {
|
} else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) {
|
char tmp[50], ch;
|
char tmp[50], ch;
|
int p, o, b, f;
|
int p, o, b, f;
|
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
|
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
|
if (p < 1) PRINTF ("Invalid parameters.\n");
|
if (p < 1) PRINTF ("Invalid parameters.\n");
|
else {
|
else {
|
/* Check if we have valid option */
|
/* Check if we have valid option */
|
for (f = 0; f < prof_nfuncs; f++)
|
for (f = 0; f < prof_nfuncs; f++)
|
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
|
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
|
if (f < prof_nfuncs) {
|
if (f < prof_nfuncs) {
|
if (p == 1) {
|
if (p == 1) {
|
if (func[f]) {
|
if (func[f]) {
|
func_v[f] = 1;
|
func_v[f] = 1;
|
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
|
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
|
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
|
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
|
} else {
|
} else {
|
if (!func_v[f])
|
if (!func_v[f])
|
PRINTF ("Function %s not yet selected for translation.\n", prof_func[f].name);
|
PRINTF ("Function %s not yet selected for translation.\n", prof_func[f].name);
|
if (p < 3) goto invalid_option;
|
if (p < 3) goto invalid_option;
|
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
|
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
|
if (!option_char[o]) goto invalid_option;
|
if (!option_char[o]) goto invalid_option;
|
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
|
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
|
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
|
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
|
|
|
/* select an option */
|
/* select an option */
|
func[f]->bb[b].selected_tim = o;
|
func[f]->bb[b].selected_tim = o;
|
if (func[f]->bb[b].tim[o].nshared) {
|
if (func[f]->bb[b].tim[o].nshared) {
|
PRINTF ("Option has shared instructions: ");
|
PRINTF ("Option has shared instructions: ");
|
print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared);
|
print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared);
|
PRINTF ("\n");
|
PRINTF ("\n");
|
}
|
}
|
goto wait_command;
|
goto wait_command;
|
invalid_option:
|
invalid_option:
|
PRINTF ("Invalid option.\n");
|
PRINTF ("Invalid option.\n");
|
}
|
}
|
} else PRINTF ("Invalid function.\n");
|
} else PRINTF ("Invalid function.\n");
|
}
|
}
|
|
|
/* unselect command */
|
/* unselect command */
|
} else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) {
|
} else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) {
|
char tmp[50], ch;
|
char tmp[50], ch;
|
int p, o, b, f;
|
int p, o, b, f;
|
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
|
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
|
if (p < 1) PRINTF ("Invalid parameters.\n");
|
if (p < 1) PRINTF ("Invalid parameters.\n");
|
else {
|
else {
|
/* Check if we have valid option */
|
/* Check if we have valid option */
|
for (f = 0; f < prof_nfuncs; f++)
|
for (f = 0; f < prof_nfuncs; f++)
|
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
|
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
|
if (f < prof_nfuncs) {
|
if (f < prof_nfuncs) {
|
if (p == 1) {
|
if (p == 1) {
|
if (func[f]) {
|
if (func[f]) {
|
func_v[f] = 0;
|
func_v[f] = 0;
|
PRINTF ("Function %s unselected for translation.\n", prof_func[f].name);
|
PRINTF ("Function %s unselected for translation.\n", prof_func[f].name);
|
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
|
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
|
} else {
|
} else {
|
if (p < 3) goto invalid_option;
|
if (p < 3) goto invalid_option;
|
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
|
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
|
if (!option_char[o]) goto invalid_option;
|
if (!option_char[o]) goto invalid_option;
|
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
|
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
|
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
|
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
|
|
|
/* select an option */
|
/* select an option */
|
func[f]->bb[b].selected_tim = -1;
|
func[f]->bb[b].selected_tim = -1;
|
}
|
}
|
} else PRINTF ("Invalid function.\n");
|
} else PRINTF ("Invalid function.\n");
|
}
|
}
|
|
|
/* options command */
|
/* options command */
|
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
|
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
|
int any = 0;
|
int any = 0;
|
PRINTF ("Available options:\n");
|
PRINTF ("Available options:\n");
|
for (i = 0; i < prof_nfuncs; i++)
|
for (i = 0; i < prof_nfuncs; i++)
|
if (func[i]) {
|
if (func[i]) {
|
options_cmd (i, func[i]);
|
options_cmd (i, func[i]);
|
any = 1;
|
any = 1;
|
}
|
}
|
if (any) PRINTF ("-----------------------------------------------------------------------------\n");
|
if (any) PRINTF ("-----------------------------------------------------------------------------\n");
|
else PRINTF ("Sorry. No available options.\n");
|
else PRINTF ("Sorry. No available options.\n");
|
|
|
/* Ignore empty string */
|
/* Ignore empty string */
|
} else if (strcmp (tmp1, "") == 0) {
|
} else if (strcmp (tmp1, "") == 0) {
|
|
|
/* help command */
|
/* help command */
|
} else {
|
} else {
|
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
|
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
|
PRINTF ("Unknown command.\n");
|
PRINTF ("Unknown command.\n");
|
PRINTF ("OpenRISC Custom Unit Compiler command prompt\n");
|
PRINTF ("OpenRISC Custom Unit Compiler command prompt\n");
|
PRINTF ("Available commands:\n");
|
PRINTF ("Available commands:\n");
|
PRINTF (" h | help displays this help\n");
|
PRINTF (" h | help displays this help\n");
|
PRINTF (" q | quit returns to or1ksim prompt\n");
|
PRINTF (" q | quit returns to or1ksim prompt\n");
|
PRINTF (" p | profile displays function profiling\n");
|
PRINTF (" p | profile displays function profiling\n");
|
PRINTF (" d | debug # sets debug level (0-9)\n");
|
PRINTF (" d | debug # sets debug level (0-9)\n");
|
PRINTF (" o | options displays available options\n");
|
PRINTF (" o | options displays available options\n");
|
PRINTF (" s | select func [option] selects an option/function\n");
|
PRINTF (" s | select func [option] selects an option/function\n");
|
PRINTF (" u | unselect func [option] unselects an option/function\n");
|
PRINTF (" u | unselect func [option] unselects an option/function\n");
|
PRINTF (" g | generate generates verilog file\n");
|
PRINTF (" g | generate generates verilog file\n");
|
PRINTF (" l | list displays selected functions\n");
|
PRINTF (" l | list displays selected functions\n");
|
}
|
}
|
}
|
}
|
|
|
/* Dispose memory */
|
/* Dispose memory */
|
for (i = 0; i < prof_nfuncs -1; i++)
|
for (i = 0; i < prof_nfuncs -1; i++)
|
if (func[i]) free_func (func[i]);
|
if (func[i]) free_func (func[i]);
|
|
|
fclose (flog);
|
fclose (flog);
|
}
|
}
|
|
|
|
|