Line 25... |
Line 25... |
#include <assert.h>
|
#include <assert.h>
|
#include "sim-config.h"
|
#include "sim-config.h"
|
#include "cuc.h"
|
#include "cuc.h"
|
#include "insn.h"
|
#include "insn.h"
|
#include "profiler.h"
|
#include "profiler.h"
|
|
#include "opcode/or32.h"
|
|
|
FILE *flog;
|
FILE *flog;
|
|
int cuc_debug = 0;
|
|
|
/* Last used registers by software convention */
|
/* Last used registers by software convention */
|
const int call_saved[MAX_REGS] = {
|
const int call_saved[MAX_REGS] = {
|
0, 0, 0, 1, 1, 1, 1, 1,
|
0, 0, 0, 1, 1, 1, 1, 1,
|
1, 1, 0, 1, 0, 1, 0, 1,
|
1, 1, 0, 1, 0, 1, 0, 1,
|
Line 228... |
Line 230... |
ii->opt[1] = ii->opt[2];
|
ii->opt[1] = ii->opt[2];
|
ii->opt[2] = t;
|
ii->opt[2] = t;
|
t = ii->op[1];
|
t = ii->op[1];
|
ii->op[1] = ii->op[2];
|
ii->op[1] = ii->op[2];
|
ii->op[2] = t;
|
ii->op[2] = t;
|
modified = 1; debug (2, "%08x:<>\n", REF(b, i));
|
modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i));
|
}
|
}
|
|
|
/* Try to do the promotion */
|
/* Try to do the promotion */
|
/* We have two consecutive expressions, containing constants,
|
/* We have two consecutive expressions, containing constants,
|
* if previous is a simple expression we can handle it simply: */
|
* if previous is a simple expression we can handle it simply: */
|
Line 243... |
Line 245... |
&& f->INSN(ii->op[j]).opt[2] & OPT_CONST
|
&& f->INSN(ii->op[j]).opt[2] & OPT_CONST
|
&& f->INSN(ii->op[j]).op[2] == 0
|
&& f->INSN(ii->op[j]).op[2] == 0
|
&& !(ii->type & IT_MEMORY && t->type & IT_MEMADD)
|
&& !(ii->type & IT_MEMORY && t->type & IT_MEMADD)
|
&& !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) {
|
&& !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) {
|
/* do not promote through add-mem, and branches */
|
/* do not promote through add-mem, and branches */
|
modified = 1; debug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]);
|
modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]);
|
ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1];
|
ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1];
|
}
|
}
|
}
|
}
|
|
|
/* In case of x = cmov x, y; or x = cmov y, x; we have
|
/* In case of x = cmov x, y; or x = cmov y, x; we have
|
Line 257... |
Line 259... |
if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1;
|
if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1;
|
if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2;
|
if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2;
|
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2;
|
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2;
|
if (f) {
|
if (f) {
|
change_insn_type (ii, II_ADD);
|
change_insn_type (ii, II_ADD);
|
debug (2, "%8x:cmov %i\n", REF(b, i), f);
|
cucdebug (2, "%8x:cmov %i\n", REF(b, i), f);
|
ii->opt[f] = OPT_CONST;
|
ii->opt[f] = OPT_CONST;
|
ii->op[f] = 0;
|
ii->op[f] = 0;
|
ii->opt[3] = OPT_NONE;
|
ii->opt[3] = OPT_NONE;
|
modified = 1;
|
modified = 1;
|
continue;
|
continue;
|
Line 297... |
Line 299... |
if (ok) {
|
if (ok) {
|
change_insn_type (ii, II_ADD);
|
change_insn_type (ii, II_ADD);
|
ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
|
ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
|
ii->op[1] = value; ii->opt[1] = OPT_CONST;
|
ii->op[1] = value; ii->opt[1] = OPT_CONST;
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
modified = 1; debug (2, "%8x:const\n", REF (b, i));
|
modified = 1; cucdebug (2, "%8x:const\n", REF (b, i));
|
}
|
}
|
} else if (ii->opt[1] & OPT_REF) {
|
} else if (ii->opt[1] & OPT_REF) {
|
cuc_insn *prev = &f->INSN(ii->op[1]);
|
cuc_insn *prev = &f->INSN(ii->op[1]);
|
/* Is this just a link? */
|
/* Is this just a link? */
|
if (ii->index == II_ADD
|
if (ii->index == II_ADD
|
&& !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
|
&& !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
|
int b1, i1, j1;
|
int b1, i1, j1;
|
debug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]);
|
cucdebug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]);
|
for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
|
for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
|
for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
|
for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
|
for (j1 = 0; j1 < MAX_OPERANDS; j1++)
|
for (j1 = 0; j1 < MAX_OPERANDS; j1++)
|
if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
|
if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
|
&& f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
|
&& f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
|
debug (2, "%x ", REF (b1, i1));
|
cucdebug (2, "%x ", REF (b1, i1));
|
f->bb[b1].insn[i1].op[j1] = ii->op[1];
|
f->bb[b1].insn[i1].op[j1] = ii->op[1];
|
}
|
}
|
debug (2, "\n");
|
cucdebug (2, "\n");
|
change_insn_type (ii, II_NOP);
|
change_insn_type (ii, II_NOP);
|
} else if (prev->opt[2] & OPT_CONST) {
|
} else if (prev->opt[2] & OPT_CONST) {
|
/* Handle some common cases */
|
/* Handle some common cases */
|
/* add - add joining */
|
/* add - add joining */
|
if (ii->index == II_ADD && prev->index == II_ADD) {
|
if (ii->index == II_ADD && prev->index == II_ADD) {
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[2] += prev->op[2];
|
ii->op[2] += prev->op[2];
|
modified = 1; debug (2, "%8x: add-add\n", REF(b, i));
|
modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i));
|
} else /* add - sub joining */
|
} else /* add - sub joining */
|
if (ii->index == II_ADD && prev->index == II_SUB) {
|
if (ii->index == II_ADD && prev->index == II_SUB) {
|
change_insn_type (&insn[i], II_SUB);
|
change_insn_type (&insn[i], II_SUB);
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[2] += prev->op[2];
|
ii->op[2] += prev->op[2];
|
modified = 1; debug (2, "%8x: add-sub\n", REF(b, i));
|
modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i));
|
} else /* sub - add joining */
|
} else /* sub - add joining */
|
if (ii->index == II_SUB && prev->index == II_ADD) {
|
if (ii->index == II_SUB && prev->index == II_ADD) {
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
ii->op[2] += prev->op[2];
|
ii->op[2] += prev->op[2];
|
modified = 1; debug (2, "%8x: sub-add\n", REF(b, i));
|
modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i));
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
}
|
Line 360... |
Line 362... |
}
|
}
|
f->bb[b].ninsn = d;
|
f->bb[b].ninsn = d;
|
|
|
/* Relocate references from all basic blocks */
|
/* Relocate references from all basic blocks */
|
for (c = 0; c < f->num_bb; c++)
|
for (c = 0; c < f->num_bb; c++)
|
for (i = 0; i < f->bb[c].ninsn; i++)
|
for (i = 0; i < f->bb[c].ninsn; i++) {
|
|
dep_list *d = f->bb[c].insn[i].dep;
|
for (j = 0; j < MAX_OPERANDS; j++)
|
for (j = 0; j < MAX_OPERANDS; j++)
|
if ((f->bb[c].insn[i].opt[j] & OPT_REF)
|
if ((f->bb[c].insn[i].opt[j] & OPT_REF)
|
&& REF_BB(f->bb[c].insn[i].op[j]) == b)
|
&& REF_BB(f->bb[c].insn[i].op[j]) == b)
|
f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]);
|
f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]);
|
|
|
|
while (d) {
|
|
if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]);
|
|
d = d->next;
|
|
}
|
|
}
|
}
|
}
|
}
|
}
|
|
|
/* Remove unused assignments */
|
/* Remove unused assignments */
|
void remove_dead (cuc_func *f)
|
void remove_dead (cuc_func *f)
|
Line 412... |
Line 421... |
if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1;
|
if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1;
|
change_insn_type (&insn[i], II_NOP);
|
change_insn_type (&insn[i], II_NOP);
|
}
|
}
|
}
|
}
|
}
|
}
|
if (DEBUG > 2) {
|
if (cuc_debug >= 2) {
|
printf ("saved regs ");
|
printf ("saved regs ");
|
for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]);
|
for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]);
|
printf ("\n");
|
printf ("\n");
|
}
|
}
|
remove_nops (f);
|
remove_nops (f);
|
}
|
}
|
|
|
|
/* Determine inputs and outputs */
|
|
void set_io (cuc_func *f)
|
|
{
|
|
int b, i, j;
|
|
/* Determine register usage */
|
|
for (i = 0; i < MAX_REGS; i++) {
|
|
f->lur[i] = -1;
|
|
f->used_regs[i] = 0;
|
|
}
|
|
for (b = 0; b < f->num_bb; b++) {
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
|
if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0)
|
|
if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i);
|
|
else f->used_regs[f->bb[b].insn[i].op[j]] = 1;
|
|
}
|
|
}
|
|
|
/* relocate all accesses inside of BB b to back/fwd */
|
/* relocate all accesses inside of BB b to back/fwd */
|
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
|
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
|
{
|
{
|
int i, j;
|
int i, j;
|
for (i = 0; i < bb->ninsn; i++)
|
for (i = 0; i < bb->ninsn; i++)
|
Line 454... |
Line 481... |
}
|
}
|
|
|
/* Relocate */
|
/* Relocate */
|
for (b1 = 0; b1 < f->num_bb; b1++)
|
for (b1 = 0; b1 < f->num_bb; b1++)
|
for (i = 0; i < f->bb[b1].ninsn; i++) {
|
for (i = 0; i < f->bb[b1].ninsn; i++) {
|
|
dep_list *d = f->bb[b1].insn[i].dep;
|
for (j = 0; j < MAX_OPERANDS; j++)
|
for (j = 0; j < MAX_OPERANDS; j++)
|
if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
|
if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
|
int t = f->bb[b1].insn[i].op[j];
|
int t = f->bb[b1].insn[i].op[j];
|
if (REF_BB(t) == b && f->INSN(t).tmp != 0)
|
if (REF_BB(t) == b && f->INSN(t).tmp != 0)
|
f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
|
f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
|
}
|
}
|
|
while (d) {
|
|
if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
|
|
d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
|
|
d = d->next;
|
|
}
|
}
|
}
|
|
|
/* Delete unused instructions */
|
/* Delete unused instructions */
|
for (j = 0; j <= mg; j++) {
|
for (j = 0; j <= mg; j++) {
|
if (j == 0) b1 = b;
|
if (j == 0) b1 = b;
|
Line 562... |
Line 595... |
/* If destination instruction is latched, use register instead */
|
/* If destination instruction is latched, use register instead */
|
if (f->bb[b].insn[i].opt[j] == OPT_REF
|
if (f->bb[b].insn[i].opt[j] == OPT_REF
|
&& f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
|
&& f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
|
int b1, i1;
|
int b1, i1;
|
b1 = REF_BB (f->bb[b].insn[i].op[j]);
|
b1 = REF_BB (f->bb[b].insn[i].op[j]);
|
//debug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
|
//cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
|
if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
|
if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
|
for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
|
for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
|
assert (f->bb[b1].insn[i1].index == II_REG);
|
assert (f->bb[b1].insn[i1].index == II_REG);
|
if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
|
if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
|
f->bb[b].insn[i].op[j] = REF (b1, i1);
|
f->bb[b].insn[i].op[j] = REF (b1, i1);
|
Line 578... |
Line 611... |
}
|
}
|
|
|
/* Builds a prerolled/unrolled copy of basic block B of function F, runs the
   optimization and scheduling passes on the copy and stores the resulting
   estimate in TIMINGS.

   bb_filename  passed through to preunroll_loop ()
   f            function containing the loop; preunroll_loop () returns the
                copy that is optimized and then freed here
   timings      output record; filled by analyse_timings () and then tagged
                with b, unroll, preroll and nshared = 0
   b            index of the basic block to process
   i            preroll count
   j            unroll count

   Returns TIMINGS.  */
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
{
  cuc_func *func;

  cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
  func = preunroll_loop (f, b, i, j, bb_filename);
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL");

  log ("Optimizing.\n");
  optimize_tree (func);
  /* The debug dumps that used to follow optimize_tree () and remove_nops ()
     were commented out, but their `if (cuc_debug >= 6)` guards were left in
     place.  Each guard then controlled the *next* statement, so
     remove_nops () and remove_dead () only ran at debug level >= 6.  The
     stale guards are removed so both passes always run.  */
  remove_nops (func);
  remove_dead (func);
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD1");
  optimize_bb (func);
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
  remove_dead_bb (func);
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
  optimize_tree (func);
  if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_OPT_TREE");

  log ("Common subexpression elimination.\n");
  cse (func);
  if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
  remove_dead (func);
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
  remove_trivial_regs (func);
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");

  add_latches (func);
  if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
  set_io (func);

  add_memory_dep (func, memory_order);
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
  add_data_dep (func);
  if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
  schedule_memory (func, memory_order);
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");

  analyse_timings (func, timings);
  cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
            timings->new_time, func->orig_time, timings->size);
  log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
       timings->new_time, func->orig_time, timings->size);

  /* The working copy is no longer needed once the timings are recorded */
  free_func (func);

  /* Tag the result with the configuration it was computed for */
  timings->b = b;
  timings->unroll = j;
  timings->preroll = i;
  timings->nshared = 0;
  return timings;
}
|
|
|
int tim_comp (cuc_timings *a, cuc_timings *b)
|
int tim_comp (cuc_timings *a, cuc_timings *b)
|
{
|
{
|
Line 643... |
Line 681... |
func->orig_time = orig_time;
|
func->orig_time = orig_time;
|
func->start_addr = start_addr;
|
func->start_addr = start_addr;
|
func->end_addr = end_addr;
|
func->end_addr = end_addr;
|
|
|
sprintf (tmp1, "%s.bin", module_name);
|
sprintf (tmp1, "%s.bin", module_name);
|
|
cucdebug (2, "Loading %s.bin\n", module_name);
|
cuc_load (tmp1);
|
cuc_load (tmp1);
|
|
|
log ("Detecting basic blocks\n");
|
log ("Detecting basic blocks\n");
|
detect_bb (func);
|
detect_bb (func);
|
if (DEBUG > 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
|
if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
|
|
|
//sprintf (tmp1, "%s.bin.mp", module_name);
|
//sprintf (tmp1, "%s.bin.mp", module_name);
|
sprintf (tmp2, "%s.bin.bb", module_name);
|
sprintf (tmp2, "%s.bin.bb", module_name);
|
generate_bb_seq (func, config.sim.mprof_fn, tmp2);
|
generate_bb_seq (func, config.sim.mprof_fn, tmp2);
|
|
|
build_bb (func);
|
build_bb (func);
|
if (DEBUG > 5) print_cuc_bb (func, "AFTER_BUILD_BB");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
|
reg_dep (func);
|
reg_dep (func);
|
|
|
log ("Detecting dependencies\n");
|
log ("Detecting dependencies\n");
|
if (DEBUG > 2) print_cuc_bb (func, "AFTER_REG_DEP");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
|
optimize_tree (func);
|
optimize_tree (func);
|
log ("Optimizing.\n");
|
log ("Optimizing.\n");
|
if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE1");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE1");
|
remove_nops (func);
|
remove_nops (func);
|
if (DEBUG > 6) print_cuc_bb (func, "NO_NOPS");
|
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
remove_dead (func);
|
remove_dead (func);
|
if (DEBUG > 6) print_cuc_bb (func, "AFTER_DEAD1");
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_DEAD1");
|
optimize_bb (func);
|
optimize_bb (func);
|
if (DEBUG > 6) print_cuc_bb (func, "AFTER_OPT_BB");
|
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_BB");
|
remove_dead_bb (func);
|
remove_dead_bb (func);
|
if (DEBUG > 2) print_cuc_bb (func, "AFTER_DEAD_BB");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_DEAD_BB");
|
optimize_tree (func);
|
optimize_tree (func);
|
if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE");
|
|
log ("Common subexpression elimination.\n");
|
|
cse (func);
|
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
|
remove_dead (func);
|
remove_dead (func);
|
if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD");
|
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
|
remove_trivial_regs (func);
|
remove_trivial_regs (func);
|
if (DEBUG > 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
|
|
|
csm (func);
|
assert (saved = dup_func (func));
|
assert (saved = dup_func (func));
|
|
|
|
timings.preroll = timings.unroll = 1;
|
|
timings.nshared = 0;
|
|
add_latches (func);
|
|
set_io (func);
|
|
|
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
|
|
analyse_timings (func, &timings);
|
add_memory_dep (func, memory_order);
|
add_memory_dep (func, memory_order);
|
if (DEBUG > 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
add_data_dep (func);
|
add_data_dep (func);
|
if (DEBUG > 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
schedule_memory (func, memory_order);
|
schedule_memory (func, memory_order);
|
if (DEBUG > 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
add_latches (func);
|
|
if (DEBUG > 0) print_cuc_bb (func, "AFTER_LATCHES");
|
|
analyse_timings (func, &timings);
|
|
|
|
output_verilog (func, module_name);
|
//output_verilog (func, module_name);
|
free_func (func);
|
free_func (func);
|
|
log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
|
|
timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
|
|
saved->timings = timings;
|
|
|
#if 1
|
#if 1
|
/* detect and unroll simple loops */
|
/* detect and unroll simple loops */
|
for (b = 0; b < saved->num_bb; b++) {
|
for (b = 0; b < saved->num_bb; b++) {
|
cuc_timings t[MAX_UNROLL * MAX_PREROLL];
|
cuc_timings t[MAX_UNROLL * MAX_PREROLL];
|
cuc_timings *ut;
|
cuc_timings *ut;
|
cuc_timings *cut = &t[0];
|
cuc_timings *cut = &t[0];
|
int nt = 1;
|
int nt = 1;
|
double csize;
|
double csize;
|
|
|
saved->bb[b].ntim = 0;
|
|
saved->bb[b].tim = NULL;
|
|
|
|
/* Is it a loop? */
|
/* Is it a loop? */
|
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
|
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
|
t[0] = timings;
|
t[0] = timings;
|
t[0].b = b;
|
t[0].b = b;
|
t[0].preroll = 1;
|
t[0].preroll = 1;
|
t[0].unroll = 1;
|
t[0].unroll = 1;
|
|
t[0].nshared = 0;
|
|
|
sprintf (tmp1, "%s.bin.bb", module_name);
|
sprintf (tmp1, "%s.bin.bb", module_name);
|
i = 1;
|
i = 1;
|
do {
|
do {
|
cuc_timings *pt;
|
cuc_timings *pt;
|
Line 728... |
Line 776... |
ut = cut;
|
ut = cut;
|
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
|
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
|
} while (i <= MAX_UNROLL && ut->new_time >= cut->new_time);
|
} while (i <= MAX_UNROLL && ut->new_time >= cut->new_time);
|
|
|
/* Sort the timings */
|
/* Sort the timings */
|
if (DEBUG > 3)
|
#if 0
|
|
if (cuc_debug >= 3)
|
for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n",
|
for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
|
|
#endif
|
|
|
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
|
|
|
/* Delete timings, that have worst time and bigger size than other */
|
|
j = 1;
|
|
csize = t[0].size;
|
|
for (i = 1; i < nt; i++)
|
|
if (t[i].size < csize) t[j++] = t[i];
|
|
nt = j;
|
|
|
|
cucdebug (1, "Available options\n");
|
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
|
/* Add results from CSM */
|
|
j = nt;
|
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
|
int i1;
|
|
for (i1 = 0; i1 < nt; i1++) {
|
|
t[j] = t[i1];
|
|
t[j].size += saved->bb[b].tim[i].size - timings.size;
|
|
t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
|
|
t[j].nshared = saved->bb[b].tim[i].nshared;
|
|
t[j].shared = saved->bb[b].tim[i].shared;
|
|
if (++j >= MAX_UNROLL * MAX_PREROLL) goto full;
|
|
}
|
|
}
|
|
|
|
full:
|
|
nt = j;
|
|
|
|
cucdebug (1, "Available options:\n");
|
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
|
|
|
/* Sort again with new timings added */
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
|
|
/* Delete timings, that have worst time and bigger size than other */
|
/* Delete timings, that have worst time and bigger size than other */
|
j = 1;
|
j = 1;
|
csize = t[0].size;
|
csize = t[0].size;
|
for (i = 1; i < nt; i++)
|
for (i = 1; i < nt; i++)
|
if (t[i].size < csize) t[j++] = t[i];
|
if (t[i].size < csize) t[j++] = t[i];
|
nt = j;
|
nt = j;
|
|
|
printf ("A\n");
|
cucdebug (1, "Available options:\n");
|
for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc %.1f\n",
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
|
|
|
if (saved->bb[b].ntim) free (saved->bb[b].tim);
|
saved->bb[b].ntim = nt;
|
saved->bb[b].ntim = nt;
|
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
|
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
|
|
|
/* Copy options in reverse order -- smallest first */
|
/* Copy options in reverse order -- smallest first */
|
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
|
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
|
|
|
|
log ("Available options:\n");
|
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
|
log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
|
|
saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
|
|
saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
|
|
}
|
}
|
}
|
#endif
|
#endif
|
return saved;
|
return saved;
|
}
|
}
|
|
|
|
/* Prints the options recorded for function F to stdout, labelled with NAME.

   The first line shows the function-level timings (f->timings) together
   with f->orig_time; it is followed by one line for each timing entry
   stored in each basic block (f->bb[].tim[0 .. ntim-1]).  */
void options_cmd (cuc_func *f, char *name)
{
  int blk, k;
  const cuc_timings *base = &f->timings;

  /* Function-wide summary */
  printf ("%-12s :pre%i,un%i,sha%i: time = %i cyc; size = %.f gates (old time = %i)\n", name,
          base->preroll, base->unroll, base->nshared,
          base->new_time, base->size, f->orig_time);

  /* Print out results for every basic block that has timing entries */
  for (blk = 0; blk < f->num_bb; blk++) {
    for (k = 0; k < f->bb[blk].ntim; k++) {
      const cuc_timings *tm = &f->bb[blk].tim[k];

      printf ("%-12sBB%-2i:pre%i,un%i,sha%i: time = %i cyc; size = %.f gates\n", name,
              tm->b, tm->preroll, tm->unroll,
              tm->nshared, tm->new_time, tm->size);
    }
  }
}
|
|
|
/* Dumps specified function to file (hex) */
|
/* Dumps specified function to file (hex) */
|
unsigned long extract_function (char *out_fn, unsigned long start_addr)
|
unsigned long extract_function (char *out_fn, unsigned long start_addr)
|
{
|
{
|
FILE *fo;
|
FILE *fo;
|
unsigned long a = start_addr;
|
unsigned long a = start_addr;
|
Line 781... |
Line 889... |
|
|
static cuc_func *func[MAX_FUNCS];
|
static cuc_func *func[MAX_FUNCS];
|
|
|
void main_cuc (char *filename)
|
void main_cuc (char *filename)
|
{
|
{
|
int i;
|
int i, j;
|
char tmp1[256];
|
char tmp1[256];
|
|
|
printf ("Entering Data Fusion command prompt\n");
|
printf ("Entering OpenRISC Custom Unit Compiler command prompt\n");
|
printf ("Using profile file \"%s\" and memory profile file \"%s\"\n", config.sim.prof_fn, config.sim.mprof_fn);
|
printf ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
|
sprintf (tmp1, "%s.log", filename);
|
sprintf (tmp1, "%s.log", filename);
|
|
printf ("Analyzing. (log file \"%s\").\n", tmp1);
|
assert (flog = fopen (tmp1, "wt+"));
|
assert (flog = fopen (tmp1, "wt+"));
|
|
|
/* Loads in the specified timings table */
|
/* Loads in the specified timings table */
|
load_timing_table ("virtex.tim");
|
load_timing_table ("virtex.tim");
|
|
|
prof_set (1, 0);
|
prof_set (1, 0);
|
assert (prof_acquire (config.sim.prof_fn) == 0);
|
assert (prof_acquire (config.sim.prof_fn) == 0);
|
|
|
for (i = 0; i < prof_nfuncs; i++)
|
|
printf ("%-24s addr %08x cycles %i (%3.1f%%)\n", prof_func[i].name,
|
|
prof_func[i].addr, prof_func[i].cum_cycles,
|
|
100. * prof_func[i].cum_cycles / prof_cycles);
|
|
|
|
cycle_duration = 40.;
|
cycle_duration = 40.;
|
|
|
/* Try all functions except "total" */
|
/* Try all functions except "total" */
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
long orig_time;
|
long orig_time;
|
Line 817... |
Line 921... |
|
|
log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr);
|
log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr);
|
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr);
|
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr);
|
}
|
}
|
|
|
|
while (1) {
|
|
char *s;
|
|
printf ("(cuc) ");
|
|
fflush (stdout);
|
|
fgets(tmp1, sizeof tmp1, stdin);
|
|
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
|
|
*s = '\0';
|
|
|
|
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
|
|
break;
|
|
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
|
|
printf ("----------------------------------------------------------------------------\n");
|
|
printf ("|function name |addr |# calls |avg cycles | old% | impr. f. |\n");
|
|
printf ("|-------------------------+--------+--------+------------+------+----------|\n");
|
|
for (j = 0; j < prof_nfuncs; j++) {
|
|
int bestcyc = 0, besti = 0;
|
|
for (i = 0; i < prof_nfuncs; i++)
|
|
if (prof_func[i].cum_cycles > bestcyc) {
|
|
bestcyc = prof_func[i].cum_cycles;
|
|
besti = i;
|
|
}
|
|
i = besti;
|
|
printf ("| %-24s|%08X|%8i|%12.1f| %3.0f%% |",
|
|
prof_func[i].name, prof_func[i].addr, prof_func[i].calls,
|
|
((double)prof_func[i].cum_cycles / prof_func[i].calls),
|
|
(100. * prof_func[i].cum_cycles / prof_cycles));
|
|
if (func[i]) {
|
|
printf ("%9.2f |\n", 1.f * prof_func[i].cum_cycles / func[i]->timings.new_time);
|
|
} else printf (" N/A |\n");
|
|
prof_func[i].cum_cycles = -1;
|
|
}
|
|
printf ("----------------------------------------------------------------------------\n");
|
|
printf ("Total %i functions, %i cycles.\n", prof_nfuncs, prof_cycles);
|
|
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
|
|
sscanf (tmp1, "%*s %i", &cuc_debug);
|
|
if (cuc_debug < 0) cuc_debug = 0;
|
|
if (cuc_debug > 9) cuc_debug = 9;
|
|
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
|
|
for (i = 0; i < prof_nfuncs; i++);
|
|
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
|
|
printf ("Available options:\n");
|
|
for (i = 0; i < prof_nfuncs; i++)
|
|
if (func[i]) options_cmd (func[i], prof_func[i].name);
|
|
} else {
|
|
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
|
|
printf ("Unknown command.\n");
|
|
printf ("OpenRISC Custom Unit Compiler command prompt\n");
|
|
printf ("h|help displays this help\n");
|
|
printf ("q|quit returns to or1ksim prompt\n");
|
|
printf ("p|profile displays function profiling\n");
|
|
printf ("d|debug # sets debug level (0-9)\n");
|
|
printf ("o|options displays available options\n");
|
|
printf ("g|generate generates verilog file\n");
|
|
}
|
|
}
|
|
|
/* Dispose memory */
|
/* Dispose memory */
|
for (i = 0; i < prof_nfuncs -1; i++)
|
for (i = 0; i < prof_nfuncs -1; i++)
|
if (func[i]) free_func (func[i]);
|
if (func[i]) free_func (func[i]);
|
|
|
fclose (flog);
|
fclose (flog);
|