URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
[/] [or1k/] [tags/] [nog_patch_69/] [or1ksim/] [cuc/] [cuc.c] - Rev 879
Go to most recent revision | Compare with Previous | Blame | View Log
/* cuc.c -- OpenRISC Custom Unit Compiler * Copyright (C) 2002 Marko Mlinar, markom@opencores.org * * This file is part of OpenRISC 1000 Architectural Simulator. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Main file, including code optimization and command prompt */ #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <assert.h> #include "sim-config.h" #include "cuc.h" #include "insn.h" #include "profiler.h" FILE *flog; /* Last used registers by software convention */ const int call_saved[MAX_REGS] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1}; /* Prints out instructions */ void print_insns (cuc_insn *insn, int ninsn, int verbose) { int i, j; for (i = 0; i < ninsn; i++) { dep_list *l = insn[i].dep; printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i])); if (verbose) { printf ("%-20s insn = %08x, index = %i, type = %04x ", insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); } else printf ("type = %04x ", insn[i].type); for (j = 0; j < MAX_OPERANDS; j++) { if (insn[i].opt[j] & OPT_DEST) printf ("*"); switch (insn[i].opt[j] & ~OPT_DEST) { case OPT_NONE: break; case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break; case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break; case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break; case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break; case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break; case OPT_LRBB: printf ("LRBB, "); break; default: fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n", cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); assert (0); } } if (l) { printf ("\n\tdep:"); while (l) { printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref)); l = l->next; } } printf ("\n"); } } void add_dep (dep_list **list, int dep) { dep_list *ndep; dep_list **tmp = list; while (*tmp) { if ((*tmp)->ref == dep) return; /* already there */ tmp = &((*tmp)->next); } ndep = (dep_list *)malloc (sizeof (dep_list)); ndep->ref = dep; ndep->next = NULL; *tmp = ndep; } void dispose_list (dep_list **list) { while (*list) { dep_list *tmp = *list; *list = tmp->next; free (tmp); } } void add_data_dep (cuc_func *f) { int b, i, j; dep_list *tmp; for (b = 0; b < f->num_bb; b++) { cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) { fflush (stdout); if (insn[i].opt[j] & OPT_REF) { /* Copy list from predecessor */ dep_list *l = f->INSN(insn[i].op[j]).dep; while (l) { add_dep (&insn[i].dep, l->ref); l = l->next; } /* add predecessor */ add_dep (&insn[i].dep, insn[i].op[j]); } } } } /* returns nonzero, if instruction was simplified */ int apply_edge_condition (cuc_insn *ii) { unsigned int c = ii->op[2]; if (ii->index == II_AND) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_OR) { if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SUB) { if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_MUL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c == 1) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { change_insn_type (ii, II_SUB); ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; ii->op[1] = 0; ii->opt[1] = OPT_CONST; return 1; } } else if (ii->index == II_SRL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c >= 32) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SLL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c >= 32) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SRA) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_CMOV) { if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { change_insn_type (ii, II_ADD); ii->op[2] = 0; ii->opt[2] = OPT_CONST; ii->opt[3] = OPT_NONE; return 1; } } return 0; } /* Optimizes dataflow tree */ void optimize_tree (cuc_func *f) { int b, i, j; int modified; do { modified = 0; for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { for (i = 0; i < f->bb[b].ninsn; i++) { cuc_insn *ii = &f->bb[b].insn[i]; /* We tend to have the third parameter const if instruction is cumutative */ if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST) && known[ii->index].comutative) { unsigned long t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t; t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t; modified = 1; debug (2, "%08x:<>\n", REF(b, i)); } /* Try to do the promotion */ /* We have two consecutive expressions, containing constants, * if previous is a simple expression we can handle it simply: */ for (j = 0; j < MAX_OPERANDS; j++) if (ii->opt[j] & OPT_REF) { cuc_insn *t = &f->INSN(ii->op[j]); if (f->INSN(ii->op[j]).index == II_ADD && f->INSN(ii->op[j]).opt[2] & OPT_CONST && f->INSN(ii->op[j]).op[2] == 0 && !(ii->type & IT_MEMORY && t->type & IT_MEMADD) && !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) { /* do not promote through add-mem, and branches */ modified = 1; debug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]); ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1]; } } /* In case of x = cmov x, y; or x = cmov y, x; we have asynchroneous loop -> remove it */ if (ii->index == II_CMOV) { int f = 0; if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1; if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2; if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2; if (f) { change_insn_type (ii, II_ADD); debug (2, "%8x:cmov %i\n", REF(b, i), f); ii->opt[f] = OPT_CONST; ii->op[f] = 0; ii->opt[3] = OPT_NONE; modified = 1; continue; } } /* Do nothing to volatile instructions */ if (ii->type & IT_VOLATILE) continue; /* Check whether we can simplify the instruction */ if (apply_edge_condition (ii)) { modified = 1; continue; } /* We cannot do anything more if at least one is not constant */ if (!(ii->opt[2] & OPT_CONST)) continue; if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ unsigned long value; int ok = 1; /* Was constant expression already? */ if (ii->index == II_ADD && !ii->op[2]) continue; if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; else ok = 0; if (ok) { change_insn_type (ii, II_ADD); ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; ii->op[1] = value; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; modified = 1; debug (2, "%8x:const\n", REF (b, i)); } } else if (ii->opt[1] & OPT_REF) { cuc_insn *prev = &f->INSN(ii->op[1]); /* Is this just a link? */ if (ii->index == II_ADD && !(ii->type & IT_MEMADD) && ii->op[2] == 0) { int b1, i1, j1; debug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]); for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) for (j1 = 0; j1 < MAX_OPERANDS; j1++) if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) && f->bb[b1].insn[i1].op[j1] == REF(b, i)) { debug (2, "%x ", REF (b1, i1)); f->bb[b1].insn[i1].op[j1] = ii->op[1]; } debug (2, "\n"); change_insn_type (ii, II_NOP); } else if (prev->opt[2] & OPT_CONST) { /* Handle some common cases */ /* add - add joining */ if (ii->index == II_ADD && prev->index == II_ADD) { ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; debug (2, "%8x: add-add\n", REF(b, i)); } else /* add - sub joining */ if (ii->index == II_ADD && prev->index == II_SUB) { change_insn_type (&insn[i], II_SUB); ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; debug (2, "%8x: add-sub\n", REF(b, i)); } else /* sub - add joining */ if (ii->index == II_SUB && prev->index == II_ADD) { ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; debug (2, "%8x: sub-add\n", REF(b, i)); } } } } } } while (modified); } /* Remove nop instructions */ void remove_nops (cuc_func *f) { int b; for (b = 0; b < f->num_bb; b++) { int c, d = 0, i, j; cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) if (insn[i].index != II_NOP) { reloc [i] = d; insn[d++] = insn[i]; } else { reloc[i] = d; /* For jumps only */ } f->bb[b].ninsn = d; /* Relocate references from all basic blocks */ for (c = 0; c < f->num_bb; c++) for (i = 0; i < f->bb[c].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if ((f->bb[c].insn[i].opt[j] & OPT_REF) && REF_BB(f->bb[c].insn[i].op[j]) == b) f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); } } /* Remove unused assignments */ void remove_dead (cuc_func *f) { int b, i, j; for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT))) f->bb[b].insn[i].type |= IT_UNUSED; for (b = 0; b < f->num_bb; b++) { for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) { f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED; } } for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) if (f->bb[b].insn[i].type & IT_UNUSED) { change_insn_type (&f->bb[b].insn[i], II_NOP); } remove_nops (f); } /* Removes trivial register assignments */ void remove_trivial_regs (cuc_func *f) { int b, i; for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i]; for (b = 0; b < f->num_bb; b++) { cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) { if (insn[i].index == II_ADD && insn[i].opt[0] & OPT_REGISTER && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; change_insn_type (&insn[i], II_NOP); } } } if (DEBUG > 2) { printf ("saved regs "); for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]); printf ("\n"); } remove_nops (f); } /* relocate all accesses inside of BB b to back/fwd */ static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) { int i, j; for (i = 0; i < bb->ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (bb->insn[i].opt[j] & OPT_REF && REF_BB (bb->insn[i].op[j]) == b) { int t = REF_I (bb->insn[i].op[j]); if (t < i) bb->insn[i].op[j] = REF (back, t); else bb->insn[i].op[j] = REF (fwd, t); } } /* split the BB, based on the group numbers in .tmp */ void expand_bb (cuc_func *f, int b) { int n = f->num_bb; int mg = 0; int b1, i, j; for (i = 0; i < f->bb[b].ninsn; i++) if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; /* Create copies */ for (b1 = 1; b1 <= mg; b1++) { assert (f->num_bb < MAX_BB); cpy_bb (&f->bb[f->num_bb], &f->bb[b]); f->num_bb++; } /* Relocate */ for (b1 = 0; b1 < f->num_bb; b1++) for (i = 0; i < f->bb[b1].ninsn; i++) { for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b1].insn[i].opt[j] & OPT_REF) { int t = f->bb[b1].insn[i].op[j]; if (REF_BB(t) == b && f->INSN(t).tmp != 0) f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); } } /* Delete unused instructions */ for (j = 0; j <= mg; j++) { if (j == 0) b1 = b; else b1 = n + j - 1; for (i = 0; i < f->bb[b1].ninsn; i++) { if (f->bb[b1].insn[i].tmp != j) change_insn_type (&f->bb[b1].insn[i], II_NOP); f->bb[b1].insn[i].tmp = 0; } if (j < mg) { f->bb[b1].next[0] = n + j; f->bb[b1].next[1] = -1; f->bb[n + j].prev[0] = b1; f->bb[n + j].prev[1] = -1; } else { i = f->bb[b1].next[0]; f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1; f->bb[n + j].prev[1] = -1; if (i >= 0) { if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; } i = f->bb[b1].next[1]; if (i >= 0) { if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; } } } } /* Latch outputs in loops */ void add_latches (cuc_func *f) { int b, i, j; //print_cuc_bb (f, "ADD_LATCHES a"); /* Cuts the tree and marks registers */ mark_cut (f); /* Split BBs with more than one group */ for (b = 0; b < f->num_bb; b++) expand_bb (f, b); remove_nops (f); //print_cuc_bb (f, "ADD_LATCHES 0"); /* Convert accesses in BB_INLOOP type block to latched */ for (b = 0; b < f->num_bb; b++) { int j; for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { int t = f->bb[b].insn[i].op[j]; /* If we are pointing to a INLOOP block from outside, or forward (= previous loop iteration) we must register that data */ if ((f->bb[REF_BB(t)].type & BB_INLOOP || no_multicycle) && !(f->INSN(t).type & (IT_BRANCH | IT_COND)) && (REF_BB(t) != b || REF_I(t) >= i)) { f->INSN(t).type |= IT_LATCHED; } } } //print_cuc_bb (f, "ADD_LATCHES 1"); /* Add latches at the end of blocks as needed */ for (b = 0; b < f->num_bb; b++) { int nreg = 0; cuc_insn *insn; for (i = 0; i < f->bb[b].ninsn; i++) if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; if (nreg) { insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); j = 0; for (i = 0; i < f->bb[b].ninsn; i++) { insn[i] = f->bb[b].insn[i]; if (insn[i].type & IT_LATCHED) { cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; change_insn_type (ii, II_REG); ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; ii->opt[2] = ii->opt[3] = OPT_NONE; ii->dep = NULL; ii->type = IT_VOLATILE; sprintf (ii->disasm, "reg %i_%i", b, i); } } f->bb[b].ninsn += nreg; free (f->bb[b].insn); f->bb[b].insn = insn; } } //print_cuc_bb (f, "ADD_LATCHES 2"); /* Repair references */ for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) /* If destination instruction is latched, use register instead */ if (f->bb[b].insn[i].opt[j] == OPT_REF && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { int b1, i1; b1 = REF_BB (f->bb[b].insn[i].op[j]); //debug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { assert (f->bb[b1].insn[i1].index == II_REG); if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { f->bb[b].insn[i].op[j] = REF (b1, i1); break; } } } } } cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j) { cuc_func *func; debug (2, "BB%i unroll %i times preroll %i times\n", b, j, i); func = preunroll_loop (f, b, i, j, bb_filename); if (DEBUG > 2) print_cuc_bb (func, "AFTER_PREUNROLL"); log ("Optimizing.\n"); optimize_tree (func); if (DEBUG > 6) //print_cuc_bb (func, "AFTER_OPT_TREE1"); remove_nops (func); if (DEBUG > 6) //print_cuc_bb (func, "NO_NOPS"); remove_dead (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD1"); optimize_bb (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_OPT_BB"); remove_dead_bb (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD_BB"); optimize_tree (func); if (DEBUG > 3) print_cuc_bb (func, "AFTER_OPT_TREE"); remove_dead (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD"); remove_trivial_regs (func); if (DEBUG > 2) print_cuc_bb (func, "AFTER_TRIVIAL"); add_memory_dep (func, memory_order); if (DEBUG > 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); add_data_dep (func); if (DEBUG > 8) print_cuc_bb (func, "AFTER_DATA_DEP"); schedule_memory (func, memory_order); if (DEBUG > 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); add_latches (func); if (DEBUG > 0) print_cuc_bb (func, "AFTER_LATCHES"); analyse_timings (func, timings); debug (2, "new_time = %i, old_time = %i, size = %f\n", timings->new_time, func->orig_time, timings->size); log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n", timings->new_time, func->orig_time, timings->size); //output_verilog (func, argv[1]); free_func (func); timings->b = b; timings->unroll = j; timings->preroll = i; return timings; } int tim_comp (cuc_timings *a, cuc_timings *b) { if (a->new_time < b->new_time) return -1; else if (a->new_time > b->new_time) return 1; else return 0; } cuc_func *analyse_function (char *module_name, long orig_time, unsigned long start_addr, unsigned long end_addr) { cuc_timings timings; cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); cuc_func *saved; int b, i, j; char tmp1[256]; char tmp2[256]; func->orig_time = orig_time; func->start_addr = start_addr; func->end_addr = end_addr; sprintf (tmp1, "%s.bin", module_name); cuc_load (tmp1); log ("Detecting basic blocks\n"); detect_bb (func); if (DEBUG > 2) print_cuc_insns ("WITH_BB_LIMITS", 0); //sprintf (tmp1, "%s.bin.mp", module_name); sprintf (tmp2, "%s.bin.bb", module_name); generate_bb_seq (func, config.sim.mprof_fn, tmp2); build_bb (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_BUILD_BB"); reg_dep (func); log ("Detecting dependencies\n"); if (DEBUG > 2) print_cuc_bb (func, "AFTER_REG_DEP"); optimize_tree (func); log ("Optimizing.\n"); if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE1"); remove_nops (func); if (DEBUG > 6) print_cuc_bb (func, "NO_NOPS"); remove_dead (func); if (DEBUG > 6) print_cuc_bb (func, "AFTER_DEAD1"); optimize_bb (func); if (DEBUG > 6) print_cuc_bb (func, "AFTER_OPT_BB"); remove_dead_bb (func); if (DEBUG > 2) print_cuc_bb (func, "AFTER_DEAD_BB"); optimize_tree (func); if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE"); remove_dead (func); if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD"); remove_trivial_regs (func); if (DEBUG > 2) print_cuc_bb (func, "AFTER_TRIVIAL"); assert (saved = dup_func (func)); add_memory_dep (func, memory_order); if (DEBUG > 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); add_data_dep (func); if (DEBUG > 8) print_cuc_bb (func, "AFTER_DATA_DEP"); schedule_memory (func, memory_order); if (DEBUG > 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); add_latches (func); if (DEBUG > 0) print_cuc_bb (func, "AFTER_LATCHES"); analyse_timings (func, &timings); output_verilog (func, module_name); free_func (func); #if 1 /* detect and unroll simple loops */ for (b = 0; b < saved->num_bb; b++) { cuc_timings t[MAX_UNROLL * MAX_PREROLL]; cuc_timings *ut; cuc_timings *cut = &t[0]; int nt = 1; double csize; saved->bb[b].ntim = 0; saved->bb[b].tim = NULL; /* Is it a loop? */ if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; t[0] = timings; t[0].b = b; t[0].preroll = 1; t[0].unroll = 1; sprintf (tmp1, "%s.bin.bb", module_name); i = 1; do { cuc_timings *pt; cuc_timings *cpt = cut; j = 1; do { pt = cpt; cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i); } while (j <= MAX_PREROLL && pt->new_time >= cpt->new_time); i++; ut = cut; cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i); } while (i <= MAX_UNROLL && ut->new_time >= cut->new_time); /* Sort the timings */ if (DEBUG > 3) for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time); qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); /* Delete timings, that have worst time and bigger size than other */ j = 1; csize = t[0].size; for (i = 1; i < nt; i++) if (t[i].size < csize) t[j++] = t[i]; nt = j; printf ("A\n"); for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc %.1f\n", t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); saved->bb[b].ntim = nt; assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt)); /* Copy options in reverse order -- smallest first */ for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i]; } #endif return saved; } /* Dumps specified function to file (hex) */ unsigned long extract_function (char *out_fn, unsigned long start_addr) { FILE *fo; unsigned long a = start_addr; int x = 0; assert (fo = fopen (out_fn, "wt+")); do { unsigned long d = evalsim_mem32 (a); int index = insn_decode (d); assert (index >= 0); if (x) x++; if (strcmp (insn_name (index), "l.jr") == 0) x = 1; a += 4; fprintf (fo, "%08x\n", d); } while (x < 2); fclose (fo); return a - 4; } static cuc_func *func[MAX_FUNCS]; void main_cuc (char *filename) { int i; char tmp1[256]; printf ("Entering Data Fusion command prompt\n"); printf ("Using profile file \"%s\" and memory profile file \"%s\"\n", config.sim.prof_fn, config.sim.mprof_fn); sprintf (tmp1, "%s.log", filename); assert (flog = fopen (tmp1, "wt+")); /* Loads in the specified timings table */ load_timing_table ("virtex.tim"); prof_set (1, 0); assert (prof_acquire (config.sim.prof_fn) == 0); for (i = 0; i < prof_nfuncs; i++) printf ("%-24s addr %08x cycles %i (%3.1f%%)\n", prof_func[i].name, prof_func[i].addr, prof_func[i].cum_cycles, 100. * prof_func[i].cum_cycles / prof_cycles); cycle_duration = 40.; /* Try all functions except "total" */ for (i = 0; i < prof_nfuncs - 1; i++) { long orig_time; unsigned long start_addr, end_addr; orig_time = prof_func[i].cum_cycles; start_addr = prof_func[i].addr; /* Extract the function from the binary */ sprintf (tmp1, "%s.bin", prof_func[i].name); end_addr = extract_function (tmp1, start_addr); log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr); func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr); } /* Dispose memory */ for (i = 0; i < prof_nfuncs -1; i++) if (func[i]) free_func (func[i]); fclose (flog); }
Go to most recent revision | Compare with Previous | Blame | View Log