URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
[/] [or1k/] [trunk/] [or1ksim/] [cuc/] [insn.c] - Rev 903
Go to most recent revision | Compare with Previous | Blame | View Log
/* insn.c -- OpenRISC Custom Unit Compiler, instruction support * Copyright (C) 2002 Marko Mlinar, markom@opencores.org * * This file is part of OpenRISC 1000 Architectural Simulator. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <assert.h> #include "sim-config.h" #include "cuc.h" #include "insn.h" /* Table of known instructions. Watch out for indexes I_*! */ const cuc_known_insn known[II_LAST + 1] = { {"add", 1, "assign \1 = \2 + \3;"}, {"sub", 0, "assign \1 = \2 - \3;"}, {"and", 1, "assign \1 = \2 & \3;"}, {"or", 1, "assign \1 = \2 | \3;"}, {"xor", 1, "assign \1 = \2 ^ \3;"}, {"mul", 1, "assign \1 = \2 * \3;"}, {"srl", 0, "assign \1 = \2 >> \3;"}, {"sll", 0, "assign \1 = \2 << \3;"}, {"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\ | \2 >> \3;"}, {"lb", 0, "always @(posedge clk or posedge rst)"}, {"lh", 0, "always @(posedge clk or posedge rst)"}, {"lw", 0, "always @(posedge clk or posedge rst)"}, {"sb", 0, "/* mem8[\2] = \1 */"}, {"sh", 0, "/* mem16[\2] = \1 */"}, {"sw", 0, "/* mem32[\2] = \1 */"}, {"sfeq", 1, "assign \1 = \2 == \3;"}, {"sfne", 1, "assign \1 = \2 != \3;"}, {"sfle", 0, "assign \1 = \2 <= \3;"}, {"sflt", 0, "assign \1 = \2 < \3;"}, {"sfgt", 0, "assign \1 = \2 > \3;"}, {"sfge", 0, "assign \1 = \2 >= \3;"}, {"sfor", 1, "assign \1 = \2 || \3;"}, {"bf", 0, ""}, {"lrbb", 0,"always @(posedge clk or posedge rst)"}, {"cmov", 0,"assign \1 = \4 ? \2 : \3;"}, {"reg", 0, "always @(posedge clk or posedge rst)"}, {"nop", 0, NULL}}; /* Find known instruction and attach them to insn */ void change_insn_type (cuc_insn *i, int index) { int j; assert (index >= 0 && index <= II_LAST); i->index = index; if (i->index == II_NOP) { for (j = 0; j < MAX_OPERANDS; j++) i->opt[j] = OPT_NONE; i->type = 0; i->dep = NULL; } } /* Returns instruction name */ const char *cuc_insn_name (cuc_insn *ii) { if (ii->index < 0 || ii->index > II_LAST) return "???"; else return known[ii->index].name; } /* Prints out instructions */ void print_insns (cuc_insn *insn, int ninsn, int verbose) { int i, j; for (i = 0; i < ninsn; i++) { dep_list *l = insn[i].dep; printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i])); if (verbose) { printf ("%-20s insn = %08x, index = %i, type = %04x ", insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); } else printf ("type = %04x ", insn[i].type); for (j = 0; j < MAX_OPERANDS; j++) { if (insn[i].opt[j] & OPT_DEST) printf ("*"); switch (insn[i].opt[j] & ~OPT_DEST) { case OPT_NONE: break; case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break; case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break; case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break; case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break; case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break; case OPT_LRBB: printf ("LRBB, "); break; default: fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n", cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); assert (0); } } if (l) { printf ("\n\tdep:"); while (l) { printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref)); l = l->next; } } printf ("\n"); } } void add_dep (dep_list **list, int dep) { dep_list *ndep; dep_list **tmp = list; while (*tmp) { if ((*tmp)->ref == dep) return; /* already there */ tmp = &((*tmp)->next); } ndep = (dep_list *)malloc (sizeof (dep_list)); ndep->ref = dep; ndep->next = NULL; *tmp = ndep; } void dispose_list (dep_list **list) { while (*list) { dep_list *tmp = *list; *list = tmp->next; free (tmp); } } void add_data_dep (cuc_func *f) { int b, i, j; dep_list *tmp; for (b = 0; b < f->num_bb; b++) { cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) { fflush (stdout); if (insn[i].opt[j] & OPT_REF) { /* Copy list from predecessor */ dep_list *l = f->INSN(insn[i].op[j]).dep; while (l) { add_dep (&insn[i].dep, l->ref); l = l->next; } /* add predecessor */ add_dep (&insn[i].dep, insn[i].op[j]); } } } } /* returns nonzero, if instruction was simplified */ int apply_edge_condition (cuc_insn *ii) { unsigned int c = ii->op[2]; if (ii->index == II_AND) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_OR) { if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SUB) { if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_MUL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c == 1) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { change_insn_type (ii, II_SUB); ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; ii->op[1] = 0; ii->opt[1] = OPT_CONST; return 1; } } else if (ii->index == II_SRL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c >= 32) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SLL) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } else if (ii->opt[2] & OPT_CONST && c >= 32) { change_insn_type (ii, II_ADD); ii->op[1] = 0; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_SRA) { if (ii->opt[2] & OPT_CONST && c == 0) { change_insn_type (ii, II_ADD); ii->op[1] = c; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; return 1; } } else if (ii->index == II_CMOV) { if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { change_insn_type (ii, II_ADD); ii->op[2] = 0; ii->opt[2] = OPT_CONST; ii->opt[3] = OPT_NONE; return 1; } } return 0; } /* First primary input */ static unsigned long tmp_op, tmp_opt; /* Recursive function that searches for primary inputs; returns 0 if cmov can be replaced by add */ static int cmov_needed (cuc_func *f, int ref) { cuc_insn *ii = &f->INSN(ref); int j; cucdebug (4, " %x", ref); /* mark visited, if already marked, we have a loop, ignore */ if (ii->tmp) return 0; ii->tmp = 1; /* handle normal movs separately */ if ((ii->index == II_ADD || !(ii->type & IT_VOLATILE)) && ii->opt[2] == OPT_CONST && ii->op[2] == 0) { if (ii->opt[1] == OPT_REF) { if (cmov_needed (f, ii->op[1])) { ii->tmp = 0; return 1; } } else { if (tmp_opt == OPT_NONE) { tmp_op = ii->op[1]; tmp_opt = ii->opt[1]; } else if (tmp_opt != ii->opt[1] || tmp_op != ii->op[1]) { ii->tmp = 0; return 1; } } ii->tmp = 0; return 0; } /* Is this instruction CMOV? no => add to primary inputs */ if (ii->index != II_CMOV || ii->type & IT_VOLATILE) if (tmp_opt == OPT_NONE) { tmp_op = ref; tmp_opt = OPT_REF; ii->tmp = 0; return 0; } else if (tmp_opt != OPT_REF || tmp_op != ref) { ii->tmp = 0; return 1; } for (j = 1; j < 3; j++) { cucdebug (4, "(%x:%i)", ref, j); if (ii->opt[j] == OPT_REF) { if (cmov_needed (f, ii->op[j])) { ii->tmp = 0; return 1; } } else { if (tmp_opt == OPT_NONE) { tmp_op = ii->op[j]; tmp_opt = ii->opt[j]; } else if (tmp_opt != ii->opt[j] || tmp_op != ii->op[j]) { ii->tmp = 0; return 1; } } } ii->tmp = 0; return 0; } /* Search and optimize complex cmov assignments */ void optimize_cmovs (cuc_func *f) { int b, i, j; /* Mark all instructions unvisited */ for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].tmp = 0; for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { for (i = 0; i < f->bb[b].ninsn; i++) { cuc_insn *ii = &f->bb[b].insn[i]; if (ii->index == II_CMOV && !(ii->type & IT_VOLATILE)) { tmp_opt = OPT_NONE; cucdebug (4, "\n"); if (!cmov_needed (f, REF(b, i))) { assert (tmp_opt != OPT_NONE); change_insn_type (ii, II_ADD); ii->op[1] = tmp_op; ii->opt[1] = tmp_opt; ii->op[2] = 0; ii->opt[2] = OPT_CONST; ii->opt[3] = OPT_NONE; } } } } } /* Optimizes dataflow tree */ void optimize_tree (cuc_func *f) { int b, i, j; int modified; do { modified = 0; for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { for (i = 0; i < f->bb[b].ninsn; i++) { cuc_insn *ii = &f->bb[b].insn[i]; /* We tend to have the third parameter const if instruction is cumutative */ if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST) && known[ii->index].comutative) { unsigned long t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t; t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t; modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i)); } /* Try to do the promotion */ /* We have two consecutive expressions, containing constants, * if previous is a simple expression we can handle it simply: */ for (j = 0; j < MAX_OPERANDS; j++) if (ii->opt[j] & OPT_REF) { cuc_insn *t = &f->INSN(ii->op[j]); if (f->INSN(ii->op[j]).index == II_ADD && f->INSN(ii->op[j]).opt[2] & OPT_CONST && f->INSN(ii->op[j]).op[2] == 0 && !(ii->type & IT_MEMORY && t->type & IT_MEMADD) && !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) { /* do not promote through add-mem, and branches */ modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]); ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1]; } } /* In case of x = cmov x, y; or x = cmov y, x; we have asynchroneous loop -> remove it */ if (ii->index == II_CMOV) { int f = 0; if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1; if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2; if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2; if (f) { change_insn_type (ii, II_ADD); cucdebug (2, "%8x:cmov %i\n", REF(b, i), f); ii->opt[f] = OPT_CONST; ii->op[f] = 0; ii->opt[3] = OPT_NONE; modified = 1; continue; } } /* Do nothing to volatile instructions */ if (ii->type & IT_VOLATILE) continue; /* Check whether we can simplify the instruction */ if (apply_edge_condition (ii)) { modified = 1; continue; } /* We cannot do anything more if at least one is not constant */ if (!(ii->opt[2] & OPT_CONST)) continue; if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ unsigned long value; int ok = 1; /* Was constant expression already? */ if (ii->index == II_ADD && !ii->op[2]) continue; if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; else ok = 0; if (ok) { change_insn_type (ii, II_ADD); ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; ii->op[1] = value; ii->opt[1] = OPT_CONST; ii->op[2] = 0; ii->opt[2] = OPT_CONST; modified = 1; cucdebug (2, "%8x:const\n", REF (b, i)); } } else if (ii->opt[1] & OPT_REF) { cuc_insn *prev = &f->INSN(ii->op[1]); /* Is this just a link? */ if (ii->index == II_ADD && !(ii->type & IT_MEMADD) && ii->op[2] == 0) { int b1, i1, j1; cucdebug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]); for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) for (j1 = 0; j1 < MAX_OPERANDS; j1++) if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) && f->bb[b1].insn[i1].op[j1] == REF(b, i)) { cucdebug (2, "%x ", REF (b1, i1)); f->bb[b1].insn[i1].op[j1] = ii->op[1]; } cucdebug (2, "\n"); change_insn_type (ii, II_NOP); } else if (prev->opt[2] & OPT_CONST) { /* Handle some common cases */ /* add - add joining */ if (ii->index == II_ADD && prev->index == II_ADD) { ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i)); } else /* add - sub joining */ if (ii->index == II_ADD && prev->index == II_SUB) { change_insn_type (&insn[i], II_SUB); ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i)); } else /* sub - add joining */ if (ii->index == II_SUB && prev->index == II_ADD) { ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; ii->op[2] += prev->op[2]; modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i)); } } } } } } while (modified); } /* Remove nop instructions */ void remove_nops (cuc_func *f) { int b; for (b = 0; b < f->num_bb; b++) { int c, d = 0, i, j; cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) if (insn[i].index != II_NOP) { reloc [i] = d; insn[d++] = insn[i]; } else { reloc[i] = d; /* For jumps only */ } f->bb[b].ninsn = d; /* Relocate references from all basic blocks */ for (c = 0; c < f->num_bb; c++) for (i = 0; i < f->bb[c].ninsn; i++) { dep_list *d = f->bb[c].insn[i].dep; for (j = 0; j < MAX_OPERANDS; j++) if ((f->bb[c].insn[i].opt[j] & OPT_REF) && REF_BB(f->bb[c].insn[i].op[j]) == b) f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); while (d) { if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]); d = d->next; } } } } /* Remove unused assignments */ void remove_dead (cuc_func *f) { int b, i, j; for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT))) f->bb[b].insn[i].type |= IT_UNUSED; for (b = 0; b < f->num_bb; b++) { for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) { f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED; } } for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) if (f->bb[b].insn[i].type & IT_UNUSED) { change_insn_type (&f->bb[b].insn[i], II_NOP); } remove_nops (f); } /* Removes trivial register assignments */ void remove_trivial_regs (cuc_func *f) { int b, i; for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i]; for (b = 0; b < f->num_bb; b++) { cuc_insn *insn = f->bb[b].insn; for (i = 0; i < f->bb[b].ninsn; i++) { if (insn[i].index == II_ADD && insn[i].opt[0] & OPT_REGISTER && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; change_insn_type (&insn[i], II_NOP); } } } if (cuc_debug >= 2) { printf ("saved regs "); for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]); printf ("\n"); } remove_nops (f); } /* Determine inputs and outputs */ void set_io (cuc_func *f) { int b, i, j; /* Determine register usage */ for (i = 0; i < MAX_REGS; i++) { f->lur[i] = -1; f->used_regs[i] = 0; } if (cuc_debug > 5) print_cuc_bb (f, "SET_IO"); for (b = 0; b < f->num_bb; b++) { for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i); else f->used_regs[f->bb[b].insn[i].op[j]] = 1; } } /* relocate all accesses inside of BB b to back/fwd */ static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) { int i, j; for (i = 0; i < bb->ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (bb->insn[i].opt[j] & OPT_REF && REF_BB (bb->insn[i].op[j]) == b) { int t = REF_I (bb->insn[i].op[j]); if (t < i) bb->insn[i].op[j] = REF (back, t); else bb->insn[i].op[j] = REF (fwd, t); } } /* Latch outputs in loops */ void add_latches (cuc_func *f) { int b, i, j; //print_cuc_bb (f, "ADD_LATCHES a"); /* Cuts the tree and marks registers */ mark_cut (f); /* Split BBs with more than one group */ for (b = 0; b < f->num_bb; b++) expand_bb (f, b); remove_nops (f); //print_cuc_bb (f, "ADD_LATCHES 0"); /* Convert accesses in BB_INLOOP type block to latched */ for (b = 0; b < f->num_bb; b++) { int j; for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { int t = f->bb[b].insn[i].op[j]; /* If we are pointing to a INLOOP block from outside, or forward (= previous loop iteration) we must register that data */ if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle) && !(f->INSN(t).type & (IT_BRANCH | IT_COND)) && (REF_BB(t) != b || REF_I(t) >= i)) { f->INSN(t).type |= IT_LATCHED; } } } //print_cuc_bb (f, "ADD_LATCHES 1"); /* Add latches at the end of blocks as needed */ for (b = 0; b < f->num_bb; b++) { int nreg = 0; cuc_insn *insn; for (i = 0; i < f->bb[b].ninsn; i++) if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; if (nreg) { insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); j = 0; for (i = 0; i < f->bb[b].ninsn; i++) { insn[i] = f->bb[b].insn[i]; if (insn[i].type & IT_LATCHED) { cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; change_insn_type (ii, II_REG); ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; ii->opt[2] = ii->opt[3] = OPT_NONE; ii->dep = NULL; ii->type = IT_VOLATILE; sprintf (ii->disasm, "reg %i_%i", b, i); } } f->bb[b].ninsn += nreg; free (f->bb[b].insn); f->bb[b].insn = insn; } } //print_cuc_bb (f, "ADD_LATCHES 2"); /* Repair references */ for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) /* If destination instruction is latched, use register instead */ if (f->bb[b].insn[i].opt[j] == OPT_REF && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { int b1, i1; b1 = REF_BB (f->bb[b].insn[i].op[j]); //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { assert (f->bb[b1].insn[i1].index == II_REG); if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { f->bb[b].insn[i].op[j] = REF (b1, i1); break; } } } } } /* CSE -- common subexpression elimination */ void cse (cuc_func *f) { int b, i, j, b1, i1, b2, i2, j2; for (b1 = 0; b1 < f->num_bb; b1++) for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) for (b2 = 0; b2 < f->num_bb; b2++) for (i2 = 0; i2 < f->bb[b2].ninsn; i2++) { cuc_insn *ii1 = &f->bb[b1].insn[i1]; cuc_insn *ii2 = &f->bb[b2].insn[i2]; /* Do we have an exact match? */ if (ii1->index == ii2->index) continue; if (ii1->type & IT_VOLATILE) continue; if (ii1->op[1] != ii2->op[1] || ii1->opt[1] != ii2->opt[1]) continue; if (ii1->op[2] != ii2->op[2] || ii1->opt[2] != ii2->opt[2]) continue; if (ii1->opt[3] != ii2->opt[3]) continue; if (ii1->opt[3] != OPT_NONE && ii1->op[3] != ii2->op[3]) continue; /* Check if we drive outputs? */ if ((ii1->opt[0] & OPT_REGISTER) && ii1->op[0] >= 0) if ((ii2->opt[0] & OPT_REGISTER) && ii2->op[0] >= 0) continue; else ii2->op[0] = ii1->op[0]; /* remove duplicated instruction and relink the references */ change_insn_type (ii2, II_NOP); for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == REF (b2, i2)) f->bb[b].insn[i].op[j] = REF (b1, i1); } } static int count_cmovs (cuc_insn *ii, int match) { int c = 0, j; if (match & 2) { for (j = 0; j < MAX_OPERANDS; j++) if (ii->opt[j] & OPT_DEST) c++; } if (match & 1) { for (j = 0; j < MAX_OPERANDS; j++) if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] & OPT_REF) c++; } else { for (j = 0; j < MAX_OPERANDS; j++) if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] != OPT_NONE) c++; } return c; } static void search_csm (int iter, cuc_func *f, cuc_shared_list *list); static cuc_shared_list *main_list; static int *iteration; /* CSM -- common subexpression matching -- resource sharing We try to match tree of instruction inside a BB with as many matches as possible. All possibilities are collected and options, making situation worse are removed */ void csm (cuc_func *f) { int b, i, j; int cnt; cuc_shared_list *list; cuc_timings timings; analyse_timings (f, &timings); main_list = NULL; for (b = 0; b < f->num_bb; b++) { assert (iteration = (int *)malloc (sizeof (int) * f->bb[b].ninsn)); for (i = 0; i < f->bb[b].ninsn; i++) { int cnt = 0, cntc = 0; double size = 0., sizec = 0.; int j2 = 0; for (j = 0; j < f->bb[b].ninsn; j++) if (f->bb[b].insn[i].index == f->bb[b].insn[j].index) { int ok = 1; for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[j].opt[j2] & OPT_REF)) if (f->bb[b].insn[j].opt[j2] != f->bb[b].insn[i].opt[j2] || f->bb[b].insn[j].op[j2] != f->bb[b].insn[i].opt[j2]) { ok = 0; break; } if (ok) { cntc++; sizec = sizec + insn_size (&f->bb[b].insn[j]); } else { cnt++; size = size + insn_size (&f->bb[b].insn[j]); } iteration[j] = 0; } else iteration[j] = -1; if (cntc > 1) { assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); list->next = main_list; list->from = NULL; list->ref = REF (b, i); list->cnt = cnt; list->cmatch = 1; list->cmovs = count_cmovs (&f->bb[b].insn[i], 3); list->osize = sizec; list->size = ii_size (f->bb[b].insn[i].index, 1); main_list = list; search_csm (0, f, list); } if (cnt > 1) { assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); list->next = main_list; list->from = NULL; list->ref = REF (b, i); list->cnt = cnt + cntc; list->cmatch = 0; list->cmovs = count_cmovs (&f->bb[b].insn[i], 2); list->osize = size + sizec; list->size = ii_size (f->bb[b].insn[i].index, 0); main_list = list; search_csm (0, f, list); } } free (iteration); } for (list = main_list; list; list = list->next) list->dead = 0; cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead) cnt++; cucdebug (1, "noptions = %i\n", cnt); /* Now we will check the real size of the 'improvements'; if the size actually increases, we abandom the option */ for (list = main_list; list; list = list->next) if (list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size >= list->osize) list->dead = 1; cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead) cnt++; cucdebug (1, "noptions = %i\n", cnt); /* Count number of instructions grouped */ for (list = main_list; list; list = list->next) { cuc_shared_list *l = list; int c = 0; while (l) { c++; if (f->INSN(l->ref).type & (IT_VOLATILE | IT_MEMORY | IT_MEMADD)) list->dead = 1; l = l->from; } list->ninsn = c; } cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead) cnt++; cucdebug (1, "noptions = %i\n", cnt); #if 1 /* We can get a lot of options here, so we will delete duplicates */ for (list = main_list; list; list = list->next) if (!list->dead) { cuc_shared_list *l; for (l = list->next; l; l = l->next) if (!l->dead) { int ok = 1; cuc_shared_list *t1 = list; cuc_shared_list *t2 = l; while (ok && t1 && t2) { if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) { /* If other operands are matching, we must check for them also */ if (t1->cmatch) { int j; for (j = 0; j < MAX_OPERANDS; j++) if (!(f->INSN(t1->ref).opt[j] & OPT_REF) || !(f->INSN(t2->ref).opt[j] & OPT_REF) || f->INSN(t1->ref).opt[j] != f->INSN(t2->ref).opt[j] || f->INSN(t1->ref).op[j] != f->INSN(t2->ref).op[j]) { ok = 0; break; } } /* This option is duplicate, remove */ if (ok) t1->dead = 1; } t1 = t1->from; t2 = t2->from; } } } cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead) cnt++; cucdebug (1, "noptions = %i\n", cnt); #endif /* Print out */ for (list = main_list; list; list = list->next) if (!list->dead) { cuc_shared_list *l = list; cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @", cuc_insn_name (&f->INSN(list->ref)), list->cnt, list->ninsn, list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs); while (l) { cucdebug (1, "%c%x,", l->cmatch ? '.' : '!', l->ref); l = l->from; } cucdebug (1, "\n"); } /* Calculate estimated timings */ for (b = 0; b < f->num_bb; b++) { cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) cnt++; f->bb[b].ntim = cnt; if (!cnt) { f->bb[b].tim = NULL; continue; } assert (f->bb[b].tim = (cuc_timings *)malloc (sizeof (cuc_timings) * cnt)); cnt = 0; for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) { cuc_shared_list *l = list; f->bb[b].tim[cnt].b = b; f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1; f->bb[b].tim[cnt].nshared = list->ninsn; assert (f->bb[b].tim[cnt].shared = (cuc_shared_item *) malloc (sizeof(cuc_shared_item) * list->ninsn)); for (i = 0; i < list->ninsn; i++, l = l->from) { f->bb[b].tim[cnt].shared[i].ref = l->ref; f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch; } f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1); f->bb[b].tim[cnt].size = timings.size + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize; cnt++; } } } /* Recursive function for searching through instruction graph */ static void search_csm (int iter, cuc_func *f, cuc_shared_list *list) { int b, i, j, i1; cuc_shared_list *l; b = REF_BB(list->ref); i = REF_I(list->ref); for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) { int t = f->bb[b].insn[i].op[j]; int cnt = 0, cntc = 0; double size = 0., sizec = 0.; /* Mark neighbours */ for (i1 = 0; i1 < f->bb[b].ninsn; i1++) { if (iteration[i1] == iter && f->bb[b].insn[i1].opt[j] & OPT_REF) { int t2 = f->bb[b].insn[i1].op[j]; if (f->INSN(t).index == f->INSN(t2).index && f->INSN(t2).opt[j] & OPT_REF) { int j2; int ok = 1; iteration[REF_I(t2)] = iter + 1; for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[i1].opt[j2] & OPT_REF)) if (f->bb[b].insn[i1].opt[j2] != f->bb[b].insn[i].opt[j2] || f->bb[b].insn[i1].op[j2] != f->bb[b].insn[i].opt[j2]) { ok = 0; break; } if (ok) { cntc++; sizec = sizec + insn_size (&f->bb[b].insn[i1]); } else { cnt++; size = size + insn_size (&f->bb[b].insn[i1]); } } } } if (cntc > 1) { assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); l->next = main_list; main_list = l; l->from = list; l->ref = t; l->cnt = cnt; l->cmatch = 1; l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 1) - 1; l->size = list->size + ii_size (f->bb[b].insn[i].index, 1); l->osize = sizec; search_csm (iter + 1, f, l); } if (cnt > 1) { assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); l->next = main_list; main_list = l; l->from = list; l->ref = t; l->cnt = cnt + cntc; l->cmatch = 0; l->osize = size + sizec; l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 0) - 1; l->size = list->size + ii_size (f->bb[b].insn[i].index, 0); search_csm (iter + 1, f, l); } /* Unmark them back */ for (i1 = 0; i1 < f->bb[b].ninsn; i1++) if (iteration[i1] > iter) iteration[i1] = -1; } } /* Displays shared instructions */ void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared) { int i, first = 1; for (i = 0; i < nshared; i++) { printf ("%s%s%s", first ? "" : "-", cuc_insn_name (rf->INSN(shared[i].ref).index), shared[i].cmatch ? "!" : ""); first = 0; } } /* Common subexpression matching -- resource sharing, generation pass Situation here is much simpler than with analysis -- we know the instruction sequence we are going to share, but we are going to do this on whole function, not just one BB. We can find sequence in reference function, as pointed from "shared" */ void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared) { int b, i, j, cnt = 0; #warning some code here (2) printf ("Replacing: "); print_shared (rf, shared, nshared); for (b = 0; b < f->num_bb; b++) for (i = 0; i < f->bb[b].ninsn; i++) { } printf ("\nFound %i matches.\n", cnt); }
Go to most recent revision | Compare with Previous | Blame | View Log