URL
https://opencores.org/ocsvn/or1k_old/or1k_old/trunk
Subversion Repositories or1k_old
Compare Revisions
- This comparison shows the changes necessary to convert path
/
- from Rev 896 to Rev 897
- ↔ Reverse comparison
Rev 896 → Rev 897
/trunk/or1ksim/configure
2144,7 → 2144,7
-I\${top_srcdir}/cpu/$CPU_ARCH -I\${top_srcdir}/cache -I\${top_srcdir}/mmu \ |
-I\${top_srcdir}/bpb -I\${top_srcdir}/peripheral -I\${top_srcdir}/tick \ |
-I\${top_srcdir}/pm -I\${top_srcdir}/pic -I\${top_srcdir}/debug \ |
-I\${top_srcdir}/vapi -I\${top_srcdir}/support" |
-I\${top_srcdir}/vapi -I\${top_srcdir}/support -I\${top_srcdir}/cuc" |
|
|
trap '' 1 2 15 |
/trunk/or1ksim/sim-config.h
259,6 → 259,14
struct { |
int enabled; /* Whether power menagement is operational */ |
} pm; |
|
struct { |
char timings_fn[STR_SIZE]; /* Filename of the timing table */ |
int memory_order; /* Memory access stricness */ |
int calling_convention; /* Whether functions follow standard calling convention */ |
int enable_bursts; /* Whether burst are enabled */ |
int no_multicycle; /* When enabled no multicycle paths are generated */ |
} cuc; |
}; |
|
struct runtime { |
314,6 → 322,13
FILE *vapi_file; /* vapi file */ |
int server_port; /* A user specified port number for services */ |
} vapi; |
|
/* CUC configuration parameters */ |
struct { |
int mdelay[4]; /* average memory delays in cycles |
{read single, read burst, write single, write burst} */ |
double cycle_duration; /* in ns */ |
} cuc; |
}; |
|
#if FAST_SIM |
/trunk/or1ksim/sim.cfg
1,5 → 1,5
/* sim.cfg -- Simulator configuration script file |
Copyright (C) 2001, Marko Mlinar, markom@opencores.org |
Copyright (C) 2001-2002, Marko Mlinar, markom@opencores.org |
|
This file is part of OpenRISC 1000 Architectural Simulator. |
It contains the default configuration and help about configuring |
872,3 → 872,37
dev_packet1 = 0 |
enddevice |
end |
|
|
/* CUC SECTION |
|
This section configures the OpenRISC Custom Unit Compiler |
|
memory_order = none/weak/strong/exact |
none different memory ordering, even if there are dependencies, |
burst can be made, width can change |
weak different memory ordering, if there cannot be dependencies |
burst can be made, width can change |
strong same memory ordering, burst can be made, width can change |
exact exactly the same memory ordering and widths |
|
calling_convention = 0/1 |
whether programs follow OpenRISC calling conventions |
|
enable_bursts = 0/1 |
whether bursts are detected |
|
no_multicycle = 0/1 |
if selected no multicycle logic paths will be generated |
|
timings_fn = "<filename>" |
*/ |
|
section cuc |
memory_order = weak |
calling_convention = 1 |
enable_bursts = 1 |
no_multicycle = 1 |
timings_fn = "virtex.tim" |
end |
|
/trunk/or1ksim/cpu/or32/execute.c
484,7 → 484,7
char temp[100]; |
|
dumpmemory(iqueue[0].insn_addr, iqueue[0].insn_addr + 4, 1, 0); |
generate_time_pretty (temp, runtime.sim.cycles); |
generate_time_pretty (temp, runtime.sim.cycles * config.sim.clkcycle_ps); |
printf(" (executed) [time %s, #%i]\n", temp, runtime.cpu.instructions); |
if (config.cpu.superscalar) |
printf ("Superscalar CYCLES: %u", runtime.cpu.supercycles); |
/trunk/or1ksim/cpu/common/parse.h
31,8 → 31,17
#define whitespace(a) ((a) == '\t' ? 1 : ((a) == ' '? 1 : 0)) |
#endif |
|
/* Strips white spaces at beginning and at the end of the string */ |
char *stripwhite (char *string); |
|
/* Duplicates the string */ |
char *dupstr (char *s); |
|
/* This function is very similar to strncpy, except it null terminates the string */ |
char *strstrip (char *dst, const char *src, int n); |
|
/* Returns n-th token from string */ |
char *strtoken(char *in, char *out, int which); |
|
/* Parses string line and puts up to maxparam parameters into argv[]; number of parameters is returned */ |
int tokenize_line (char *str, char *argv[], int maxparam); |
/trunk/or1ksim/cpu/common/abstract.c
756,7 → 756,7
|
/* Outputs time in pretty form to dest string */ |
|
void generate_time_pretty (char *dest, long time_ps) |
char *generate_time_pretty (char *dest, long time_ps) |
{ |
int exp3 = 0; |
if (time_ps) { |
766,4 → 766,5
} |
} |
sprintf (dest, "%i%cs", time_ps, "pnum"[exp3]); |
return dest; |
} |
/trunk/or1ksim/cpu/common/abstract.h
126,7 → 126,7
struct dev_memarea *verify_memoryarea(unsigned long addr); |
|
/* Outputs time in pretty form to dest string */ |
void generate_time_pretty (char *dest, long time_ps); |
char *generate_time_pretty (char *dest, long time_ps); |
|
/* Temporary variable to increase speed. */ |
extern struct dev_memarea *cur_area; |
/trunk/or1ksim/cpu/common/parse.c
104,6 → 104,14
return (r); |
} |
|
/* Bounded string copy that always null terminates.

   Copies at most n characters from src to dst exactly like strncpy (so the
   remainder of the first n bytes is zero padded when src is shorter), then
   stores a terminating '\0' at dst[n].  dst must therefore have room for
   n + 1 bytes.

   A negative n is treated as 0: dst becomes the empty string.  Without this
   guard the count would be converted to a huge size_t inside strncpy and the
   terminator would be written in front of the buffer.

   Returns dst.  */
char *strstrip (char *dst, const char *src, int n)
{
  if (n < 0)
    n = 0;
  strncpy (dst, src, (size_t) n);
  dst[n] = '\0';
  return dst;
}
|
/* Parses string line and puts up to maxparam parameters into argv[]; number of parameters is returned */ |
int tokenize_line (char *str, char *argv[], int maxparam) |
{ |
535,7 → 543,7
} |
else if(ELF_LONG_H(elf_spnt->sh_type) == SHT_SYMTAB) { |
|
if((sym_tbl = (char *)malloc(ELF_LONG_H(elf_spnt->sh_size))) == NULL) { |
if((sym_tbl = (struct elf32_sym *)malloc(ELF_LONG_H(elf_spnt->sh_size))) == NULL) { |
perror("readfile_elf"); |
exit(1); |
} |
/trunk/or1ksim/cuc/cuc.h
62,6 → 62,13
#define MT_WRITE 0x20 /* This memory access does a write */ |
#define MT_SIGNED 0x40 /* Signed memory access */ |
|
/* Memory ordering levels, from most relaxed (MO_NONE) to strictest (MO_EXACT) */
#define MO_NONE 0 /* Different memory ordering, even if there are dependencies,
bursts can be made, width can change */ |
#define MO_WEAK 1 /* Different memory ordering, if there cannot be dependencies,
bursts can be made, width can change */ |
#define MO_STRONG 2 /* Same memory ordering, bursts can be made, width can change */ |
#define MO_EXACT 3 /* Exactly the same memory ordering and widths */ |
|
#define BB_INLOOP 0x01 /* This block is inside a loop */ |
#define BB_OPTIONAL 0x02 |
#define BB_END 0x04 /* Last block in a function */ |
80,10 → 87,6
/* Options */ |
/* Whether we are debugging cuc (0-9) */ |
extern int cuc_debug; |
static const int calling_convention = 1; |
static const int memory_order = 2; |
static const int enable_bursts = 1; |
static const int no_multicycle = 1; |
|
/* Temporary registers by software convention */ |
extern const int call_saved[MAX_REGS]; |
104,8 → 107,14
int ninsn; /* Number of associated instructions */ |
struct _csm_list *from; |
struct _csm_list *next; |
} cuc_shared; |
} cuc_shared_list; |
|
/* Shared resource item definition; cuc_timings.shared points to an array
   of these, with cuc_timings.nshared giving the element count */ |
typedef struct { |
int ref; /* presumably a REF()-encoded instruction reference -- TODO confirm */ |
int cmatch; /* NOTE(review): meaning not visible here; looks like a match id/count -- confirm */ |
} cuc_shared_item; |
|
/* Implementation specific timings */ |
typedef struct { |
int b; /* Basic block # this timing is referring to */ |
112,7 → 121,7
int preroll; /* How many times was this BB pre/unrolled */ |
int unroll; |
int nshared; |
int *shared; /* List of shared resources */ |
cuc_shared_item *shared; /* List of shared resources */ |
int new_time; |
double size; |
} cuc_timings; |
142,8 → 151,10
int nmemory; |
int cnt; /* how many times was this block executed */ |
int unrolled; /* how many times has been this block unrolled */ |
int ntim; /* Basic block options */ |
|
int ntim; /* Basic block options */ |
cuc_timings *tim; |
int selected_tim; /* Selected option, -1 if none */ |
} cuc_bb; |
|
/* Function entity */ |
168,6 → 179,7
cuc_timings timings; /* Base timings */ |
unsigned long start_addr; /* Address of first instruction inn function */ |
unsigned long end_addr; /* Address of last instruction inn function */ |
int memory_order; /* Memory order */ |
} cuc_func; |
|
/* Instructions from function */ |
177,7 → 189,7
extern FILE *flog; |
|
/* Loads from file into global array insn */ |
void cuc_load (char *in_fn); |
int cuc_load (char *in_fn); |
|
/* Scans sequence of BBs and set bb[].cnt */ |
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename); |
197,9 → 209,12
/* Common subexpression elimination */ |
void cse (cuc_func *f); |
|
/* Common subexpression matching -- resource sharing */ |
/* Common subexpression matching -- resource sharing, analysis pass */ |
void csm (cuc_func *f); |
|
/* Common subexpression matching -- resource sharing, generation pass */ |
void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared); |
|
/* Set the BB limits */ |
void detect_bb (cuc_func *func); |
|
/trunk/or1ksim/cuc/load.c
21,6 → 21,7
#include <stdlib.h> |
#include <stdarg.h> |
#include <assert.h> |
#include "sim-config.h" |
#include "cuc.h" |
#include "opcode/or32.h" |
#include "insn.h" |
248,40 → 249,41
void expand_signed () |
{ |
int i, j, num_sig = 0, d; |
for (i = 0; i < num_insn; i++) if (insn[i].type & IT_SIGNED) num_sig++; |
for (i = 0; i < num_insn; i++) |
if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) num_sig++; |
|
d = num_insn + num_sig * 2; |
assert (d < MAX_INSNS); |
|
/* Split signed instructions */ |
for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_SIGNED) { |
for (i = num_insn - 1; i >= 0; i--) |
/* We will expand signed memory later */ |
if (insn[i].type & IT_MEMORY) continue; |
insn[--d] = insn[i]; |
insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF; |
insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF; |
if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) { |
insn[--d] = insn[i]; |
insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF; |
insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF; |
|
insn[--d] = insn[i]; |
change_insn_type (&insn[d], II_ADD); |
insn[d].type = 0; |
insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; |
insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2]; |
insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; |
insn[d].opt[3] = OPT_NONE; |
insn[--d] = insn[i]; |
change_insn_type (&insn[d], II_ADD); |
insn[d].type = 0; |
insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; |
insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2]; |
insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; |
insn[d].opt[3] = OPT_NONE; |
|
insn[--d] = insn[i]; |
change_insn_type (&insn[d], II_ADD); |
insn[d].type = 0; |
insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; |
insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; |
insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; |
insn[d].opt[3] = OPT_NONE; |
insn[--d] = insn[i]; |
change_insn_type (&insn[d], II_ADD); |
insn[d].type = 0; |
insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; |
insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; |
insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; |
insn[d].opt[3] = OPT_NONE; |
|
reloc[i] = d; |
} else { |
insn[--d] = insn[i]; |
reloc[i] = d; |
} |
reloc[i] = d; |
} else { |
insn[--d] = insn[i]; |
reloc[i] = d; |
} |
num_insn += num_sig * 2; |
for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY || !(insn[i].type & IT_SIGNED)) { |
for (j = 0; j < MAX_OPERANDS; j++) |
290,8 → 292,9
} else insn[i].type &= ~IT_SIGNED; |
} |
|
/* Loads from file into global array insn */ |
void cuc_load (char *in_fn) |
/* Loads function from file into global array insn. |
Function returns nonzero if function cannot be converted. */ |
int cuc_load (char *in_fn) |
{ |
int i, j, in_delay; |
FILE *fi; |
327,8 → 330,9
change_insn_type (&insn[i], II_NOP); |
continue; |
} else { |
fprintf (stderr, "Instruction #%i: \"%s\" not supported.\n", i, name); |
exit (1); |
cucdebug (1, "Instruction #%i: \"%s\" not supported.\n", i, name); |
log ("Instruction #%i: \"%s\" not supported.\n", i, name); |
return 1; |
} |
if (f < 0) { /* l.j */ |
/* repair params */ |
370,9 → 374,10
change_insn_type (&insn[i], conv[j].to & II_MASK); |
break; |
} |
if (insn[i].index < 0) { |
fprintf (stderr, "Instruction #%i: \"%s\" not supported (2).\n", i, name); |
exit (1); |
if (insn[i].index < 0 || insn[i].index == II_NOP && insn[i].op[0] != 0) { |
cucdebug (1, "Instruction #%i: \"%s\" not supported (2).\n", i, name); |
log ("Instruction #%i: \"%s\" not supported (2).\n", i, name); |
return 1; |
} |
} |
} |
379,8 → 384,9
num_insn = i; |
fclose (fi); |
if (func_return != 2) { |
fprintf (stderr, "Unsupported function structure.\n"); |
exit (1); |
cucdebug (1, "Unsupported function structure.\n"); |
log ("Unsupported function structure.\n"); |
return 1; |
} |
|
log ("Number of instructions loaded = %i\n", num_insn); |
390,7 → 396,7
remove_dslots (); |
if (cuc_debug >= 6) print_cuc_insns ("NO_DELAY_SLOTS", 0); |
|
if (calling_convention) { |
if (config.cuc.calling_convention) { |
detect_locals (); |
if (cuc_debug >= 7) print_cuc_insns ("AFTER_LOCALS", 0); |
} |
399,4 → 405,5
|
expand_signed (); |
if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_SIG", 0); |
return 0; |
} |
/trunk/or1ksim/cuc/bb.c
21,6 → 21,8
#include <stdlib.h> |
#include <stdarg.h> |
#include <assert.h> |
#include "sim-config.h" |
#include "abstract.h" |
#include "cuc.h" |
#include "insn.h" |
#include "support/profile.h" |
46,12 → 48,14
if (f->bb[i].insn) print_insns (f->bb[i].insn, f->bb[i].ninsn, 0); |
} |
printf ("\n"); |
fflush (stdout); |
} |
|
/* Copies src basic block into destination */ |
cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src) |
{ |
int i; |
int i, j; |
assert (dest != src); |
*dest = *src; |
assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn)); |
for (i = 0; i < src->ninsn; i++) |
58,7 → 62,14
dest->insn[i] = src->insn[i]; |
if (src->ntim) { |
assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim)); |
for (i = 0; i < src->ntim; i++) dest->tim[i] = src->tim[i]; |
for (i = 0; i < src->ntim; i++) { |
dest->tim[i] = src->tim[i]; |
if (src->tim[i].nshared) { |
assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared)); |
for (j = 0; j < src->tim[i].nshared; j++) |
dest->tim[i].shared[j] = src->tim[i].shared[j]; |
} |
} |
} |
} |
|
92,6 → 103,9
for (i = 0; i < f->bb[b].ninsn; i++) |
dispose_list (&f->bb[b].insn[i].dep); |
if (f->bb[b].insn) free (f->bb[b].insn); |
for (i = 0; i < f->bb[b].ntim; i++) |
if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared) |
free (f->bb[b].tim[i].shared); |
if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim); |
} |
free (f); |
312,6 → 326,8
f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j; |
f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER; |
} |
|
/* Relocate instructions */ |
for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) { |
f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)]; |
for (k = 0; k < MAX_OPERANDS; k++) |
326,6 → 342,15
if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++; |
} |
} |
|
/* We do a quick check if there are some anomalies */ |
for (i = 0; i < f->num_bb; i++) |
for (j = 0; j < f->bb[i].ninsn; j++) |
for (k = 0; k < MAX_OPERANDS; k++) |
if (f->bb[i].insn[j].opt[k] & OPT_REF) { |
int t = f->bb[i].insn[j].op[k]; |
assert (REF_I (t) < f->bb[REF_BB(t)].ninsn); |
} |
} |
|
/* type == 0; keep predecessor condition |
337,8 → 362,9
unsigned long cond_op, cond_opt; |
cuc_insn *insn; |
|
//printf ("%i <= %i+%i (%i)\n", pred, pred, succ, type); |
//printf ("%i %i\n", f->bb[pred].ninsn, f->bb[succ].ninsn); |
cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type); |
cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn); |
if (cuc_debug >= 3) fflush (stdout); |
|
add = f->bb[pred].ninsn; |
if (f->bb[pred].ninsn <= 0 |
528,9 → 554,10
for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) |
if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */ |
int p = f->bb[i].prev[0]; |
if (f->bb[p].next[0] == i && f->bb[p].next[1] == f->bb[p].next[1]) |
join_bb (f, f->bb[i].prev[0], i, 2); |
goto remove_lrbb; |
if (f->bb[p].next[0] == i && f->bb[p].next[1] == f->bb[p].next[1]) { |
join_bb (f, f->bb[i].prev[0], i, 2); |
goto remove_lrbb; |
} |
} |
#endif |
} |
752,6 → 779,71
} |
} |
|
/* Splits basic block b of function f, based on the group numbers stored in
   the .tmp field of its instructions.

   Group 0 stays in block b; for every group g in 1..mg (mg being the largest
   .tmp value found) a full copy of b is appended to f->bb and then reduced
   to the instructions of that group by turning all others into II_NOP.
   References and dependency entries that pointed at a moved instruction are
   redirected to the copy now holding it.  All .tmp fields of the touched
   blocks are reset to 0.  The NOPs are left in place; the function itself
   does not compact the blocks. */ |
void expand_bb (cuc_func *f, int b) |
{ |
int n = f->num_bb; /* index the first copy will get */ |
int mg = 0; /* highest group number used in block b */ |
int b1, i, j; |
|
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; |
|
/* Create copies: one per non-zero group, stored at indices n .. n + mg - 1 */ |
for (b1 = 1; b1 <= mg; b1++) { |
assert (f->num_bb < MAX_BB); |
cpy_bb (&f->bb[f->num_bb], &f->bb[b]); |
f->num_bb++; |
} |
|
/* Relocate: in every block (copies included) redirect operand references
   and dependency entries that target an instruction of b with non-zero
   group g to the same instruction index inside copy n + g - 1 */ |
for (b1 = 0; b1 < f->num_bb; b1++) |
for (i = 0; i < f->bb[b1].ninsn; i++) { |
dep_list *d = f->bb[b1].insn[i].dep; |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b1].insn[i].opt[j] & OPT_REF) { |
int t = f->bb[b1].insn[i].op[j]; |
if (REF_BB(t) == b && f->INSN(t).tmp != 0) |
f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); |
} |
while (d) { |
if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0) |
d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref)); |
d = d->next; |
} |
} |
|
/* Delete unused instructions: block for group j keeps only the instructions
   tagged j, then the blocks are chained b -> n -> n + 1 -> ... */ |
for (j = 0; j <= mg; j++) { |
if (j == 0) b1 = b; |
else b1 = n + j - 1; |
for (i = 0; i < f->bb[b1].ninsn; i++) { |
if (f->bb[b1].insn[i].tmp != j) |
change_insn_type (&f->bb[b1].insn[i], II_NOP); |
f->bb[b1].insn[i].tmp = 0; |
} |
if (j < mg) { |
/* Not the last group: fall through into the next copy only */ |
f->bb[b1].next[0] = n + j; |
f->bb[b1].next[1] = -1; |
f->bb[n + j].prev[0] = b1; |
f->bb[n + j].prev[1] = -1; |
} else { |
/* Last group: it keeps the successors copied from b, so the successors'
   prev links that named b are switched to b1.
   NOTE(review): the two prev[] stores below index f->bb[n + j] with
   j == mg, i.e. slot n + mg, which is one past the last copy created
   above (and equals f->num_bb); when mg == 0 this touches bb[n] even
   though nothing was split.  Looks like f->bb[b1] may have been
   intended -- confirm. */ |
i = f->bb[b1].next[0]; |
f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1; |
f->bb[n + j].prev[1] = -1; |
if (i >= 0) { |
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; |
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; |
} |
i = f->bb[b1].next[1]; |
if (i >= 0) { |
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; |
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; |
} |
} |
} |
} |
|
/* Scans sequence of BBs and set bb[].cnt */ |
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename) |
{ |
764,6 → 856,10
int curbb, prevbb = -1; |
unsigned long addr = -1; |
unsigned long prevaddr = -1; |
int mssum = 0; |
int mlsum = 0; |
int mscnt = 0; |
int mlcnt = 0; |
|
assert (fi = fopen (mp_filename, "rb")); |
assert (fo = fopen (bb_filename, "wb+")); |
805,6 → 901,10
f->bb[curbb].cnt++; |
prevbb = curbb; |
} |
} else { |
if (verify_memoryarea(buf[i].addr)) |
if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw; |
else mlcnt++, mlsum += cur_area->delayw; |
} |
} |
//printf ("\n"); |
811,6 → 911,9
} while (r == bufsize); |
//printf ("\n"); |
|
runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt; |
runtime.cuc.mdelay[1] = (1. * mlsum) / mlcnt; |
runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1; |
f->num_runs = f->bb[0].cnt; |
fclose (fi); |
fclose (fo); |
985,8 → 1088,8
/* repair BB after loop, to point back to latest artificial BB */ |
b1 = n->bb[prevart_b].next[0]; |
if (b1 >= 0) { |
if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = b1; |
else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = b1; |
if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b; |
else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b; |
else assert (0); |
} |
|
1127,8 → 1230,8
/* repair BB after loop, to point back to latest artificial BB */ |
b1 = n->bb[prevart_b].next[0]; |
if (b1 >= 0) { |
if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = b1; |
else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = b1; |
if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b; |
else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b; |
else assert (0); |
} |
|
1178,12 → 1281,11
} else n = t; |
} else { |
b1 = b; |
if (unroll > 1) |
n = unroll_loop (f, b1, unroll); |
else n = dup_func (n); |
if (unroll > 1) n = unroll_loop (f, b1, unroll); |
else return dup_func (f); |
} |
|
/* Assign new count to functions */ |
/* Assign new counts to functions */ |
assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll))); |
count_bb_seq (n, b, bb_filename, counts, preroll, unroll); |
for (i = 0; i < preroll - 1 + unroll; i++) { |
/trunk/or1ksim/cuc/insn.c
21,6 → 21,7
#include <stdlib.h> |
#include <stdarg.h> |
#include <assert.h> |
#include "sim-config.h" |
#include "cuc.h" |
#include "insn.h" |
|
79,6 → 80,511
else return known[ii->index].name; |
} |
|
/* Prints out instructions */ |
void print_insns (cuc_insn *insn, int ninsn, int verbose) |
{ |
int i, j; |
for (i = 0; i < ninsn; i++) { |
dep_list *l = insn[i].dep; |
printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i])); |
if (verbose) { |
printf ("%-20s insn = %08x, index = %i, type = %04x ", |
insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); |
} else printf ("type = %04x ", insn[i].type); |
for (j = 0; j < MAX_OPERANDS; j++) { |
if (insn[i].opt[j] & OPT_DEST) printf ("*"); |
switch (insn[i].opt[j] & ~OPT_DEST) { |
case OPT_NONE: break; |
case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break; |
case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break; |
case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break; |
case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break; |
case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break; |
case OPT_LRBB: printf ("LRBB, "); break; |
default: |
fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n", |
cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); |
assert (0); |
} |
} |
if (l) { |
printf ("\n\tdep:"); |
while (l) { |
printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref)); |
l = l->next; |
} |
} |
printf ("\n"); |
} |
} |
|
/* Appends reference dep to the dependency list *list, unless an entry with
   the same reference is already present (the list holds no duplicates).

   The new node is heap allocated and is released by dispose_list.  Running
   out of memory is fatal: a message is written to stderr and the program
   exits, instead of dereferencing a NULL pointer.  */
void add_dep (dep_list **list, int dep)
{
  dep_list **tail = list;
  dep_list *node;

  /* Walk to the end of the list, bailing out on a duplicate */
  while (*tail) {
    if ((*tail)->ref == dep) return; /* already there */
    tail = &(*tail)->next;
  }

  node = malloc (sizeof *node);
  if (node == NULL) {
    fprintf (stderr, "add_dep: out of memory\n");
    exit (1);
  }
  node->ref = dep;
  node->next = NULL;
  *tail = node;
}
|
/* Frees every node of the dependency list *list and leaves *list NULL.  */
void dispose_list (dep_list **list)
{
  dep_list *node = *list;

  while (node != NULL) {
    dep_list *following = node->next;
    free (node);
    node = following;
  }
  *list = NULL;
}
|
/* Builds the data dependency list of every instruction in f.

   For each operand that references another instruction, the dependency
   list currently recorded for that instruction is merged in first, and
   then the referenced instruction itself is added.  */
void add_data_dep (cuc_func *f)
{
  int blk;

  for (blk = 0; blk < f->num_bb; blk++) {
    cuc_insn *code = f->bb[blk].insn;
    int pos;

    for (pos = 0; pos < f->bb[blk].ninsn; pos++) {
      int opnd;

      for (opnd = 0; opnd < MAX_OPERANDS; opnd++) {
        dep_list *inherited;

        fflush (stdout);
        if (!(code[pos].opt[opnd] & OPT_REF))
          continue;

        /* Copy list from predecessor */
        for (inherited = f->INSN(code[pos].op[opnd]).dep; inherited; inherited = inherited->next)
          add_dep (&code[pos].dep, inherited->ref);

        /* add predecessor */
        add_dep (&code[pos].dep, code[pos].op[opnd]);
      }
    }
  }
}
|
/* returns nonzero, if instruction was simplified */ |
int apply_edge_condition (cuc_insn *ii) |
{ |
unsigned int c = ii->op[2]; |
|
if (ii->index == II_AND) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_OR) { |
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SUB) { |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_MUL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else |
if (ii->opt[2] & OPT_CONST && c == 1) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else |
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { |
change_insn_type (ii, II_SUB); |
ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SRL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else if (ii->opt[2] & OPT_CONST && c >= 32) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SLL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else if (ii->opt[2] & OPT_CONST && c >= 32) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SRA) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_CMOV) { |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { |
change_insn_type (ii, II_ADD); |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
ii->opt[3] = OPT_NONE; |
return 1; |
} |
} |
return 0; |
} |
|
/* Optimizes dataflow tree */ |
void optimize_tree (cuc_func *f) |
{ |
int b, i, j; |
int modified; |
|
do { |
modified = 0; |
for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { |
for (i = 0; i < f->bb[b].ninsn; i++) { |
cuc_insn *ii = &f->bb[b].insn[i]; |
/* We tend to have the third parameter const if instruction is cumutative */ |
if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST) |
&& known[ii->index].comutative) { |
unsigned long t = ii->opt[1]; |
ii->opt[1] = ii->opt[2]; |
ii->opt[2] = t; |
t = ii->op[1]; |
ii->op[1] = ii->op[2]; |
ii->op[2] = t; |
modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i)); |
} |
|
/* Try to do the promotion */ |
/* We have two consecutive expressions, containing constants, |
* if previous is a simple expression we can handle it simply: */ |
for (j = 0; j < MAX_OPERANDS; j++) |
if (ii->opt[j] & OPT_REF) { |
cuc_insn *t = &f->INSN(ii->op[j]); |
if (f->INSN(ii->op[j]).index == II_ADD |
&& f->INSN(ii->op[j]).opt[2] & OPT_CONST |
&& f->INSN(ii->op[j]).op[2] == 0 |
&& !(ii->type & IT_MEMORY && t->type & IT_MEMADD) |
&& !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) { |
/* do not promote through add-mem, and branches */ |
modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]); |
ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1]; |
} |
} |
|
/* In case of x = cmov x, y; or x = cmov y, x; we have |
asynchroneous loop -> remove it */ |
if (ii->index == II_CMOV) { |
int f = 0; |
if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1; |
if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2; |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2; |
if (f) { |
change_insn_type (ii, II_ADD); |
cucdebug (2, "%8x:cmov %i\n", REF(b, i), f); |
ii->opt[f] = OPT_CONST; |
ii->op[f] = 0; |
ii->opt[3] = OPT_NONE; |
modified = 1; |
continue; |
} |
} |
|
/* Do nothing to volatile instructions */ |
if (ii->type & IT_VOLATILE) continue; |
|
/* Check whether we can simplify the instruction */ |
if (apply_edge_condition (ii)) { |
modified = 1; |
continue; |
} |
/* We cannot do anything more if at least one is not constant */ |
if (!(ii->opt[2] & OPT_CONST)) continue; |
|
if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ |
unsigned long value; |
int ok = 1; |
/* Was constant expression already? */ |
if (ii->index == II_ADD && !ii->op[2]) continue; |
|
if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; |
else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; |
else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; |
else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; |
else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; |
else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; |
else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; |
else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; |
else ok = 0; |
if (ok) { |
change_insn_type (ii, II_ADD); |
ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; |
ii->op[1] = value; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
modified = 1; cucdebug (2, "%8x:const\n", REF (b, i)); |
} |
} else if (ii->opt[1] & OPT_REF) { |
cuc_insn *prev = &f->INSN(ii->op[1]); |
/* Is this just a link? */ |
if (ii->index == II_ADD |
&& !(ii->type & IT_MEMADD) && ii->op[2] == 0) { |
int b1, i1, j1; |
cucdebug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]); |
for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) |
for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) |
for (j1 = 0; j1 < MAX_OPERANDS; j1++) |
if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) |
&& f->bb[b1].insn[i1].op[j1] == REF(b, i)) { |
cucdebug (2, "%x ", REF (b1, i1)); |
f->bb[b1].insn[i1].op[j1] = ii->op[1]; |
} |
cucdebug (2, "\n"); |
change_insn_type (ii, II_NOP); |
} else if (prev->opt[2] & OPT_CONST) { |
/* Handle some common cases */ |
/* add - add joining */ |
if (ii->index == II_ADD && prev->index == II_ADD) { |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i)); |
} else /* add - sub joining */ |
if (ii->index == II_ADD && prev->index == II_SUB) { |
change_insn_type (&insn[i], II_SUB); |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i)); |
} else /* sub - add joining */ |
if (ii->index == II_SUB && prev->index == II_ADD) { |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i)); |
} |
} |
} |
} |
} |
} while (modified); |
} |
|
/* Remove nop instructions */ |
void remove_nops (cuc_func *f) |
{ |
int b; |
for (b = 0; b < f->num_bb; b++) { |
int c, d = 0, i, j; |
cuc_insn *insn = f->bb[b].insn; |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (insn[i].index != II_NOP) { |
reloc [i] = d; |
insn[d++] = insn[i]; |
} else { |
reloc[i] = d; /* For jumps only */ |
} |
f->bb[b].ninsn = d; |
|
/* Relocate references from all basic blocks */ |
for (c = 0; c < f->num_bb; c++) |
for (i = 0; i < f->bb[c].ninsn; i++) { |
dep_list *d = f->bb[c].insn[i].dep; |
for (j = 0; j < MAX_OPERANDS; j++) |
if ((f->bb[c].insn[i].opt[j] & OPT_REF) |
&& REF_BB(f->bb[c].insn[i].op[j]) == b) |
f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); |
|
while (d) { |
if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]); |
d = d->next; |
} |
} |
} |
} |
|
/* Remove unused assignments */ |
void remove_dead (cuc_func *f) |
{ |
int b, i, j; |
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT))) |
f->bb[b].insn[i].type |= IT_UNUSED; |
|
for (b = 0; b < f->num_bb; b++) { |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b].insn[i].opt[j] & OPT_REF) { |
f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED; |
} |
} |
|
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].type & IT_UNUSED) { |
change_insn_type (&f->bb[b].insn[i], II_NOP); |
} |
|
remove_nops (f); |
} |
|
/* Removes trivial register assignments */ |
void remove_trivial_regs (cuc_func *f) |
{ |
int b, i; |
for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i]; |
|
for (b = 0; b < f->num_bb; b++) { |
cuc_insn *insn = f->bb[b].insn; |
for (i = 0; i < f->bb[b].ninsn; i++) { |
if (insn[i].index == II_ADD |
&& insn[i].opt[0] & OPT_REGISTER |
&& insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] |
&& insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { |
if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; |
change_insn_type (&insn[i], II_NOP); |
} |
} |
} |
if (cuc_debug >= 2) { |
printf ("saved regs "); |
for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]); |
printf ("\n"); |
} |
remove_nops (f); |
} |
|
/* Determine inputs and outputs */ |
void set_io (cuc_func *f) |
{ |
int b, i, j; |
/* Determine register usage */ |
for (i = 0; i < MAX_REGS; i++) { |
f->lur[i] = -1; |
f->used_regs[i] = 0; |
} |
for (b = 0; b < f->num_bb; b++) { |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) |
if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i); |
else f->used_regs[f->bb[b].insn[i].op[j]] = 1; |
} |
} |
|
/* relocate all accesses inside of BB b to back/fwd */ |
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) |
{ |
int i, j; |
for (i = 0; i < bb->ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (bb->insn[i].opt[j] & OPT_REF |
&& REF_BB (bb->insn[i].op[j]) == b) { |
int t = REF_I (bb->insn[i].op[j]); |
if (t < i) bb->insn[i].op[j] = REF (back, t); |
else bb->insn[i].op[j] = REF (fwd, t); |
} |
} |
|
/* Latch outputs in loops */ |
void add_latches (cuc_func *f) |
{ |
int b, i, j; |
|
//print_cuc_bb (f, "ADD_LATCHES a"); |
/* Cuts the tree and marks registers */ |
mark_cut (f); |
|
/* Split BBs with more than one group */ |
for (b = 0; b < f->num_bb; b++) expand_bb (f, b); |
remove_nops (f); |
//print_cuc_bb (f, "ADD_LATCHES 0"); |
|
/* Convert accesses in BB_INLOOP type block to latched */ |
for (b = 0; b < f->num_bb; b++) { |
int j; |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { |
int t = f->bb[b].insn[i].op[j]; |
/* If we are pointing to a INLOOP block from outside, or forward |
(= previous loop iteration) we must register that data */ |
if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle) |
&& !(f->INSN(t).type & (IT_BRANCH | IT_COND)) |
&& (REF_BB(t) != b || REF_I(t) >= i)) { |
f->INSN(t).type |= IT_LATCHED; |
} |
} |
} |
//print_cuc_bb (f, "ADD_LATCHES 1"); |
|
/* Add latches at the end of blocks as needed */ |
for (b = 0; b < f->num_bb; b++) { |
int nreg = 0; |
cuc_insn *insn; |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; |
if (nreg) { |
insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); |
j = 0; |
for (i = 0; i < f->bb[b].ninsn; i++) { |
insn[i] = f->bb[b].insn[i]; |
if (insn[i].type & IT_LATCHED) { |
cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; |
change_insn_type (ii, II_REG); |
ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; |
ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; |
ii->opt[2] = ii->opt[3] = OPT_NONE; |
ii->dep = NULL; |
ii->type = IT_VOLATILE; |
sprintf (ii->disasm, "reg %i_%i", b, i); |
} |
} |
f->bb[b].ninsn += nreg; |
free (f->bb[b].insn); |
f->bb[b].insn = insn; |
} |
} |
//print_cuc_bb (f, "ADD_LATCHES 2"); |
|
/* Repair references */ |
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
/* If destination instruction is latched, use register instead */ |
if (f->bb[b].insn[i].opt[j] == OPT_REF |
&& f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { |
int b1, i1; |
b1 = REF_BB (f->bb[b].insn[i].op[j]); |
//cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); |
if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { |
for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { |
assert (f->bb[b1].insn[i1].index == II_REG); |
if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { |
f->bb[b].insn[i].op[j] = REF (b1, i1); |
break; |
} |
} |
} |
} |
} |
|
/* CSE -- common subexpression elimination */ |
void cse (cuc_func *f) |
{ |
131,16 → 637,19
return c; |
} |
|
static void search_csm (int iter, cuc_func *f, cuc_shared *list); |
static cuc_shared *main_list; |
static void search_csm (int iter, cuc_func *f, cuc_shared_list *list); |
static cuc_shared_list *main_list; |
static int *iteration; |
|
/* CSM -- common subexpression matching -- resource sharing */ |
/* CSM -- common subexpression matching -- resource sharing |
We try to match a tree of instructions inside a BB with as many |
matches as possible. All possibilities are collected, and |
options that make the situation worse are removed */ |
void csm (cuc_func *f) |
{ |
int b, i, j; |
int cnt; |
cuc_shared *list; |
cuc_shared_list *list; |
cuc_timings timings; |
|
analyse_timings (f, &timings); |
170,7 → 679,7
iteration[j] = 0; |
} else iteration[j] = -1; |
if (cntc > 1) { |
assert (list = (cuc_shared *)malloc (sizeof (cuc_shared))); |
assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); |
list->next = main_list; |
list->from = NULL; |
list->ref = REF (b, i); |
183,7 → 692,7
search_csm (0, f, list); |
} |
if (cnt > 1) { |
assert (list = (cuc_shared *)malloc (sizeof (cuc_shared))); |
assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); |
list->next = main_list; |
list->from = NULL; |
list->ref = REF (b, i); |
215,7 → 724,7
|
/* Count number of instructions grouped */ |
for (list = main_list; list; list = list->next) { |
cuc_shared *l = list; |
cuc_shared_list *l = list; |
int c = 0; |
while (l) { |
c++; |
233,11 → 742,11
#if 1 |
/* We can get a lot of options here, so we will delete duplicates */ |
for (list = main_list; list; list = list->next) if (!list->dead) { |
cuc_shared *l; |
cuc_shared_list *l; |
for (l = list->next; l; l = l->next) if (!l->dead) { |
int ok = 1; |
cuc_shared *t1 = list; |
cuc_shared *t2 = l; |
cuc_shared_list *t1 = list; |
cuc_shared_list *t2 = l; |
while (ok && t1 && t2) { |
if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) { |
/* If other operands are matching, we must check for them also */ |
266,7 → 775,7
#endif |
/* Print out */ |
for (list = main_list; list; list = list->next) if (!list->dead) { |
cuc_shared *l = list; |
cuc_shared_list *l = list; |
cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @", |
cuc_insn_name (&f->INSN(list->ref)), list->cnt, list->ninsn, |
list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs); |
292,14 → 801,19
|
cnt = 0; |
for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) { |
cuc_shared *l = list; |
cuc_shared_list *l = list; |
f->bb[b].tim[cnt].b = b; |
f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1; |
f->bb[b].tim[cnt].nshared = list->ninsn; |
assert (f->bb[b].tim[cnt].shared = (int *) malloc (sizeof(int) * list->ninsn)); |
for (i = 0; i < list->ninsn; i++, l = l->from) f->bb[b].tim[cnt].shared[i] = l->ref; |
assert (f->bb[b].tim[cnt].shared = (cuc_shared_item *) |
malloc (sizeof(cuc_shared_item) * list->ninsn)); |
for (i = 0; i < list->ninsn; i++, l = l->from) { |
f->bb[b].tim[cnt].shared[i].ref = l->ref; |
f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch; |
} |
f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1); |
f->bb[b].tim[cnt].size = timings.size + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize; |
f->bb[b].tim[cnt].size = timings.size + |
list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize; |
cnt++; |
} |
} |
306,10 → 820,10
} |
|
/* Recursive function for searching through instruction graph */ |
static void search_csm (int iter, cuc_func *f, cuc_shared *list) |
static void search_csm (int iter, cuc_func *f, cuc_shared_list *list) |
{ |
int b, i, j, i1; |
cuc_shared *l; |
cuc_shared_list *l; |
b = REF_BB(list->ref); |
i = REF_I(list->ref); |
|
344,7 → 858,7
} |
|
if (cntc > 1) { |
assert (l = (cuc_shared *)malloc (sizeof (cuc_shared))); |
assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); |
l->next = main_list; |
main_list = l; |
l->from = list; |
357,7 → 871,7
search_csm (iter + 1, f, l); |
} |
if (cnt > 1) { |
assert (l = (cuc_shared *)malloc (sizeof (cuc_shared))); |
assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); |
l->next = main_list; |
main_list = l; |
l->from = list; |
375,3 → 889,33
} |
} |
|
/* Displays shared instructions */ |
void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared) |
{ |
int i, first = 1; |
for (i = 0; i < nshared; i++) { |
printf ("%s%s%s", first ? "" : "-", cuc_insn_name (rf->INSN(shared[i].ref).index), |
shared[i].cmatch ? "!" : ""); |
first = 0; |
} |
} |
|
/* Common subexpression matching -- resource sharing, generation pass |
|
Situation here is much simpler than with analysis -- we know the instruction sequence |
we are going to share, but we are going to do this on whole function, not just one BB. |
We can find sequence in reference function, as pointed from "shared" */ |
void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared) |
{ |
int b, i, j, cnt = 0; |
#warning some code here (2) |
printf ("Replacing: "); |
print_shared (rf, shared, nshared); |
|
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) { |
} |
|
printf ("\nFound %i matches.\n", cnt); |
} |
|
/trunk/or1ksim/cuc/memory.c
21,6 → 21,7
#include <stdlib.h> |
#include <stdarg.h> |
#include <assert.h> |
#include "sim-config.h" |
#include "cuc.h" |
#include "insn.h" |
|
29,10 → 30,10
static int check_memory_conflict (cuc_func *f, cuc_insn *a, cuc_insn *b, int otype) |
{ |
switch (otype) { |
case 0: /* exact */ |
case 1: /* strong */ |
case MO_EXACT: /* exact */ |
case MO_STRONG: /* strong */ |
return 1; |
case 2: /* weak */ |
case MO_WEAK: /* weak */ |
assert (a->type & IT_MEMORY); |
assert (b->type & IT_MEMORY); |
if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD |
51,7 → 52,7
if (b->op[2] >= a->op[2] && b->op[2] < a->op[2] + aw) return 1; |
return 0; |
} else return 1; |
case 3: /* none */ |
case MO_NONE: /* none */ |
return 0; |
default: |
assert (0); |
112,7 → 113,7
for (i = 0; i < f->bb[b].ninsn; i++) |
if (insn[i].type & IT_MEMORY) { |
f->msched[f->nmsched++] = REF (b, i); |
if (otype == 2 || otype == 3) insn[i].type |= IT_FLAG1; /* mark unscheduled */ |
if (otype == MO_NONE || otype == MO_WEAK) insn[i].type |= IT_FLAG1; /* mark unscheduled */ |
} |
} |
#if 0 |
124,7 → 125,7
/* We can reorder only the looser ordering types. |
We assume that memory accesses are currently in a valid (but not necessarily |
optimal) order */ |
if (otype == 2 || otype == 3) { |
if (otype == MO_WEAK || otype == MO_NONE) { |
for (i = 0; i < f->nmsched; i++) { |
int best = i; |
int tmp; |
164,7 → 165,7
if (a->type & IT_SIGNED) f->mtype[i] |= MT_SIGNED; |
} |
|
if (enable_bursts) { |
if (config.cuc.enable_bursts) { |
//printf ("\n"); |
for (i = 1; i < f->nmsched; i++) { |
cuc_insn *a = &f->INSN(f->msched[i - 1]); |
/trunk/or1ksim/cuc/cuc.c
28,6 → 28,7
#include "insn.h" |
#include "profiler.h" |
#include "opcode/or32.h" |
#include "parse.h" |
|
FILE *flog; |
int cuc_debug = 0; |
40,576 → 41,6
0, 1, 0, 1, 0, 1, 0, 1, |
1, 1}; |
|
/* Prints out instructions */ |
void print_insns (cuc_insn *insn, int ninsn, int verbose) |
{ |
int i, j; |
for (i = 0; i < ninsn; i++) { |
dep_list *l = insn[i].dep; |
printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i])); |
if (verbose) { |
printf ("%-20s insn = %08x, index = %i, type = %04x ", |
insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); |
} else printf ("type = %04x ", insn[i].type); |
for (j = 0; j < MAX_OPERANDS; j++) { |
if (insn[i].opt[j] & OPT_DEST) printf ("*"); |
switch (insn[i].opt[j] & ~OPT_DEST) { |
case OPT_NONE: break; |
case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break; |
case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break; |
case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break; |
case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break; |
case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break; |
case OPT_LRBB: printf ("LRBB, "); break; |
default: |
fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n", |
cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); |
assert (0); |
} |
} |
if (l) { |
printf ("\n\tdep:"); |
while (l) { |
printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref)); |
l = l->next; |
} |
} |
printf ("\n"); |
} |
} |
|
/* Appends reference DEP to the dependency list *LIST unless it is already
   present.  The new node is heap allocated; dispose_list () frees it.
   The allocation result is checked before the node is written. */
void add_dep (dep_list **list, int dep)
{
  dep_list *ndep;
  dep_list **tmp = list;

  /* Walk to the tail, bailing out on a duplicate */
  while (*tmp) {
    if ((*tmp)->ref == dep) return; /* already there */
    tmp = &((*tmp)->next);
  }

  ndep = (dep_list *)malloc (sizeof (dep_list));
  assert (ndep != NULL);
  ndep->ref = dep;
  ndep->next = NULL;
  *tmp = ndep;
}
|
/* Frees every node of the dependency list *LIST and leaves *LIST NULL. */
void dispose_list (dep_list **list)
{
  dep_list *node = *list;

  while (node) {
    dep_list *following = node->next;
    free (node);
    node = following;
  }
  *list = NULL;
}
|
/* Builds data dependency lists.

   For every OPT_REF operand of every instruction, the dependency list of
   the referenced instruction is merged into the referencing instruction's
   list, followed by the referenced instruction itself.  stdout is flushed
   once per operand visited. */
void add_data_dep (cuc_func *f)
{
  int b, i, j;

  for (b = 0; b < f->num_bb; b++) {
    cuc_insn *insn = f->bb[b].insn;
    for (i = 0; i < f->bb[b].ninsn; i++) {
      cuc_insn *cur = &insn[i];
      for (j = 0; j < MAX_OPERANDS; j++) {
        dep_list *src;

        fflush (stdout);
        if (!(cur->opt[j] & OPT_REF)) continue;

        /* Copy list from predecessor */
        for (src = f->INSN(cur->op[j]).dep; src; src = src->next)
          add_dep (&cur->dep, src->ref);

        /* add predecessor */
        add_dep (&cur->dep, cur->op[j]);
      }
    }
  }
}
|
/* returns nonzero, if instruction was simplified */ |
int apply_edge_condition (cuc_insn *ii) |
{ |
unsigned int c = ii->op[2]; |
|
if (ii->index == II_AND) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_OR) { |
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SUB) { |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_MUL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else |
if (ii->opt[2] & OPT_CONST && c == 1) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else |
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { |
change_insn_type (ii, II_SUB); |
ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SRL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else if (ii->opt[2] & OPT_CONST && c >= 32) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SLL) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} else if (ii->opt[2] & OPT_CONST && c >= 32) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = 0; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_SRA) { |
if (ii->opt[2] & OPT_CONST && c == 0) { |
change_insn_type (ii, II_ADD); |
ii->op[1] = c; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
return 1; |
} |
} else if (ii->index == II_CMOV) { |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { |
change_insn_type (ii, II_ADD); |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
ii->opt[3] = OPT_NONE; |
return 1; |
} |
} |
return 0; |
} |
|
/* Optimizes dataflow tree */ |
void optimize_tree (cuc_func *f) |
{ |
int b, i, j; |
int modified; |
|
do { |
modified = 0; |
for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { |
for (i = 0; i < f->bb[b].ninsn; i++) { |
cuc_insn *ii = &f->bb[b].insn[i]; |
/* We tend to have the third parameter const if instruction is cumutative */ |
if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST) |
&& known[ii->index].comutative) { |
unsigned long t = ii->opt[1]; |
ii->opt[1] = ii->opt[2]; |
ii->opt[2] = t; |
t = ii->op[1]; |
ii->op[1] = ii->op[2]; |
ii->op[2] = t; |
modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i)); |
} |
|
/* Try to do the promotion */ |
/* We have two consecutive expressions, containing constants, |
* if previous is a simple expression we can handle it simply: */ |
for (j = 0; j < MAX_OPERANDS; j++) |
if (ii->opt[j] & OPT_REF) { |
cuc_insn *t = &f->INSN(ii->op[j]); |
if (f->INSN(ii->op[j]).index == II_ADD |
&& f->INSN(ii->op[j]).opt[2] & OPT_CONST |
&& f->INSN(ii->op[j]).op[2] == 0 |
&& !(ii->type & IT_MEMORY && t->type & IT_MEMADD) |
&& !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) { |
/* do not promote through add-mem, and branches */ |
modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]); |
ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1]; |
} |
} |
|
/* In case of x = cmov x, y; or x = cmov y, x; we have |
asynchroneous loop -> remove it */ |
if (ii->index == II_CMOV) { |
int f = 0; |
if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1; |
if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2; |
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2; |
if (f) { |
change_insn_type (ii, II_ADD); |
cucdebug (2, "%8x:cmov %i\n", REF(b, i), f); |
ii->opt[f] = OPT_CONST; |
ii->op[f] = 0; |
ii->opt[3] = OPT_NONE; |
modified = 1; |
continue; |
} |
} |
|
/* Do nothing to volatile instructions */ |
if (ii->type & IT_VOLATILE) continue; |
|
/* Check whether we can simplify the instruction */ |
if (apply_edge_condition (ii)) { |
modified = 1; |
continue; |
} |
/* We cannot do anything more if at least one is not constant */ |
if (!(ii->opt[2] & OPT_CONST)) continue; |
|
if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ |
unsigned long value; |
int ok = 1; |
/* Was constant expression already? */ |
if (ii->index == II_ADD && !ii->op[2]) continue; |
|
if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; |
else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; |
else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; |
else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; |
else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; |
else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; |
else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; |
else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; |
else ok = 0; |
if (ok) { |
change_insn_type (ii, II_ADD); |
ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; |
ii->op[1] = value; ii->opt[1] = OPT_CONST; |
ii->op[2] = 0; ii->opt[2] = OPT_CONST; |
modified = 1; cucdebug (2, "%8x:const\n", REF (b, i)); |
} |
} else if (ii->opt[1] & OPT_REF) { |
cuc_insn *prev = &f->INSN(ii->op[1]); |
/* Is this just a link? */ |
if (ii->index == II_ADD |
&& !(ii->type & IT_MEMADD) && ii->op[2] == 0) { |
int b1, i1, j1; |
cucdebug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]); |
for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) |
for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) |
for (j1 = 0; j1 < MAX_OPERANDS; j1++) |
if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) |
&& f->bb[b1].insn[i1].op[j1] == REF(b, i)) { |
cucdebug (2, "%x ", REF (b1, i1)); |
f->bb[b1].insn[i1].op[j1] = ii->op[1]; |
} |
cucdebug (2, "\n"); |
change_insn_type (ii, II_NOP); |
} else if (prev->opt[2] & OPT_CONST) { |
/* Handle some common cases */ |
/* add - add joining */ |
if (ii->index == II_ADD && prev->index == II_ADD) { |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i)); |
} else /* add - sub joining */ |
if (ii->index == II_ADD && prev->index == II_SUB) { |
change_insn_type (&insn[i], II_SUB); |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i)); |
} else /* sub - add joining */ |
if (ii->index == II_SUB && prev->index == II_ADD) { |
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; |
ii->op[2] += prev->op[2]; |
modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i)); |
} |
} |
} |
} |
} |
} while (modified); |
} |
|
/* Remove nop instructions */ |
void remove_nops (cuc_func *f) |
{ |
int b; |
for (b = 0; b < f->num_bb; b++) { |
int c, d = 0, i, j; |
cuc_insn *insn = f->bb[b].insn; |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (insn[i].index != II_NOP) { |
reloc [i] = d; |
insn[d++] = insn[i]; |
} else { |
reloc[i] = d; /* For jumps only */ |
} |
f->bb[b].ninsn = d; |
|
/* Relocate references from all basic blocks */ |
for (c = 0; c < f->num_bb; c++) |
for (i = 0; i < f->bb[c].ninsn; i++) { |
dep_list *d = f->bb[c].insn[i].dep; |
for (j = 0; j < MAX_OPERANDS; j++) |
if ((f->bb[c].insn[i].opt[j] & OPT_REF) |
&& REF_BB(f->bb[c].insn[i].op[j]) == b) |
f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); |
|
while (d) { |
if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]); |
d = d->next; |
} |
} |
} |
} |
|
/* Remove unused assignments */ |
void remove_dead (cuc_func *f) |
{ |
int b, i, j; |
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT))) |
f->bb[b].insn[i].type |= IT_UNUSED; |
|
for (b = 0; b < f->num_bb; b++) { |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b].insn[i].opt[j] & OPT_REF) { |
f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED; |
} |
} |
|
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].type & IT_UNUSED) { |
change_insn_type (&f->bb[b].insn[i], II_NOP); |
} |
|
remove_nops (f); |
} |
|
/* Removes trivial register assignments */ |
void remove_trivial_regs (cuc_func *f) |
{ |
int b, i; |
for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i]; |
|
for (b = 0; b < f->num_bb; b++) { |
cuc_insn *insn = f->bb[b].insn; |
for (i = 0; i < f->bb[b].ninsn; i++) { |
if (insn[i].index == II_ADD |
&& insn[i].opt[0] & OPT_REGISTER |
&& insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] |
&& insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { |
if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; |
change_insn_type (&insn[i], II_NOP); |
} |
} |
} |
if (cuc_debug >= 2) { |
printf ("saved regs "); |
for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]); |
printf ("\n"); |
} |
remove_nops (f); |
} |
|
/* Determine inputs and outputs */ |
void set_io (cuc_func *f) |
{ |
int b, i, j; |
/* Determine register usage */ |
for (i = 0; i < MAX_REGS; i++) { |
f->lur[i] = -1; |
f->used_regs[i] = 0; |
} |
for (b = 0; b < f->num_bb; b++) { |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) |
if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i); |
else f->used_regs[f->bb[b].insn[i].op[j]] = 1; |
} |
} |
|
/* relocate all accesses inside of BB b to back/fwd */ |
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) |
{ |
int i, j; |
for (i = 0; i < bb->ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
if (bb->insn[i].opt[j] & OPT_REF |
&& REF_BB (bb->insn[i].op[j]) == b) { |
int t = REF_I (bb->insn[i].op[j]); |
if (t < i) bb->insn[i].op[j] = REF (back, t); |
else bb->insn[i].op[j] = REF (fwd, t); |
} |
} |
|
/* Split basic block B into one basic block per instruction group.
   Each instruction's .tmp field holds its group number.  Group 0 stays in
   bb[b]; groups 1..mg are placed in copies of bb[b] appended at indices
   n .. n + mg - 1, where n is the block count on entry and mg the highest
   group number found.  References and dependency-list entries that point
   at an instruction of B with a nonzero group are redirected to the copy
   holding that group; in every block of the chain the instructions of
   other groups become NOPs and .tmp is reset to 0.  The NOPs are left in
   place (the caller is expected to remove them). */ |
void expand_bb (cuc_func *f, int b) |
{ |
int n = f->num_bb; |
int mg = 0; |
int b1, i, j; |
|
/* Find the highest group number used in this block */
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; |
|
/* Create copies */ |
for (b1 = 1; b1 <= mg; b1++) { |
assert (f->num_bb < MAX_BB); |
cpy_bb (&f->bb[f->num_bb], &f->bb[b]); |
f->num_bb++; |
} |
|
/* Relocate */ |
/* Operands and dependency entries pointing at a group g != 0 instruction
   of block b are moved to block n + g - 1, keeping the instruction index */
for (b1 = 0; b1 < f->num_bb; b1++) |
for (i = 0; i < f->bb[b1].ninsn; i++) { |
dep_list *d = f->bb[b1].insn[i].dep; |
for (j = 0; j < MAX_OPERANDS; j++) |
if (f->bb[b1].insn[i].opt[j] & OPT_REF) { |
int t = f->bb[b1].insn[i].op[j]; |
if (REF_BB(t) == b && f->INSN(t).tmp != 0) |
f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); |
} |
while (d) { |
if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0) |
d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref)); |
d = d->next; |
} |
} |
|
/* Delete unused instructions */ |
for (j = 0; j <= mg; j++) { |
/* b1 is the block that holds group j */
if (j == 0) b1 = b; |
else b1 = n + j - 1; |
for (i = 0; i < f->bb[b1].ninsn; i++) { |
if (f->bb[b1].insn[i].tmp != j) |
change_insn_type (&f->bb[b1].insn[i], II_NOP); |
f->bb[b1].insn[i].tmp = 0; |
} |
if (j < mg) { |
/* Not the last group: fall through to the block of group j + 1 */
f->bb[b1].next[0] = n + j; |
f->bb[b1].next[1] = -1; |
f->bb[n + j].prev[0] = b1; |
f->bb[n + j].prev[1] = -1; |
} else { |
/* Last group: successors of the original block must now name b1 as
   their predecessor.
   NOTE(review): the two writes to bb[n + j] below address index n + mg,
   one past the copies created above (n .. n + mg - 1) -- confirm whether
   bb[b1] was intended. */
i = f->bb[b1].next[0]; |
f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1; |
f->bb[n + j].prev[1] = -1; |
if (i >= 0) { |
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; |
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; |
} |
i = f->bb[b1].next[1]; |
if (i >= 0) { |
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; |
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; |
} |
} |
} |
} |
|
/* Latch outputs in loops */ |
void add_latches (cuc_func *f) |
{ |
int b, i, j; |
|
//print_cuc_bb (f, "ADD_LATCHES a"); |
/* Cuts the tree and marks registers */ |
mark_cut (f); |
|
/* Split BBs with more than one group */ |
for (b = 0; b < f->num_bb; b++) expand_bb (f, b); |
remove_nops (f); |
//print_cuc_bb (f, "ADD_LATCHES 0"); |
|
/* Convert accesses in BB_INLOOP type block to latched */ |
for (b = 0; b < f->num_bb; b++) { |
int j; |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { |
int t = f->bb[b].insn[i].op[j]; |
/* If we are pointing to a INLOOP block from outside, or forward |
(= previous loop iteration) we must register that data */ |
if ((f->bb[REF_BB(t)].type & BB_INLOOP || no_multicycle) |
&& !(f->INSN(t).type & (IT_BRANCH | IT_COND)) |
&& (REF_BB(t) != b || REF_I(t) >= i)) { |
f->INSN(t).type |= IT_LATCHED; |
} |
} |
} |
//print_cuc_bb (f, "ADD_LATCHES 1"); |
|
/* Add latches at the end of blocks as needed */ |
for (b = 0; b < f->num_bb; b++) { |
int nreg = 0; |
cuc_insn *insn; |
for (i = 0; i < f->bb[b].ninsn; i++) |
if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; |
if (nreg) { |
insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); |
j = 0; |
for (i = 0; i < f->bb[b].ninsn; i++) { |
insn[i] = f->bb[b].insn[i]; |
if (insn[i].type & IT_LATCHED) { |
cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; |
change_insn_type (ii, II_REG); |
ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; |
ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; |
ii->opt[2] = ii->opt[3] = OPT_NONE; |
ii->dep = NULL; |
ii->type = IT_VOLATILE; |
sprintf (ii->disasm, "reg %i_%i", b, i); |
} |
} |
f->bb[b].ninsn += nreg; |
free (f->bb[b].insn); |
f->bb[b].insn = insn; |
} |
} |
//print_cuc_bb (f, "ADD_LATCHES 2"); |
|
/* Repair references */ |
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
for (j = 0; j < MAX_OPERANDS; j++) |
/* If destination instruction is latched, use register instead */ |
if (f->bb[b].insn[i].opt[j] == OPT_REF |
&& f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { |
int b1, i1; |
b1 = REF_BB (f->bb[b].insn[i].op[j]); |
//cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); |
if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { |
for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { |
assert (f->bb[b1].insn[i1].index == II_REG); |
if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { |
f->bb[b].insn[i].op[j] = REF (b1, i1); |
break; |
} |
} |
} |
} |
} |
|
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j) |
{ |
cuc_func *func; |
640,11 → 71,11
add_latches (func); |
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES"); |
set_io (func); |
add_memory_dep (func, memory_order); |
add_memory_dep (func, func->memory_order); |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); |
add_data_dep (func); |
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP"); |
schedule_memory (func, memory_order); |
schedule_memory (func, func->memory_order); |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); |
|
analyse_timings (func, timings); |
669,7 → 100,8
} |
|
cuc_func *analyse_function (char *module_name, long orig_time, |
unsigned long start_addr, unsigned long end_addr) |
unsigned long start_addr, unsigned long end_addr, |
int memory_order) |
{ |
cuc_timings timings; |
cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); |
681,10 → 113,14
func->orig_time = orig_time; |
func->start_addr = start_addr; |
func->end_addr = end_addr; |
func->memory_order = memory_order; |
|
sprintf (tmp1, "%s.bin", module_name); |
cucdebug (2, "Loading %s.bin\n", module_name); |
cuc_load (tmp1); |
if (cuc_load (tmp1)) { |
free (func); |
return NULL; |
} |
|
log ("Detecting basic blocks\n"); |
detect_bb (func); |
693,6 → 129,8
//sprintf (tmp1, "%s.bin.mp", module_name); |
sprintf (tmp2, "%s.bin.bb", module_name); |
generate_bb_seq (func, config.sim.mprof_fn, tmp2); |
log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]); |
log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]); |
|
build_bb (func); |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB"); |
721,7 → 159,9
remove_trivial_regs (func); |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL"); |
|
#if 0 |
csm (func); |
#endif |
assert (saved = dup_func (func)); |
|
timings.preroll = timings.unroll = 1; |
731,7 → 171,7
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES"); |
analyse_timings (func, &timings); |
add_memory_dep (func, memory_order); |
add_memory_dep (func, func->memory_order); |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); |
add_data_dep (func); |
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP"); |
752,6 → 192,7
cuc_timings *cut = &t[0]; |
int nt = 1; |
double csize; |
saved->bb[b].selected_tim = -1; |
|
/* Is it a loop? */ |
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; |
849,22 → 290,136
return saved; |
} |
|
void options_cmd (cuc_func *f, char *name) |
/* Utility option formatting functions */
static const char *option_char = "?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Formats an option name into S and returns S.

   If BB_NO is non-negative, S is overwritten with the decimal basic block
   number followed by the option letter option_char[F_OPT] (e.g. "3a").
   If BB_NO is negative, the option letter is appended to the string
   already held in S, so S must then contain a valid string on entry.

   F_OPT must index a real character of option_char; the terminating NUL
   is not a valid option.  S must have room for the digits, the letter and
   the terminator.  The string is built without passing S to sprintf as
   both destination and "%s" argument, which is undefined behaviour. */
char *gen_option (char *s, int bb_no, int f_opt)
{
  size_t len;

  assert (f_opt >= 0 && (size_t) f_opt < strlen (option_char));
  if (bb_no >= 0) len = (size_t) sprintf (s, "%i", bb_no);
  else len = strlen (s);

  s[len] = option_char[f_opt];
  s[len + 1] = '\0';
  return s;
}
|
/* Prints the option name for basic block BB_NO / option F_OPT (as built
   by gen_option ()) to stdout, right-aligned in a field of width 3. */
/*static */void print_option (int bb_no, int f_opt)
{
  char opt[10];
  printf ("%3s", gen_option (opt, bb_no, f_opt));
}
|
/* Writes into S a comma separated list of the options currently selected
   in F -- one gen_option () name for every basic block whose selected_tim
   is non-negative -- and returns S.  S is an empty string when nothing is
   selected.  The caller must supply a buffer large enough for the list.

   Each item is written at the current end of the string; S is never
   passed to sprintf as both destination and "%s" argument, which is
   undefined behaviour. */
static char *format_func_options (char *s, cuc_func *f)
{
  int b;
  char *end = s;   /* current end of the string being built */

  *s = '\0';
  for (b = 0; b < f->num_bb; b++)
    if (f->bb[b].selected_tim >= 0) {
      char tmp[10];
      end += sprintf (end, "%s%s", end == s ? "" : ",",
                      gen_option (tmp, b, f->bb[b].selected_tim));
    }
  return s;
}
|
/* Prints the timing/size options of function F (index FUNC_NO into
   prof_func[]): a header, a BASE row taken from f->timings, and rows taken
   from the tim[] entries of each basic block.
   NOTE(review): this span appears to interleave statements from two
   revisions of the function -- `name` is used before its declaration, both
   nested loops use `i`, and the braces do not balance -- confirm against
   the repository before relying on it. */
static void options_cmd (int func_no, cuc_func *f) |
{ |
int b, i; |
printf ("%-12s :pre%i,un%i,sha%i: time = %i cyc; size = %.f gates (old time = %i)\n", name, |
f->timings.preroll, f->timings.unroll, f->timings.nshared, |
f->timings.new_time, f->timings.size, f->orig_time); |
char tmp[20]; |
char *name = prof_func[func_no].name; |
printf ("--------------------------------------------------------\n"); |
printf ("|%-16s|pre/unrolled|shared| time | gates | old_time = %i \n", |
strstrip (tmp, name, 16), f->orig_time); |
printf ("| BASE |%4i / %4i | %4i |%8i|%8.f|\n", 1, 1, 0, |
f->timings.new_time, f->timings.size); |
for (b = 0; b < f->num_bb; b++) { |
/* Print out results */ |
for (i = 0; i < f->bb[b].ntim; i++) { |
printf ("%-12sBB%-2i:pre%i,un%i,sha%i: time = %i cyc; size = %.f gates\n", name, |
f->bb[b].tim[i].b, f->bb[b].tim[i].preroll, f->bb[b].tim[i].unroll, |
f->bb[b].tim[i].nshared, f->bb[b].tim[i].new_time, f->bb[b].tim[i].size); |
for (i = 1; i < f->bb[b].ntim; i++) { /* First one is base option */ |
/* Time and size are shown relative to the BASE row */
int time = f->bb[b].tim[i].new_time - f->timings.new_time; |
double size = f->bb[b].tim[i].size - f->timings.size; |
printf ("| "); |
print_option (b, i); |
printf (" |%4i / %4i | %4i |%+8i|%+8.f|\n", |
f->bb[b].tim[i].preroll, f->bb[b].tim[i].unroll, f->bb[b].tim[i].nshared, |
time, size); |
} |
} |
} |
|
/* Generates a function, based on specified parameters */ |
/* Generates a function, based on specified parameters.

   rf    analysed function; the selected_tim field of each of its basic
         blocks picks the timing option to apply (negative = none)
   name  function name; used in the log/console messages, to build the
         "<name>.bin.bb" string handed to preunroll_bb, and as the name
         passed to output_verilog

   Works on a duplicate of rf (dup_func): adds latches and I/O, applies the
   selected pre-/unrolling to each basic block, re-analyses the timings, adds
   memory and data dependencies, schedules memory accesses and finally emits
   Verilog.  Options with shared instructions are not supported and trip an
   assertion.  Returns the generated duplicate. */
cuc_func *generate_function (cuc_func *rf, char *name)
{
  int b;
  char tmp[256];
  cuc_timings tt;
  cuc_func *f;

  /* Keep the call outside assert (): with NDEBUG defined the whole assert
     expression is dropped and f would be left uninitialised. */
  f = dup_func (rf);
  assert (f);

  log ("Generating function %s.\n", name);
  printf ("Generating function %s.\n", name);

  print_cuc_bb (f, "BEFORE_GENERATE");
  add_latches (f);
  set_io (f);
  if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES");

  format_func_options (tmp, rf);
  if (tmp[0] != '\0') printf ("Applying options: %s\n", tmp);
  else printf ("Basic options.\n");

  /* Generate function as specified by options */
  for (b = 0; b < f->num_bb; b++) {
    cuc_timings *st;
    if (rf->bb[b].selected_tim < 0) continue;
    st = &rf->bb[b].tim[rf->bb[b].selected_tim];
    /* Bounded formatting: name has no length limit visible here */
    snprintf (tmp, sizeof (tmp), "%s.bin.bb", name);
    preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll);
    if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL");
  }

  /* Selected options that share instructions are not implemented yet */
  for (b = 0; b < f->num_bb; b++) {
    cuc_timings *st;
    if (rf->bb[b].selected_tim < 0) continue;
    st = &rf->bb[b].tim[rf->bb[b].selected_tim];
    if (!st->nshared) continue;
    assert (0);
    //csm_gen (f, rf, st->nshared, st->shared);
  }

  analyse_timings (f, &tt);
  add_memory_dep (f, f->memory_order);
  if (cuc_debug >= 7) print_cuc_bb (f, "AFTER_MEMORY_DEP");
  add_data_dep (f);
  if (cuc_debug >= 8) print_cuc_bb (f, "AFTER_DATA_DEP");
  schedule_memory (f, f->memory_order);
  if (cuc_debug >= 7) print_cuc_bb (f, "AFTER_SCHEDULE_MEM");
  output_verilog (f, name);
  return f;
}
|
/* Calculates required time, based on selected options */ |
/* Returns the time required by f with the currently selected options: the
   function's base time plus, for every basic block that has an option
   selected, the difference between that option's time and the base time. */
int calc_cycles (cuc_func *f)
{
  int b;
  int total = f->timings.new_time;

  for (b = 0; b < f->num_bb; b++) {
    if (f->bb[b].selected_tim < 0) continue;   /* no option chosen for this block */
    assert (f->bb[b].selected_tim < f->bb[b].ntim);
    total = total + (f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time);
  }
  return total;
}
|
/* Calculates required size, based on selected options */ |
/* Returns the size required by f with the currently selected options: the
   function's base size plus, for every basic block that has an option
   selected, the difference between that option's size and the base size. */
double calc_size (cuc_func *f)
{
  int b;
  double total = f->timings.size;

  for (b = 0; b < f->num_bb; b++) {
    if (f->bb[b].selected_tim < 0) continue;   /* no option chosen for this block */
    assert (f->bb[b].selected_tim < f->bb[b].ntim);
    total = total + (f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size);
  }
  return total;
}
|
/* Dumps specified function to file (hex) */ |
unsigned long extract_function (char *out_fn, unsigned long start_addr) |
{ |
888,6 → 443,7
} |
|
/* Result of analyse_function for each profiled function, indexed like prof_func */
static cuc_func *func[MAX_FUNCS]; |
/* Nonzero when the function with the same index is selected for translation */
static int func_v[MAX_FUNCS]; |
|
void main_cuc (char *filename) |
{ |
901,13 → 457,21
assert (flog = fopen (tmp1, "wt+")); |
|
/* Loads in the specified timings table */ |
load_timing_table ("virtex.tim"); |
printf ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn, |
generate_time_pretty (tmp1, config.sim.clkcycle_ps)); |
load_timing_table (config.cuc.timings_fn); |
runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps; |
printf ("Multicycle logic %s, bursts %s, %s memory order.\n", |
config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF", |
config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" : |
config.cuc.memory_order == MO_STRONG ? "strong" : "exact"); |
|
prof_set (1, 0); |
assert (prof_acquire (config.sim.prof_fn) == 0); |
|
if (config.cuc.calling_convention) |
printf ("Assuming OpenRISC standard calling convention.\n"); |
|
cycle_duration = 40.; |
|
/* Try all functions except "total" */ |
for (i = 0; i < prof_nfuncs - 1; i++) { |
long orig_time; |
920,7 → 484,10
end_addr = extract_function (tmp1, start_addr); |
|
log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr); |
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr); |
printf ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr); |
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, |
end_addr, config.cuc.memory_order); |
func_v[i] = 0; |
} |
|
while (1) { |
934,11 → 501,14
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) { |
break; |
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) { |
printf ("----------------------------------------------------------------------------\n"); |
printf ("|function name |addr |# calls |avg cycles | old% | impr. f. |\n"); |
printf ("|-------------------------+--------+--------+------------+------+----------|\n"); |
int ntime = 0; |
int size = 0; |
printf ("-----------------------------------------------------------------------------\n"); |
printf ("|function name |calls|avg cycles |old%| max. f. | impr. f.| options |\n"); |
printf ("|--------------------+-----+------------+----+----------|---------+---------|\n"); |
for (j = 0; j < prof_nfuncs; j++) { |
int bestcyc = 0, besti = 0; |
char tmp[100]; |
for (i = 0; i < prof_nfuncs; i++) |
if (prof_func[i].cum_cycles > bestcyc) { |
bestcyc = prof_func[i].cum_cycles; |
945,37 → 515,130
besti = i; |
} |
i = besti; |
printf ("| %-24s|%08X|%8i|%12.1f| %3.0f%% |", |
prof_func[i].name, prof_func[i].addr, prof_func[i].calls, |
printf ("|%-20s|%5i|%12.1f|%3.0f%%| ", |
strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls, |
((double)prof_func[i].cum_cycles / prof_func[i].calls), |
(100. * prof_func[i].cum_cycles / prof_cycles)); |
if (func[i]) { |
printf ("%9.2f |\n", 1.f * prof_func[i].cum_cycles / func[i]->timings.new_time); |
} else printf (" N/A |\n"); |
prof_func[i].cum_cycles = -1; |
double f = 1.0; |
if (func_v[i]) { |
int nt = calc_cycles (func[i]); |
int s = calc_size (func[i]); |
f = func[i]->orig_time / nt; |
ntime += nt * func[i]->num_runs; |
size += s; |
} else ntime += prof_func[i].cum_cycles; |
printf ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles / func[i]->timings.new_time, f, format_func_options (tmp, func[i])); |
} else { |
printf (" N/A | N/A | |\n"); |
ntime += prof_func[i].cum_cycles; |
} |
prof_func[i].cum_cycles = -prof_func[i].cum_cycles; |
} |
printf ("----------------------------------------------------------------------------\n"); |
printf ("Total %i functions, %i cycles.\n", prof_nfuncs, prof_cycles); |
for (i = 0; i < prof_nfuncs; i++) |
prof_func[i].cum_cycles = -prof_func[i].cum_cycles; |
printf ("-----------------------------------------------------------------------------\n"); |
printf ("Total %i cycles (was %i), total added gates = %i.\n", ntime, prof_cycles, size); |
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) { |
/* debug command */ |
sscanf (tmp1, "%*s %i", &cuc_debug); |
if (cuc_debug < 0) cuc_debug = 0; |
if (cuc_debug > 9) cuc_debug = 9; |
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) { |
for (i = 0; i < prof_nfuncs; i++); |
/* generate command */ |
for (i = 0; i < prof_nfuncs; i++) |
if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name); |
} else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) { |
/* select command */ |
char tmp[50], ch; |
int p, o, b, f; |
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); |
if (p < 1) printf ("Invalid parameters.\n"); |
else { |
/* Check if we have valid option */ |
for (f = 0; f < prof_nfuncs; f++) |
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; |
if (f < prof_nfuncs) { |
if (p == 1) { |
if (func[f]) { |
func_v[f] = 1; |
printf ("Function %s selected for translation.\n", prof_func[f].name); |
} else printf ("Function %s not suitable for translation.\n", prof_func[f].name); |
} else { |
if (!func_v[f]) |
printf ("Function %s not yet selected for translation.\n", prof_func[f].name); |
if (p < 3) goto invalid_option; |
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); |
if (!option_char[o]) goto invalid_option; |
if (b < 0 || b >= func[f]->num_bb) goto invalid_option; |
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; |
|
/* select an option */ |
func[f]->bb[b].selected_tim = o; |
if (func[f]->bb[b].tim[o].nshared) { |
printf ("Option has shared instructions: "); |
print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared); |
printf ("\n"); |
} |
continue; |
invalid_option: |
printf ("Invalid option.\n"); |
} |
} else printf ("Invalid function.\n"); |
} |
} else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) { |
/* unselect command */ |
char tmp[50], ch; |
int p, o, b, f; |
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); |
if (p < 1) printf ("Invalid parameters.\n"); |
else { |
/* Check if we have valid option */ |
for (f = 0; f < prof_nfuncs; f++) |
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; |
if (f < prof_nfuncs) { |
if (p == 1) { |
if (func[f]) { |
func_v[f] = 0; |
printf ("Function %s unselected for translation.\n", prof_func[f].name); |
} else printf ("Function %s not suitable for translation.\n", prof_func[f].name); |
} else { |
if (p < 3) goto invalid_option; |
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); |
if (!option_char[o]) goto invalid_option; |
if (b < 0 || b >= func[f]->num_bb) goto invalid_option; |
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; |
|
/* select an option */ |
func[f]->bb[b].selected_tim = -1; |
} |
} else printf ("Invalid function.\n"); |
} |
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) { |
int any = 0; |
/* options command */ |
printf ("Available options:\n"); |
for (i = 0; i < prof_nfuncs; i++) |
if (func[i]) options_cmd (func[i], prof_func[i].name); |
if (func[i]) { |
options_cmd (i, func[i]); |
any = 1; |
} |
if (any) printf ("--------------------------------------------------------\n"); |
else printf ("Sorry. No available options.\n"); |
} else { |
/* help command */ |
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0) |
printf ("Unknown command.\n"); |
printf ("OpenRISC Custom Unit Compiler command prompt\n"); |
printf ("h|help displays this help\n"); |
printf ("q|quit returns to or1ksim prompt\n"); |
printf ("p|profile displays function profiling\n"); |
printf ("d|debug # sets debug level (0-9)\n"); |
printf ("o|options displays available options\n"); |
printf ("g|generate generates verilog file\n"); |
printf ("Available commands:\n"); |
printf (" h | help displays this help\n"); |
printf (" q | quit returns to or1ksim prompt\n"); |
printf (" p | profile displays function profiling\n"); |
printf (" d | debug # sets debug level (0-9)\n"); |
printf (" o | options displays available options\n"); |
printf (" s | select func [option] selects an option/function\n"); |
printf (" u | unselect func [option] unselects an option/function\n"); |
printf (" g | generate generates verilog file\n"); |
} |
} |
|
/trunk/or1ksim/cuc/timings.c
22,16 → 22,12
#include <stdarg.h> |
#include <assert.h> |
#include <math.h> |
#include "sim-config.h" |
#include "cuc.h" |
#include "insn.h" |
|
/* average memory delays in cycles {read single, read burst, write single, write burst} */ |
static const int mdelay[4] = {4, 1, 3, 1}; |
|
double cycle_duration; |
double max_bb_delay; |
|
static cuc_timing_table *timing_table; |
static double max_bb_delay; |
|
/* Returns instruction delay */ |
double insn_time (cuc_insn *ii) |
89,11 → 85,11
for (i = 0; i < f->nmsched; i++) |
if (REF_BB (f->msched[i]) == b) { |
if (f->mtype[i] & MT_WRITE) { |
if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += mdelay[2]; |
else d += mdelay[3]; |
if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[2]; |
else d += runtime.cuc.mdelay[3]; |
} else { |
if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += mdelay[0]; |
else d += mdelay[1]; |
if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[0]; |
else d += runtime.cuc.mdelay[1]; |
} |
} |
//printf ("md%i=%i\n", b, d); |
136,7 → 132,7
bb->insn[i].tmp = mg; |
if (md > sd) { |
bb->insn[i].type |= IT_CUT; |
if (md > cycle_duration) |
if (md > runtime.cuc.cycle_duration) |
log ("WARNING: operation t%x_%x may need to be registered inbetween\n", b, i); |
depths[i] = 0.; |
} else depths[i] = md; |
149,9 → 145,9
{ |
long d; |
double x = max_delay (f, b); |
d = ceil (x / cycle_duration); |
d = ceil (x / runtime.cuc.cycle_duration); |
if (d < 1) d = 1; |
if (cut && x > cycle_duration) cut_tree (f, b, x / d); |
if (cut && x > runtime.cuc.cycle_duration) cut_tree (f, b, x / d); |
|
if (x / d > max_bb_delay) max_bb_delay = x / d; |
|
165,7 → 161,7
for (b = 0; b < f->num_bb; b++) |
for (i = 0; i < f->bb[b].ninsn; i++) |
f->bb[b].insn[i].tmp = 0; /* Set starting groups */ |
if (no_multicycle) |
if (config.cuc.no_multicycle) |
for (b = 0; b < f->num_bb; b++) |
new_bb_cycles (f, b, 1); |
} |
247,7 → 243,8
FILE *fi; |
|
log ("Loading timings from %s\n", filename); |
log ("Using clock delay %.2fns (frequency %.0fMHz)\n", cycle_duration, 1000. / cycle_duration); |
log ("Using clock delay %.2fns (frequency %.0fMHz)\n", runtime.cuc.cycle_duration, |
1000. / runtime.cuc.cycle_duration); |
assert (fi = fopen (filename, "rt")); |
|
timing_table = (cuc_timing_table *)malloc ((II_LAST + 1) * sizeof (cuc_timing_table)); |
/trunk/or1ksim/cuc/Makefile
89,7 → 89,7
CC = gcc |
CFLAGS = -g -O2 -DOR32 |
CPU_ARCH = or32 |
INCLUDES = -I${top_srcdir} -I${top_srcdir}/cpu/common -I${top_srcdir}/cpu/or1k -I${top_srcdir}/cpu/or32 -I${top_srcdir}/cache -I${top_srcdir}/mmu -I${top_srcdir}/bpb -I${top_srcdir}/peripheral -I${top_srcdir}/tick -I${top_srcdir}/pm -I${top_srcdir}/pic -I${top_srcdir}/debug -I${top_srcdir}/vapi -I${top_srcdir}/support |
INCLUDES = -I${top_srcdir} -I${top_srcdir}/cpu/common -I${top_srcdir}/cpu/or1k -I${top_srcdir}/cpu/or32 -I${top_srcdir}/cache -I${top_srcdir}/mmu -I${top_srcdir}/bpb -I${top_srcdir}/peripheral -I${top_srcdir}/tick -I${top_srcdir}/pm -I${top_srcdir}/pic -I${top_srcdir}/debug -I${top_srcdir}/vapi -I${top_srcdir}/support -I${top_srcdir}/cuc |
LOCAL_CFLAGS = |
LOCAL_DEFS = |
LOCAL_LDFLAGS = |
/trunk/or1ksim/cuc/insn.h
62,9 → 62,6
((x) == II_LH || (x) == II_SH) ? 2 :\ |
((x) == II_LW || (x) == II_SW) ? 4 : -1) |
|
/* Clock speed in ns */ |
extern double cycle_duration; |
|
/* List of known instructions and their rtl representation */ |
typedef struct { |
char *name; |
106,5 → 103,8
/* Loads in the specified timings table */ |
void load_timing_table (char *filename); |
|
/* Displays shared instructions */ |
void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared); |
|
#endif /* _DF_INSN_ */ |
|
/trunk/or1ksim/configure.in
220,7 → 220,7
-I\${top_srcdir}/cpu/$CPU_ARCH -I\${top_srcdir}/cache -I\${top_srcdir}/mmu \ |
-I\${top_srcdir}/bpb -I\${top_srcdir}/peripheral -I\${top_srcdir}/tick \ |
-I\${top_srcdir}/pm -I\${top_srcdir}/pic -I\${top_srcdir}/debug \ |
-I\${top_srcdir}/vapi -I\${top_srcdir}/support" |
-I\${top_srcdir}/vapi -I\${top_srcdir}/support -I\${top_srcdir}/cuc" |
AC_SUBST(INCLUDES) |
|
AC_OUTPUT([Makefile bpb/Makefile cache/Makefile cpu/Makefile |
/trunk/or1ksim/toplevel.c
60,7 → 60,7
#include "atahost.h" |
|
/* CVS revision number. */ |
const char rcsrev[] = "$Revision: 1.89 $"; |
const char rcsrev[] = "$Revision: 1.90 $"; |
|
/* History of execution */ |
int histexec[HISTEXEC_LEN]; |
201,7 → 201,7
fprintf(stderr, "ERROR: Problems opening profile file.\n"); |
exit (1); |
} else |
fprintf(runtime.sim.fprof, "+00000000 FFFFFFFF FFFFFFFF [outside functions]\n"); |
fprintf(runtime.sim.fprof, "+00000000 FFFFFFFF FFFFFFFF [outside_functions]\n"); |
} |
|
if (config.sim.mprofile) { |
/trunk/or1ksim/sim-config.c
32,6 → 32,7
|
#include "profiler.h" |
#include "mprofiler.h" |
#include "cuc.h" |
|
#define WARNING(s) fprintf (stderr, "WARNING: config.%s: %s\n", sections[section].name, (s)) |
#define ERROR(s) {fprintf (stderr, "ERROR: config.%s:%s\n", sections[section].name, s); if (runtime.sim.init) exit (1);} |
150,6 → 151,13
|
/* PM */ |
config.pm.enabled = 0; |
|
/* CUC */ |
strcpy (config.cuc.timings_fn, "virtex.tim"); |
config.cuc.memory_order = MO_STRONG; |
config.cuc.calling_convention = 1; |
config.cuc.enable_bursts = 1; |
config.cuc.no_multicycle = 1; |
#endif |
|
/* Configure runtime */ |
429,6 → 437,7
void dc_nways (); |
void dc_blocksize (); |
void dc_ustates (); |
void cuc_memory_order (); |
|
unsigned long tempL; |
unsigned long tempUL; |
460,7 → 469,8
{"vga", 0}, |
{"fb", 0}, |
{"kbd", 0}, /* 20 */ |
{"ata", 0} |
{"ata", 0}, |
{"cuc", 0} |
}; |
|
/* *INDENT-OFF* */ |
647,6 → 657,12
{21, "dev_size1", "=%i", ata_dev_size1, (void *)(&tempUL), 0}, |
{21, "dev_packet1", "=%i", ata_dev_packet1, (void *)(&tempL ), 0}, |
{21, "enddevice", "", end_device, NULL, 0}, |
|
{22, "calling_convention","=%i", NULL, (void *)&config.cuc.calling_convention, 0}, |
{22, "enable_bursts", "=%i", NULL, (void *)&config.cuc.enable_bursts, 0}, |
{22, "no_multicycle", "=%i", NULL, (void *)&config.cuc.no_multicycle, 0}, |
{22, "memory_order", "=%s ", cuc_memory_order, (void *)&tempS, 0}, |
{22, "timings_fn", "=\"%s\"", NULL, (void *)config.cuc.timings_fn} |
}; |
|
/* *INDENT-ON* */ |
1245,6 → 1261,21
ERROR("invalid device number."); |
} |
|
void cuc_memory_order () { |
if (strcmp (tempS, "none") == 0) |
config.cuc.memory_order = MO_NONE; |
else if (strcmp (tempS, "weak") == 0) |
config.cuc.memory_order = MO_WEAK; |
else if (strcmp (tempS, "strong") == 0) |
config.cuc.memory_order = MO_STRONG; |
else if (strcmp (tempS, "exact") == 0) { |
config.cuc.memory_order = MO_EXACT; |
} else { |
char tmp[200]; |
sprintf (tmp, "invalid memory order '%s'.\n", tempS); |
ERROR(tmp); |
} |
} |
|
/* Read environment from a script file. Does not fail - assumes default configuration instead. |
The syntax of script file is: |