OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /
    from Rev 878 to Rev 879
    Reverse comparison

Rev 878 → Rev 879

/trunk/gen_or1k_isa/sources/or32.c
828,7 → 828,7
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
static unsigned long
unsigned long
extend_imm(unsigned long imm, char l)
{
unsigned long mask;
847,7 → 847,7
return imm;
}
 
static unsigned long
unsigned long
or32_extract(param_ch, enc_initial, insn)
char param_ch;
char *enc_initial;
/trunk/gen_or1k_isa/sources/opcode/or32.c
828,7 → 828,7
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
static unsigned long
unsigned long
extend_imm(unsigned long imm, char l)
{
unsigned long mask;
847,7 → 847,7
return imm;
}
 
static unsigned long
unsigned long
or32_extract(param_ch, enc_initial, insn)
char param_ch;
char *enc_initial;
/trunk/insight/include/opcode/or32.h
213,5 → 213,14
Return the size of the instruction. */
int disassemble_index (unsigned long insn, int index);
 
/* FOR INTERNAL USE ONLY */
/* Automatically does zero- or sign- extension and also finds correct
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
unsigned long extend_imm(unsigned long imm, char l);
 
/* Extracts value from opcode */
unsigned long or32_extract(char param_ch, char *enc_initial, unsigned long insn);
 
#endif
 
/trunk/insight/opcodes/or32.c
828,7 → 828,7
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
static unsigned long
unsigned long
extend_imm(unsigned long imm, char l)
{
unsigned long mask;
847,7 → 847,7
return imm;
}
 
static unsigned long
unsigned long
or32_extract(param_ch, enc_initial, insn)
char param_ch;
char *enc_initial;
/trunk/or1ksim/configure
2249,7 → 2249,7
ac_given_INSTALL="$INSTALL"
 
trap 'rm -fr `echo "Makefile bpb/Makefile cache/Makefile cpu/Makefile
cpu/common/Makefile cpu/or32/Makefile
cpu/common/Makefile cpu/or32/Makefile cuc/Makefile
cpu/or1k/Makefile cpu/dlx/Makefile debug/Makefile
support/Makefile mmu/Makefile peripheral/Makefile tick/Makefile
pm/Makefile pic/Makefile debug/Makefile vapi/Makefile config.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
2368,7 → 2368,7
cat >> $CONFIG_STATUS <<EOF
 
CONFIG_FILES=\${CONFIG_FILES-"Makefile bpb/Makefile cache/Makefile cpu/Makefile
cpu/common/Makefile cpu/or32/Makefile
cpu/common/Makefile cpu/or32/Makefile cuc/Makefile
cpu/or1k/Makefile cpu/dlx/Makefile debug/Makefile
support/Makefile mmu/Makefile peripheral/Makefile tick/Makefile
pm/Makefile pic/Makefile debug/Makefile vapi/Makefile"}
/trunk/or1ksim/Makefile.in
90,17 → 90,20
host_cpu = @host_cpu@
host_os = @host_os@
 
SUBDIRS = cpu bpb support cache mmu peripheral tick pm pic debug vapi
SUBDIRS = cpu bpb support cache mmu peripheral tick pm pic debug vapi cuc
 
bin_PROGRAMS = sim
 
sim_SOURCES = toplevel.c sim-config.c sim-config.h profiler.c mprofiler.c profiler.h mprofiler.h
sim_SOURCES = toplevel.c sim-config.c sim-config.h profiler.c \
mprofiler.c profiler.h mprofiler.h
 
sim_LDADD = cpu/common/libcommon.a cpu/$(CPU_ARCH)/libarch.a \
cpu/or1k/libor1k.a support/libsupport.a mmu/libmmu.a \
bpb/libbpb.a cache/libcache.a peripheral/libperipheral.a \
tick/libtick.a pm/libpm.a pic/libpic.a debug/libdebug.a \
vapi/libvapi.a
vapi/libvapi.a cuc/libcuc.a
 
 
sim_LDFLAGS = #-lreadline
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
117,7 → 120,7
sim_DEPENDENCIES = cpu/common/libcommon.a cpu/$(CPU_ARCH)/libarch.a \
cpu/or1k/libor1k.a support/libsupport.a mmu/libmmu.a bpb/libbpb.a \
cache/libcache.a peripheral/libperipheral.a tick/libtick.a pm/libpm.a \
pic/libpic.a debug/libdebug.a vapi/libvapi.a
pic/libpic.a debug/libdebug.a vapi/libvapi.a cuc/libcuc.a
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@
/trunk/or1ksim/mprofiler.h
30,7 → 30,7
#define MODE_WIDTH 3
 
/* Input buffer size */
#define BUF_SIZE 100
#define BUF_SIZE 256
 
/* HASH */
#define HASH_SIZE 0x10000
/trunk/or1ksim/sim.cfg
363,7 → 363,7
 
section sim
/* verbose = 1 */
debug = 3
debug = 0
profile = 0
prof_fn = "sim.profile"
mprofile = 0
/trunk/or1ksim/cpu/or32/or32.c
828,7 → 828,7
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
static unsigned long
unsigned long
extend_imm(unsigned long imm, char l)
{
unsigned long mask;
847,7 → 847,7
return imm;
}
 
static unsigned long
unsigned long
or32_extract(param_ch, enc_initial, insn)
char param_ch;
char *enc_initial;
/trunk/or1ksim/cpu/dlx/Makefile.in
1,6 → 1,6
# Makefile.in generated automatically by automake 1.4 from Makefile.am
# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am
 
# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
/trunk/or1ksim/cpu/or1k/opcode/or32.h
213,5 → 213,14
Return the size of the instruction. */
int disassemble_index (unsigned long insn, int index);
 
/* FOR INTERNAL USE ONLY */
/* Automatically does zero- or sign- extension and also finds correct
sign bit position if sign extension is correct extension. Which extension
is proper is figured out from letter description. */
unsigned long extend_imm(unsigned long imm, char l);
 
/* Extracts value from opcode */
unsigned long or32_extract(char param_ch, char *enc_initial, unsigned long insn);
 
#endif
 
/trunk/or1ksim/profiler.c
23,51 → 23,27
#include <stdio.h>
#include "profiler.h"
 
struct stack_struct {
/* Function address */
unsigned int addr;
/* Cycles of function start; cycles of subfunctions are added later */
unsigned int cycles;
/* Return address */
unsigned int raddr;
/* Name of the function */
char name[33];
} stack[MAX_STACK];
static struct stack_struct stack[MAX_STACK];
 
struct func_struct {
/* Start address of function */
unsigned int addr;
/* Name of the function */
char name[33];
/* Total cycles spent in function */
long cum_cycles;
/* Calls to this function */
long calls;
} func[MAX_FUNCS];
struct func_struct prof_func[MAX_FUNCS];
 
/* Total number of functions */
int nfuncs = 0;
int prof_nfuncs = 0;
 
/* Current depth */
int nstack = 0;
static int nstack = 0;
 
/* Max depth */
int maxstack = 0;
static int maxstack = 0;
 
/* Number of total calls */
int ntotcalls = 0;
static int ntotcalls = 0;
 
/* Number of covered calls */
int nfunccalls = 0;
static int nfunccalls = 0;
 
/* Current cycles */
static int cycles = 0;
int prof_cycles = 0;
 
/* Whether we are in cumulative mode */
static int cumulative = 0;
75,6 → 51,10
/* Whether we should not report warnings */
static int quiet = 0;
 
/* File to read from */
static FILE *fprof = 0;
 
/* Print out command line help */
void prof_help ()
{
printf ("profiler [-c] [-q] -g [profile_file_name]\n");
85,38 → 65,10
printf ("\t\t\t\t\tstdout/profile_file_name\n");
}
 
/* File to read from */
static FILE *fprof = 0;
 
int main_profiler (int argc, char *argv[]) {
char fprofname[50] = "sim.profile";
/* Acquire data from profiler file */
int prof_acquire (char *fprofname)
{
int line = 0;
 
if (argc > 4 || argc < 2) {
prof_help ();
return 1;
}
 
argv++; argc--;
while (argc > 0) {
if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) {
quiet = 1;
argv++; argc--;
} else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cumulative")) {
cumulative = 1;
argv++; argc--;
} else if (strcmp(argv[0], "-g") && strcmp(argv[0], "--generate")) {
prof_help ();
return -1;
} else {
argv++; argc--;
if (argv[0] && argv[0][0] != '-') {
strcpy (&fprofname[0], argv[0]);
argv++; argc--;
}
}
}
 
fprof = fopen (fprofname, "rt");
 
if (!fprof) {
132,7 → 84,7
&stack[nstack].addr, &stack[nstack].name[0]) != 4)
fprintf (stderr, "Error reading line #%i\n", line);
else {
cycles = stack[nstack].cycles;
prof_cycles = stack[nstack].cycles;
nstack++;
if (nstack > maxstack)
maxstack = nstack;
144,7 → 96,7
fprintf (stderr, "Error reading line #%i\n", line);
else {
int i;
cycles = s.cycles;
prof_cycles = s.cycles;
for (i = nstack - 1; i >= 0; i--)
if (stack[i].raddr == s.raddr) break;
if (i >= 0) {
168,20 → 120,20
if (!quiet && i != nstack)
fprintf (stderr, "WARNING: Missaligned return call for %s (%08X) (found %s @ %08X), closing.\n", stack[nstack].name, stack[nstack].raddr, stack[i].name, stack[i].raddr);
for (j = 0; j < nfuncs; j++)
if (stack[nstack].addr == func[j].addr) { /* function exists, append. */
func[j].cum_cycles += time;
func[j].calls++;
for (j = 0; j < prof_nfuncs; j++)
if (stack[nstack].addr == prof_func[j].addr) { /* function exists, append. */
prof_func[j].cum_cycles += time;
prof_func[j].calls++;
nfunccalls++;
break;
}
if (j >= nfuncs) { /* function does not yet exist, create new. */
func[nfuncs].cum_cycles = time;
func[nfuncs].calls = 1;
if (j >= prof_nfuncs) { /* function does not yet exist, create new. */
prof_func[prof_nfuncs].cum_cycles = time;
prof_func[prof_nfuncs].calls = 1;
nfunccalls++;
func[nfuncs].addr = stack[nstack].addr;
strcpy (func[nfuncs].name, stack[nstack].name);
nfuncs++;
prof_func[prof_nfuncs].addr = stack[nstack].addr;
strcpy (prof_func[prof_nfuncs].name, stack[nstack].name);
prof_nfuncs++;
}
}
} else if (!quiet) fprintf (stderr, "WARNING: Cannot find return call for (%08X), ignoring.\n", s.raddr);
190,30 → 142,72
break;
}
fclose(fprof);
}
 
/* Now we have all data acquired. Print out. */
{
int i, j;
if (cumulative)
printf ("CUMULATIVE TIMES\n");
printf ("---------------------------------------------------------------------------\n");
printf ("|function name |addr |# calls |avg cycles |total cyles |\n");
printf ("|-------------------------+--------+--------+------------+----------------|\n");
for (j = 0; j < nfuncs; j++) {
int bestcyc = 0, besti = 0;
for (i = 0; i < nfuncs; i++)
if (func[i].cum_cycles > bestcyc) {
bestcyc = func[i].cum_cycles;
besti = i;
}
i = besti;
printf ("| %-24s|%08X|%8i|%12.1f|%11i,%3.0f%%|\n",
func[i].name, func[i].addr, func[i].calls, ((double)func[i].cum_cycles / func[i].calls), func[i].cum_cycles, (100. * func[i].cum_cycles / cycles));
func[i].cum_cycles = -1;
/* Print out profiling data.

   Writes a table to stdout with one row per entry of prof_func[], ordered
   by descending cum_cycles, followed by two summary lines.  When the
   file-level `cumulative' option is set a "CUMULATIVE TIMES" banner is
   printed first.

   NOTE: the ordering is done by repeated selection and is destructive --
   every printed entry gets its cum_cycles overwritten with -1, so the
   table can be printed only once per acquired data set.  */
void prof_print ()
{
  int i, j;

  if (cumulative)
    printf ("CUMULATIVE TIMES\n");
  printf ("---------------------------------------------------------------------------\n");
  printf ("|function name |addr |# calls |avg cycles |total cyles |\n");
  printf ("|-------------------------+--------+--------+------------+----------------|\n");
  for (j = 0; j < prof_nfuncs; j++) {
    /* cum_cycles is a long, so track the maximum in a long as well.  -1 is
       the "already printed" marker; starting below zero lets functions
       with 0 accumulated cycles be selected instead of re-printing
       entry 0.  */
    long bestcyc = -1;
    int besti = -1;

    for (i = 0; i < prof_nfuncs; i++)
      if (prof_func[i].cum_cycles > bestcyc) {
        bestcyc = prof_func[i].cum_cycles;
        besti = i;
      }
    if (besti < 0)
      break;            /* nothing left that has not been printed */
    i = besti;
    /* calls and cum_cycles are long -> %li (plain %i is a type mismatch).  */
    printf ("| %-24s|%08X|%8li|%12.1f|%11li,%3.0f%%|\n",
            prof_func[i].name, prof_func[i].addr, prof_func[i].calls,
            ((double)prof_func[i].cum_cycles / prof_func[i].calls),
            prof_func[i].cum_cycles,
            (100. * prof_func[i].cum_cycles / prof_cycles));
    prof_func[i].cum_cycles = -1;   /* mark as printed */
  }
  printf ("---------------------------------------------------------------------------\n");
  printf ("Total %i functions, %i cycles.\n", prof_nfuncs, prof_cycles);
  printf ("Total function calls %i/%i (max depth %i).\n", nfunccalls, ntotcalls, maxstack);
}
 
/* Set options: stores the caller's choices in the file-level `quiet' and
   `cumulative' switches. */
void prof_set (int _quiet, int _cumulative)
{
  cumulative = _cumulative;
  quiet = _quiet;
}
 
int main_profiler (int argc, char *argv[]) {
char fprofname[50] = "sim.profile";
 
if (argc > 4 || argc < 2) {
prof_help ();
return 1;
}
 
argv++; argc--;
while (argc > 0) {
if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) {
quiet = 1;
argv++; argc--;
} else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cumulative")) {
cumulative = 1;
argv++; argc--;
} else if (strcmp(argv[0], "-g") && strcmp(argv[0], "--generate")) {
prof_help ();
return -1;
} else {
argv++; argc--;
if (argv[0] && argv[0][0] != '-') {
strcpy (&fprofname[0], argv[0]);
argv++; argc--;
}
}
printf ("---------------------------------------------------------------------------\n");
}
printf ("Total %i functions, %i cycles.\n", nfuncs, cycles);
printf ("Total function calls %i/%i (max depth %i).\n", nfunccalls, ntotcalls, maxstack);
 
prof_acquire (fprofname);
 
/* Now we have all data acquired. Print out. */
prof_print ();
return 0;
}
/trunk/or1ksim/profiler.h
29,5 → 29,51
#define PROF_CUMULATIVE 0x01
#define PROF_QUIET 0x02
 
/* One entry of the profiler's call stack (profiler.c keeps an array of
   these and indexes it by the current call depth). */
struct stack_struct {
/* Function address */
unsigned int addr;
/* Cycles of function start; cycles of subfunctions are added later */
unsigned int cycles;
/* Return address; used to match a return record with its call entry */
unsigned int raddr;
/* Name of the function (C string, at most 32 characters + terminator) */
char name[33];
};
 
/* Per-function totals collected by the profiler; see prof_func[]. */
struct func_struct {
/* Start address of function */
unsigned int addr;
/* Name of the function (C string, at most 32 characters + terminator) */
char name[33];
/* Total cycles spent in function */
long cum_cycles;
/* Calls to this function */
long calls;
};
 
extern struct func_struct prof_func[MAX_FUNCS];
 
/* Total number of functions */
extern int prof_nfuncs;
extern int prof_cycles;
 
/* Print out command line help */
void prof_help ();
 
/* Acquire data from profiler file */
int prof_acquire (char *fprofname);
 
/* Print out profiling data */
void prof_print ();
 
/* Set options */
void prof_set (int _quiet, int _cumulative);
 
int main_profiler (int argc, char *argv[]);
#endif /* not __PROFILER_H */
/trunk/or1ksim/cuc/cuc.h
0,0 → 1,209
/* cuc.h -- OpenRISC Custom Unit Compiler, main header file
* Copyright (C) 2002 Marko Mlinar, markom@opencores.org
*
* This file is part of OpenRISC 1000 Architectural Simulator.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 
#ifndef __DATAF_H__
#define __DATAF_H__
 
#define DEBUG 0
 
/* Maximum number of instructions per function */
#define MAX_INSNS 0x10000
#define MAX_OPERANDS 4
#define MAX_BB 0x100
#define MAX_REGS 34
#define FLAG_REG (MAX_REGS - 2)
#define LRBB_REG (MAX_REGS - 1)
#define MAX_STACK 0x1000 /* if more, not converted */
#define MAX_PREROLL 32
#define MAX_UNROLL 32
 
#define IT_BRANCH 0x0001 /* Branch instruction */
#define IT_INDELAY 0x0002 /* Instruction is in delay slot */
#define IT_BBSTART 0x0004 /* BB start marker */
#define IT_BBEND 0x0008 /* BB end marker */
#define IT_OUTPUT 0x0010 /* this instruction holds final value of the register */
#define IT_SIGNED 0x0020 /* Instruction is signed */
#define IT_MEMORY 0x0040 /* Instruction does memory access */
#define IT_UNUSED 0x0080 /* dead instruction marker */
#define IT_FLAG1 0x0100 /* misc flags */
#define IT_FLAG2 0x0200
#define IT_VOLATILE 0x0400 /* Should not be moved/removed */
#define IT_MEMADD 0x0800 /* add before the load -- should not be removed */
#define IT_COND 0x1000 /* Conditional */
#define IT_LATCHED 0x2000 /* Output of this instruction is latched/registered */
#define IT_CUT 0x4000 /* After this instruction register is placed */
 
/* Operand type bits.
   NOTE(review): presumably the values stored in cuc_insn.opt[] -- confirm. */
#define OPT_NONE 0x00
#define OPT_CONST 0x01
#define OPT_REGISTER 0x02
#define OPT_REF 0x04
#define OPT_JUMP 0x08 /* Jump to an instruction index */
#define OPT_DEST 0x10 /* This operand is dest */
#define OPT_BB 0x20 /* Jump to BB */
#define OPT_LRBB 0x40 /* 0 if we came in from left BB, or 1 otherwise */
 
#define MT_WIDTH 0x07 /* These bits hold memory access width in bytes 1 << x */
#define MT_BURST 0x08 /* burst start & end markers */
#define MT_BURSTE 0x10
#define MT_WRITE 0x20 /* This memory access does a write */
#define MT_SIGNED 0x40 /* Signed memory access */
 
/* Basic block flag bits.
   NOTE(review): presumably the values stored in cuc_bb.type -- confirm. */
#define BB_INLOOP 0x01 /* This block is inside a loop */
#define BB_OPTIONAL 0x02
#define BB_END 0x04 /* Last block in a function */
#define BB_DEAD 0x08 /* This block is unreachable -> to be removed */
 
/* Various macros to minimize code size.
   A reference packs a basic block number and an instruction index into
   one integer: ref = bb * MAX_INSNS + i.  REF_BB/REF_I split it again.
   INSN expects an array named `bb' to be in scope at the point of use. */
#define REF(bb,i) (((bb) * MAX_INSNS) + (i))
#define REF_BB(r) ((r) / MAX_INSNS)
#define REF_I(r) ((r) % MAX_INSNS)
#define INSN(ref) bb[REF_BB(ref)].insn[REF_I(ref)]
 
#define log(x...) fprintf (flog, x)
 
/* Options */
static const int calling_convention = 1;
static const int memory_order = 2;
static const int enable_bursts = 1;
static const int no_multicycle = 1;
 
/* Temporary registers by software convention */
extern const int call_saved[MAX_REGS];
 
/* Node of a singly linked dependency list. */
typedef struct _dep_list_t {
unsigned long ref; /* NOTE(review): presumably a REF(bb,i) value -- confirm */
struct _dep_list_t *next; /* Next node in the list */
} dep_list;
 
/* Implementation specific timings */
typedef struct {
int b; /* Basic block # this timing is referring to */
int preroll; /* How many times was this BB pre/unrolled */
int unroll;
/* NOTE(review): units of new_time, old_time and size are not visible in
   this header -- confirm against analyse_timings (). */
int new_time;
int old_time;
double size;
} cuc_timings;
 
/* Instruction entity */
typedef struct {
int type; /* type of the instruction (IT_* flag bits are OR-ed in) */
int index; /* Instruction index */
int opt[MAX_OPERANDS]; /* operand types */
unsigned long op[MAX_OPERANDS]; /* operand values */
dep_list *dep; /* instruction dependencies */
unsigned long insn; /* Instruction opcode */
char disasm[40]; /* disassembled string */
int tmp; /* NOTE(review): looks like scratch storage -- confirm */
} cuc_insn;
 
/* Basic block entity */
typedef struct {
unsigned long type; /* Type of the bb */
int first, last; /* Where this block lies */
/* NOTE(review): presumably indices of up to two predecessor and two
   successor blocks -- confirm. */
int prev[2], next[2];
int tmp; /* NOTE(review): looks like scratch storage -- confirm */
cuc_insn *insn; /* Instructions lie here */
int ninsn; /* Number of instructions */
int last_used_reg[MAX_REGS]; /* One slot per register */
dep_list *mdep; /* Last memory access dependencies */
int nmemory;
int cnt; /* how many times this block was executed */
int unrolled; /* how many times this block has been unrolled */
int ntim; /* Basic block options */
cuc_timings *tim; /* NOTE(review): presumably ntim entries -- confirm */
} cuc_bb;
 
/* Function entity */
typedef struct {
/* Basic blocks */
int num_bb;
cuc_bb bb[MAX_BB];
int saved_regs[MAX_REGS]; /* One slot per register */
/* Schedule of memory instructions */
int nmsched;
int msched[MAX_INSNS];
int mtype[MAX_INSNS];

/* initial bb and their relocations to new block numbers */
int num_init_bb;
int *init_bb_reloc;
int orig_time; /* time in cyc required for SW implementation */
unsigned long start_addr; /* Address of first instruction in function */
unsigned long end_addr; /* Address of last instruction in function */
} cuc_func;
 
/* Instructions from function */
extern cuc_insn insn[MAX_INSNS];
extern int num_insn;
extern int reloc[MAX_INSNS];
extern FILE *flog;
 
/* Loads from file into global array insn */
void cuc_load (char *in_fn);
 
/* Scans sequence of BBs and set bb[].cnt */
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename);
 
/* Prints out instructions */
void print_insns (cuc_insn *insn, int size, int verbose);
 
/* Print out basic blocks */
void print_cuc_bb (cuc_func *func, char *s);
 
/* Duplicates function */
cuc_func *dup_func (cuc_func *f);
 
/* Releases memory allocated by function */
void free_func (cuc_func *f);
 
/* Set the BB limits */
void detect_bb (cuc_func *func);
 
/* Optimize basic blocks */
void optimize_bb (cuc_func *func);
 
/* Removes BBs marked as dead */
void remove_dead_bb (cuc_func *func);
 
/* Detect register dependencies */
void reg_dep (cuc_func *func);
 
/* Cuts the tree and marks registers */
void mark_cut (cuc_func *f);
 
/* Unroll loop b and return new function. Original
function is unmodified.
NOTE(review): b is presumably the basic block number of the loop, with
preroll/unroll giving the repeat counts -- confirm. */
cuc_func *preunroll_loop (cuc_func *func, int b, int preroll, int unroll, char *bb_filename);
/* Schedule memory accesses
0 - exact; 1 - strong; 2 - weak; 3 - none */
void schedule_memory (cuc_func *func, int otype);
 
/* Generates verilog file out of insn dataflow */
void output_verilog (cuc_func *func, char *filename);
 
/* Recalculates bb[].cnt values, based on generated profile file */
void recalc_cnts (cuc_func *f, char *bb_filename);
 
/* Calculate timings */
void analyse_timings (cuc_func *func, cuc_timings *timings);
 
#endif /* __DATAF_H__ */
trunk/or1ksim/cuc/cuc.h Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/load.c =================================================================== --- trunk/or1ksim/cuc/load.c (nonexistent) +++ trunk/or1ksim/cuc/load.c (revision 879) @@ -0,0 +1,433 @@ +/* load.c -- OpenRISC Custom Unit Compiler, instruction loading and converting + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include "cuc.h" +#include "opcode/or32.h" +#include "insn.h" + +static const cuc_conv conv[] = { +{"l.add", II_ADD}, {"l.addi", II_ADD}, +{"l.sub", II_SUB}, {"l.subi", II_SUB}, +{"l.and", II_AND}, {"l.andi", II_AND}, +{"l.xor", II_XOR}, {"l.xori", II_XOR}, +{"l.or", II_OR }, {"l.ori", II_OR}, +{"l.mul", II_MUL}, {"l.muli", II_MUL}, + +{"l.sra", II_SRA}, {"l.srai", II_SRA}, +{"l.srl", II_SRL}, {"l.srli", II_SRL}, +{"l.sll", II_SLL}, {"l.slli", II_SLL}, + +{"l.lbz",II_LB | II_MEM}, {"l.lbs", II_LB | II_MEM | II_SIGNED}, +{"l.lhz",II_LH | II_MEM}, {"l.lhs", II_LH | II_MEM | II_SIGNED}, +{"l.lwz",II_LW | II_MEM}, {"l.lws", II_LW | II_MEM | II_SIGNED}, +{"l.sb", II_SB | II_MEM}, {"l.sh", II_SH | II_MEM}, {"l.sw", II_SW | II_MEM}, +{"l.sfeq", II_SFEQ }, {"l.sfeqi", II_SFEQ}, +{"l.sfne", II_SFNE }, {"l.sfnei", II_SFNE}, +{"l.sflts", II_SFLT | II_SIGNED}, {"l.sfltis", II_SFLT | II_SIGNED}, +{"l.sfltu", II_SFLT}, {"l.sfltiu", II_SFLT}, +{"l.sfgts", II_SFGT | II_SIGNED}, {"l.sfgtis", II_SFGT | II_SIGNED}, +{"l.sfgtu", II_SFGT}, {"l.sfgtiu", II_SFGT}, +{"l.sfges", II_SFGE | II_SIGNED}, {"l.sfgeis", II_SFGE | II_SIGNED}, +{"l.sfgeu", II_SFGE}, {"l.sfgeiu", II_SFGE}, +{"l.sfles", II_SFLE | II_SIGNED}, {"l.sfleis", II_SFLE | II_SIGNED}, +{"l.sfleu", II_SFLE}, {"l.sfleiu", II_SFLE}, +{"l.j", II_BF }, +{"l.bf", II_BF }, +{"l.nop", II_NOP } +}; + +/* Instructions from function */ +cuc_insn insn[MAX_INSNS]; +int num_insn; +int reloc[MAX_INSNS]; + +/* Prints out instructions */ +void print_cuc_insns (char *s, int verbose) +{ + int i, j; + printf ("****************** %s ******************\n", s); + print_insns (insn, num_insn,verbose); + printf ("\n\n"); +} + +void xchg_insn (int i, int j) +{ + cuc_insn t; + t = insn[i]; + insn[i] = insn[j]; + insn[j] = t; +} + +/* Remove delay slots */ +void remove_dslots () +{ + int i; + int in_delay = 0; + for (i = 0; i < num_insn; i++) { + if (in_delay) insn[i].type |= IT_INDELAY; + in_delay = 0; + if 
(insn[i].type & IT_BRANCH) in_delay = 1; + if (insn[i].type & IT_INDELAY) { + /* delay slot should not be a branch target! */ + assert ((insn[i].type & IT_BBSTART) == 0); + assert ((insn[i - 1].type & IT_INDELAY) == 0); + insn[i].type &= ~IT_INDELAY; /* no more in delay slot */ + xchg_insn (i, i - 1); + } + } + assert (in_delay == 0); +} + +/* Convert local variables (uses stack frame -- r1) to internal values */ +void detect_locals () +{ + int stack[MAX_STACK]; + int i, can_remove_stack = 1; + int real_stack_size = 0; + + for (i = 0; i < MAX_STACK; i++) stack[i] = -1; + + for (i = 0; i < num_insn; i++) { + /* sw off (r1),rx */ + if (insn[i].index == II_SW + && (insn[i].opt[0] & OPT_CONST) + && insn[i].op[1] == 1 && (insn[i].opt[1] & OPT_REGISTER)) { + + if (insn[i].op[0] < MAX_STACK) { /* Convert to normal move */ + stack[insn[i].op[0]] = i; + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + insn[i].op[0] = -1; insn[i].opt[0] = OPT_REGISTER | OPT_DEST; + insn[i].op[1] = insn[i].op[2]; insn[i].opt[1] = insn[i].opt[2]; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* lw rx,off (r1) */ + } else if (insn[i].index == II_LW + && (insn[i].opt[1] & OPT_CONST) + && insn[i].op[2] == 1 && (insn[i].opt[2] & OPT_REGISTER)) { + + if (insn[i].op[1] < MAX_STACK) { /* Convert to normal move */ + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + assert (stack[insn[i].op[1]] >= 0); + insn[i].op[1] = stack[insn[i].op[1]]; insn[i].opt[1] = OPT_REF; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* Check for defined stack size */ + } else if (insn[i].index == II_ADD && !real_stack_size + && (insn[i].opt[0] & OPT_REGISTER) && insn[i].op[0] == 1 + && (insn[i].opt[1] & OPT_REGISTER) && insn[i].op[1] == 1 + && (insn[i].opt[2] & OPT_CONST)) { + real_stack_size = -insn[i].op[2]; + } + } + assert (can_remove_stack); /* TODO */ +} + +/* Disassemble one 
instruction from insn index and generate parameters */ +const char *build_insn (unsigned long data, cuc_insn *insn) +{ + const char *name; + char *s; + extern char *disassembled; + int index = insn_decode (data); + struct or32_opcode const *opcode; + int i, argc = 0; + + insn->insn = data; + insn->index = -1; + insn->type = 0; + name = insn_name (index); + insn->index = index; + disassemble_index (data, index); + strcpy (insn->disasm, disassembled); + insn->dep = NULL; + for (i = 0; i < MAX_OPERANDS; i++) insn->opt[i] = OPT_NONE; + + if (index < 0) { + fprintf (stderr, "Invalid opcode 0x%08x!\n", data); + exit (1); + } + opcode = &or32_opcodes[index]; + + for (s = opcode->args; *s != '\0'; ++s) { + switch (*s) { + case '\0': return name; + case 'r': + insn->opt[argc] = OPT_REGISTER | (argc ? 0 : OPT_DEST); + insn->op[argc++] = or32_extract(*++s, opcode->encoding, data); + break; + + default: + if (strchr (opcode->encoding, *s)) { + unsigned long imm = or32_extract (*s, opcode->encoding, data); + imm = extend_imm(imm, *s); + insn->opt[argc] = OPT_CONST; + insn->op[argc++] = imm; + } + } + } + return name; +} + +/* expands immediate memory instructions to two */ +void expand_memory () +{ + int i, j, num_mem = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY) num_mem++; + + d = num_insn + num_mem; + assert (d < MAX_INSNS); + + /* Split memory commands */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_MEMORY) { + insn[--d] = insn[i]; + insn[--d] = insn[i]; + reloc[i] = d; + switch (insn[d].index) { + case II_SW: + case II_SH: + case II_SB: + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].op[2] = insn[i].op[0]; insn[d].opt[2] = insn[i].opt[0]; + insn[d].opt[3] = OPT_NONE; + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* sw (t($-1)),rx */ + insn[d + 1].op[0] = insn[i].op[2]; 
insn[d + 1].opt[0] = insn[i].opt[2]; + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + break; + case II_LW: + case II_LH: + case II_LB: + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* lw (t($-1)),rx */ + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].opt[3] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + break; + default: fprintf (stderr, "%4i, %4i: %s\n", i, d, cuc_insn_name (&insn[d])); + assert (0); + } + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_mem; + for (i = 0; i < num_insn; i++) if (!(insn[i].type & IT_MEMORY)) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* expands signed comparisons to three instructions */ +void expand_signed () +{ + int i, j, num_sig = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_SIGNED) num_sig++; + + d = num_insn + num_sig * 2; + assert (d < MAX_INSNS); + + /* Split signed instructions */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_SIGNED) { + /* We will expand signed memory later */ + if (insn[i].type & IT_MEMORY) continue; + insn[--d] = insn[i]; + insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF; + insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2]; + insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; + 
insn[d].op[2] = 0x20000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_sig * 2; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY || !(insn[i].type & IT_SIGNED)) { + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; + } else insn[i].type &= ~IT_SIGNED; +} + +/* CSE -- common subexpression elimination */ +void cse () +{ + int i, j, k, l; + for (i = 0; i < num_insn; i++) + for (j = 0; j < i; j++) { + if (insn[i].index == insn[j].index) continue; + if (insn[i].type & IT_VOLATILE) continue; + + /* Do we have an exact match? */ + if (insn[i].op[1] != insn[j].op[1] || insn[i].opt[1] != insn[j].opt[1]) continue; + if (insn[i].op[2] != insn[j].op[2] || insn[i].opt[2] != insn[j].opt[2]) continue; + + /* Check if we drive outputs? */ + if ((insn[i].opt[0] & OPT_REGISTER) && insn[i].op[0] >= 0) + if ((insn[j].opt[0] & OPT_REGISTER) && insn[j].op[0] >= 0) continue; + else insn[j].op[0] = insn[i].op[0]; + + /* remove duplicated instruction and relink the references */ + change_insn_type (&insn[i], II_NOP); + for (k = i + 1; k < num_insn; k++) + for (l = 0; l < MAX_OPERANDS; l++) + if (insn[k].op[l] == i && (insn[k].opt[l] & OPT_REF)) insn[k].op[l] = j; + } +} + +/* Loads from file into global array insn */ +void cuc_load (char *in_fn) +{ + int i, j, in_delay; + FILE *fi; + int func_return = 0; + num_insn = 0; + + log ("Loading filename %s\n", in_fn); + if ((fi = fopen (in_fn, "rt")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", in_fn); + exit (1); + } + /* Read in the function and decode the instructions */ + for (i = 0;; i++) { + unsigned long data; + extern char *disassembled; + const char *name; + + if (fscanf (fi, "%08x\n", &data) != 1) break; + + /* build params */ + name = build_insn (data, &insn[i]); + if (func_return) func_return++; + //printf ("%s\n", name); + + if 
(or32_opcodes[insn[i].index].flags & OR32_IF_DELAY) { + int f; + if (strcmp (name, "l.bnf") == 0) f = 1; + else if (strcmp (name, "l.bf") == 0) f = 0; + else if (strcmp (name, "l.j") == 0) { + f = -1; + } else if (strcmp (name, "l.jr") == 0 && func_return == 0) { + func_return = 1; + change_insn_type (&insn[i], II_NOP); + continue; + } else { + fprintf (stderr, "Instruction #%i: \"%s\" not supported.\n", i, name); + exit (1); + } + if (f < 0) { /* l.j */ + /* repair params */ + change_insn_type (&insn[i], II_BF); + insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP; + insn[i].op[1] = 1; insn[i].opt[1] = OPT_CONST; + insn[i].type |= IT_BRANCH | IT_VOLATILE; + } else { + i--; + if (f) { + //printf ("%s\n", cuc_insn_name (&insn[i])); + if (insn[i].index == II_SFEQ) change_insn_type (&insn[i], II_SFNE); + else if (insn[i].index == II_SFNE) change_insn_type (&insn[i], II_SFEQ); + else if (insn[i].index == II_SFLT) change_insn_type (&insn[i], II_SFGE); + else if (insn[i].index == II_SFGT) change_insn_type (&insn[i], II_SFLE); + else if (insn[i].index == II_SFLE) change_insn_type (&insn[i], II_SFGT); + else if (insn[i].index == II_SFGE) change_insn_type (&insn[i], II_SFLT); + else assert (0); + } + /* repair params */ + insn[i].op[2] = insn[i].op[1]; insn[i].opt[2] = insn[i].opt[1] & ~OPT_DEST; + insn[i].op[1] = insn[i].op[0]; insn[i].opt[1] = insn[i].opt[0] & ~OPT_DEST; + insn[i].op[0] = FLAG_REG; insn[i].opt[0] = OPT_DEST | OPT_REGISTER; + insn[i].opt[3] = OPT_NONE; + insn[i].type |= IT_COND; + i++; + change_insn_type (&insn[i], II_BF); + insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP; + insn[i].op[1] = FLAG_REG; insn[i].opt[1] = OPT_REGISTER; + insn[i].type |= IT_BRANCH | IT_VOLATILE; + } + } else { + insn[i].index = -1; + for (j = 0; j < sizeof (conv) / sizeof (cuc_conv); j++) + if (strcmp (conv[j].from, name) == 0) { + const int x = conv[j].to; + if (conv[j].to & II_SIGNED) insn[i].type |= IT_SIGNED; + if (conv[j].to & II_MEM) insn[i].type |= 
IT_MEMORY | IT_VOLATILE; + change_insn_type (&insn[i], conv[j].to & II_MASK); + break; + } + if (insn[i].index < 0) { + fprintf (stderr, "Instruction #%i: \"%s\" not supported (2).\n", i, name); + exit (1); + } + } + } + num_insn = i; + fclose (fi); + if (func_return != 2) { + fprintf (stderr, "Unsupported function structure.\n"); + exit (1); + } + + log ("Number of instructions loaded = %i\n", num_insn); + if (DEBUG > 3) print_cuc_insns ("INITIAL", 1); + + log ("Converting.\n"); + remove_dslots (); + if (DEBUG > 8) print_cuc_insns ("NO_DELAY_SLOTS", 0); + + if (calling_convention) { + detect_locals (); + if (DEBUG > 7) print_cuc_insns ("AFTER_LOCALS", 0); + } + expand_memory (); + if (DEBUG > 3) print_cuc_insns ("AFTER_EXP_MEM", 0); + + expand_signed (); + if (DEBUG > 3) print_cuc_insns ("AFTER_EXP_SIG", 0); + + log ("Common subexpression elimination.\n"); + cse (); + if (DEBUG > 8) print_cuc_insns ("AFTER_CSE", 0); +}
trunk/or1ksim/cuc/load.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/Makefile.in =================================================================== --- trunk/or1ksim/cuc/Makefile.in (nonexistent) +++ trunk/or1ksim/cuc/Makefile.in (revision 879) @@ -0,0 +1,337 @@ +# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +# Makefile -- Makefile for cpu architecture independent simulation +# Copyright (C) 2002 Marko Mlinar, markom@opencores.org +# +# This file is part of OpenRISC 1000 Architectural Simulator. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+# + + +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. + +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_alias = @build_alias@ +build_triplet = @build@ +host_alias = @host_alias@ +host_triplet = @host@ +target_alias = @target_alias@ +target_triplet = @target@ +AR = @AR@ +ARFLAGS = @ARFLAGS@ +BUILD_DIR = @BUILD_DIR@ +CC = @CC@ +CFLAGS = @CFLAGS@ +CPU_ARCH = @CPU_ARCH@ +INCLUDES = @INCLUDES@ +LOCAL_CFLAGS = @LOCAL_CFLAGS@ +LOCAL_DEFS = @LOCAL_DEFS@ +LOCAL_LDFLAGS = @LOCAL_LDFLAGS@ +MAKEINFO = @MAKEINFO@ +MAKE_SHELL = @MAKE_SHELL@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +SUMVERSION = @SUMVERSION@ +TERMCAP_LIB = @TERMCAP_LIB@ +VERSION = @VERSION@ +host = @host@ +host_cpu = @host_cpu@ +host_os = @host_os@ + +noinst_LIBRARIES = libcuc.a + +libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \ + verilog.c timings.c insn.c insn.h + +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) + + +DEFS = @DEFS@ -I. -I$(srcdir) -I.. 
+CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ +libcuc_a_LIBADD = +libcuc_a_OBJECTS = cuc.o load.o bb.o memory.o verilog.o timings.o \ +insn.o +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = gtar +GZIP_ENV = --best +DEP_FILES = .deps/bb.P .deps/cuc.P .deps/insn.P .deps/load.P \ +.deps/memory.P .deps/timings.P .deps/verilog.P +SOURCES = $(libcuc_a_SOURCES) +OBJECTS = $(libcuc_a_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .c .o .s +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu cuc/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(BUILT_SOURCES) + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLIBRARIES: + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +distclean-noinstLIBRARIES: + +maintainer-clean-noinstLIBRARIES: + +.s.o: + $(COMPILE) -c $< + +.S.o: + $(COMPILE) -c $< + +mostlyclean-compile: + -rm -f *.o core *.core + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +libcuc.a: $(libcuc_a_OBJECTS) $(libcuc_a_DEPENDENCIES) + -rm -f libcuc.a + $(AR) cru libcuc.a $(libcuc_a_OBJECTS) $(libcuc_a_LIBADD) + $(RANLIB) libcuc.a + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { 
files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = cuc + +distdir: $(DISTFILES) + here=`cd $(top_builddir) && pwd`; \ + top_distdir=`cd $(top_distdir) && pwd`; \ + distdir=`cd $(distdir) && pwd`; \ + cd $(top_srcdir) \ + && $(AUTOMAKE) --include-deps --build-dir=$$here --srcdir-name=$(top_srcdir) --output-dir=$$top_distdir --gnu cuc/Makefile + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + +DEPS_MAGIC := $(shell mkdir .deps > /dev/null 2>&1 || :) + +-include $(DEP_FILES) + +mostlyclean-depend: + +clean-depend: + +distclean-depend: + -rm -rf .deps + +maintainer-clean-depend: + +%.o: %.c + @echo '$(COMPILE) -c $<'; \ + $(COMPILE) -Wp,-MD,.deps/$(*F).pp -c $< + @-cp .deps/$(*F).pp .deps/$(*F).P; \ + tr ' ' '\012' < .deps/$(*F).pp \ + | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \ + >> .deps/$(*F).P; \ + rm .deps/$(*F).pp + +%.lo: %.c + @echo '$(LTCOMPILE) -c $<'; \ + $(LTCOMPILE) -Wp,-MD,.deps/$(*F).pp -c $< + @-sed -e 's/^\([^:]*\)\.o[ ]*:/\1.lo \1.o :/' \ + < .deps/$(*F).pp > .deps/$(*F).P; \ + tr ' ' '\012' < .deps/$(*F).pp \ + | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \ + >> .deps/$(*F).P; \ + rm -f .deps/$(*F).pp +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: +install-exec: install-exec-am + +install-data-am: +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) 
install-exec-am install-data-am +install: install-am +uninstall-am: +uninstall: uninstall-am +all-am: Makefile $(LIBRARIES) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ + mostlyclean-tags mostlyclean-depend mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-depend \ + clean-generic mostlyclean-am + +clean: clean-am + +distclean-am: distclean-noinstLIBRARIES distclean-compile \ + distclean-tags distclean-depend distclean-generic \ + clean-am + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-depend maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." + +maintainer-clean: maintainer-clean-am + +.PHONY: mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ +clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir mostlyclean-depend \ +distclean-depend clean-depend maintainer-clean-depend info-am info \ +dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ +install-exec install-data-am install-data install-am install \ +uninstall-am uninstall all-redirect all-am all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. 
+# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: Index: trunk/or1ksim/cuc/bb.c =================================================================== --- trunk/or1ksim/cuc/bb.c (nonexistent) +++ trunk/or1ksim/cuc/bb.c (revision 879) @@ -0,0 +1,1197 @@ +/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include "cuc.h" +#include "insn.h" +#include "support/profile.h" + +/* Print out basic blocks */ +void print_cuc_bb (cuc_func *f, char *s) +{ + int i; + printf ("------- %s -------\n", s); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].insn) printf ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt); + else printf ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last); + printf (" type %02x tmp %i ", f->bb[i].type, f->bb[i].tmp); + if (f->bb[i].next[0] >= 0) printf ("next %2x ", f->bb[i].next[0]); + else printf ("next * "); + if (f->bb[i].next[1] >= 0) printf ("%2x ", f->bb[i].next[1]); + else printf ("* "); + if (f->bb[i].prev[0] >= 0) printf ("prev %2x ", f->bb[i].prev[0]); + else printf ("prev * "); + if (f->bb[i].prev[1] >= 0) printf ("%2x\n", f->bb[i].prev[1]); + else printf ("*\n"); + + if (f->bb[i].insn) print_insns (f->bb[i].insn, f->bb[i].ninsn, 0); + } + printf ("\n"); +} + +/* Copies src basic block into destination */ +cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src) +{ + int i; + *dest = *src; + assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn)); + for (i = 0; i < src->ninsn; i++) + dest->insn[i] = src->insn[i]; + if (src->ntim) { + assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim)); + for (i = 0; i < src->ntim; i++) dest->tim[i] = src->tim[i]; + } +} + +/* Duplicates function */ +cuc_func *dup_func (cuc_func *f) +{ + cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func)); + int b, i; + for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]); + n->num_bb = f->num_bb; + assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb)); + for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b]; + n->num_init_bb = f->num_init_bb; + for (i = 0; i < MAX_REGS; i++) n->saved_regs[i] = f->saved_regs[i]; + n->start_addr = f->start_addr; + n->end_addr = f->end_addr; + n->orig_time = f->orig_time; + n->nmsched = f->nmsched; + for (i = 0; i < f->nmsched; i++) { + 
n->msched[i] = f->msched[i]; + n->mtype[i] = f->mtype[i]; + } + return n; +} + +/* Releases memory allocated by function */ +void free_func (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + dispose_list (&f->bb[b].insn[i].dep); + if (f->bb[b].insn) free (f->bb[b].insn); + if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim); + } + free (f); +} + + +/* Recalculates last_used_reg */ +void recalc_last_used_reg (cuc_func *f, int b) +{ + int i; + cuc_bb *bb = &f->bb[b]; + + /* rebuild last used reg array */ + if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0; + else bb->last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS - 1; i++) bb->last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < bb->ninsn; i++) { + int k; + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST) + if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)bb->insn[i].op[k] >= 0) { + bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i); + } + } +} + +/* Set the BB limits */ +void detect_bb (cuc_func *f) +{ + int i, j, end_bb = 0, eb = 0; + + /* Mark block starts/ends */ + for (i = 0; i < num_insn; i++) { + if (end_bb) insn[i].type |= IT_BBSTART; + end_bb = 0; + if (insn[i].type & IT_BRANCH) { + int jt = insn[i].op[0]; + insn[i].type |= IT_BBEND; + end_bb = 1; + if (jt < 0 || jt >= num_insn) { + fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm); + exit (1); + } + if (jt > 0) insn[jt - 1].type |= IT_BBEND; + insn[jt].type |= IT_BBSTART; + } + } + + /* Initialize bb array */ + insn[0].type |= IT_BBSTART; + insn[num_insn - 1].type |= IT_BBEND; + f->num_bb = 0; + for (i = 0; i < num_insn; i++) { + if (insn[i].type & IT_BBSTART) { + f->bb[f->num_bb].first = i; + f->bb[f->num_bb].cnt = 0; + } + /* Determine repetitions of a loop */ + if (insn[i].type & IT_BBEND) { + f->bb[f->num_bb].type = 0; + f->bb[f->num_bb].last 
= i; + f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1; + f->bb[f->num_bb].tmp = 0; + f->bb[f->num_bb].ntim = 0; + f->num_bb++; + assert (f->num_bb < MAX_BB); + } + } + if (DEBUG > 3) print_cuc_bb (f, "AFTER_INIT"); + + /* Build forward connections between BBs */ + for (i = 0; i < f->num_bb; i++) + if (insn[f->bb[i].last].type & IT_BRANCH) { + int j; + assert (insn[f->bb[i].last].index == II_BF); + /* Find block this instruction jumps to */ + for (j = 0; j < f->num_bb; j++) + if (f->bb[j].first == insn[f->bb[i].last].op[0]) break; + assert (j < f->num_bb); + + /* Convert the jump address to BB link */ + insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB; + + /* Make a link */ + f->bb[i].next[0] = j; + if (++f->bb[j].tmp > 2) eb++; + f->bb[i].next[1] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */ + f->bb[i].type |= BB_END; + } else { + f->bb[i].next[0] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } + + if (DEBUG > 3) print_cuc_bb (f, "AFTER_NEXT"); + + /* Build backward connections, but first insert artificial blocks + * to handle more than 2 connections */ + debug (6, "artificial %i %i\n", f->num_bb, eb); + end_bb = f->num_bb + eb; + for (i = f->num_bb - 1; i >= 0; i--) { + j = f->bb[i].tmp; + if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp; + f->bb[--end_bb] = f->bb[i]; + reloc[i] = end_bb; + while (j-- > 2) { + f->bb[--end_bb].first = f->bb[i].first; + f->bb[end_bb].last = -1; + f->bb[end_bb].next[0] = -1; + f->bb[end_bb].next[1] = -1; + f->bb[end_bb].tmp = 0; + f->bb[end_bb].cnt = f->bb[i].cnt; + f->bb[end_bb].ntim = 0; + } + } + f->num_bb += eb; + + /* relocate jump instructions */ + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_BB) + insn[i].op[j] = reloc[insn[i].op[j]]; + if (DEBUG > 3) print_cuc_bb (f, "AFTER_INSERT-reloc"); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 
0) { + int t = reloc[f->bb[i].next[0]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[0] = t; + } + if (f->bb[i].next[1] >= 0) { + int t = reloc[f->bb[i].next[1]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[1] = t; + } + /* artificial blocks do not have relocations, hardcode them */ + if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1; + } + if (DEBUG > 3) print_cuc_bb (f, "AFTER_INSERT"); + + /* Uncoditional branched do not continue to next block */ + for (i = 0; i < f->num_bb; i++) { + cuc_insn *ii; + if (f->bb[i].last < 0) continue; + ii = &insn[f->bb[i].last]; + /* Unconditional branch? */ + if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) { + change_insn_type (ii, II_NOP); + if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1]; + f->bb[i].next[1] = -1; + } + } + if (DEBUG > 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP"); + + /* Add backward connections */ + for (i = 0; i < f->num_bb; i++) + f->bb[i].prev[0] = f->bb[i].prev[1] = -1; + + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 0) { + int t = f->bb[i].next[0]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + if (f->bb[i].next[1] >= 0) { + int t = f->bb[i].next[1]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + } + if (DEBUG > 3) print_cuc_bb (f, "AFTER_PREV"); +} + +/* Build basic blocks */ +void build_bb (cuc_func *f) +{ + int i, j, k; + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1; + else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1; + assert (f->bb[i].ninsn >= MAX_REGS - 1); + f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * 
f->bb[i].ninsn); + assert (f->bb[i].insn); + f->bb[i].nmemory = 0; + f->bb[i].unrolled = 1; + + /* Save space for conditional moves, exclude r0, place lrbb instead */ + change_insn_type (&f->bb[i].insn[0], II_LRBB); + strcpy (f->bb[i].insn[0].disasm, "lrbb"); + f->bb[i].insn[0].type = IT_UNUSED; + f->bb[i].insn[0].dep = NULL; + f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST; + f->bb[i].insn[0].opt[1] = OPT_LRBB; + f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE; + for (j = 1; j < MAX_REGS - 1; j++) { + change_insn_type (&f->bb[i].insn[j], II_CMOV); + strcpy (f->bb[i].insn[j].disasm, "cmov"); + f->bb[i].insn[j].type = 0; + f->bb[i].insn[j].dep = NULL; + f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER; + f->bb[i].insn[j].opt[0] |= OPT_DEST; + f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j; + f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER; + } + for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) { + f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)]; + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int b1; + for (b1 = 0; b1 < i; b1++) + if (f->bb[b1].first <= f->bb[i].insn[j].op[k] + && f->bb[i].insn[j].op[k] <= f->bb[b1].last) break; + assert (b1 < f->num_bb); + f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1); + } + if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++; + } + } +} + +/* type == 0; keep predecessor condition + * type == 1; keep successor condition + * type == 2; join loop unrolled blocks */ +static void join_bb (cuc_func *f, int pred, int succ, int type) +{ + int i, j, k, add, ninsn, add_cond = 0; + unsigned long cond_op, cond_opt; + cuc_insn *insn; + + //printf ("%i <= %i+%i (%i)\n", pred, pred, succ, type); + //printf ("%i %i\n", f->bb[pred].ninsn, f->bb[succ].ninsn); + + add = f->bb[pred].ninsn; + if 
(f->bb[pred].ninsn <= 0 + || !(f->bb[pred].insn[f->bb[pred].ninsn - 1].type & IT_BRANCH)) type = 1; + if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1; + + ninsn = f->bb[pred].ninsn + f->bb[succ].ninsn + (type == 0 || type == 2 ? 1 : 0) + + (add_cond ? MAX_REGS : 0); + + insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn)); + for (i = 0; i < add; i++) insn[i] = f->bb[pred].insn[i]; + /* when type == 0, we copy the last (jump) instruction to the end */ + if (type == 0 || type == 2) { + insn[ninsn - 1] = insn[add - 1]; + cond_op = insn[add - 1].op[1]; + cond_opt = insn[add - 1].opt[1]; + change_insn_type (&insn[add - 1], II_NOP); + /* and when type == 2, we must add sfor instruction, to quit when either is true */ + if (type == 2) { + /* TODO */ + assert (0); + } + } + /* Copy second block */ + for (i = 0; i < f->bb[succ].ninsn; i++) insn[i + f->bb[pred].ninsn] = f->bb[succ].insn[i]; + + for (i = 0; i < ninsn; i++) reloc[i] = -1; + + /* Add conditional instructions if required */ + if (add_cond) { + recalc_last_used_reg (f, pred); + recalc_last_used_reg (f, succ); + + /* r0 -- add nop for it */ + change_insn_type (&insn[add + f->bb[succ].ninsn], II_NOP); + for (i = 1; i < MAX_REGS; i++) { + cuc_insn *ii = &insn[add + f->bb[succ].ninsn + i]; + int a = f->bb[pred].last_used_reg[i]; + int b = f->bb[succ].last_used_reg[i]; + + if (b < 0) change_insn_type (ii, II_NOP); + else if (a < 0) { + change_insn_type (ii, II_ADD); + ii->type = 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = b; ii->opt[1] = OPT_REF; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->op[3] = OPT_NONE; + } else if (b >= 0) { + change_insn_type (ii, II_CMOV); + ii->type = 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = a; ii->opt[1] = OPT_REF; + ii->op[2] = b; ii->opt[2] = OPT_REF; + ii->op[3] = cond_op; ii->opt[3] = cond_opt; + reloc[REF_I(a)] = REF (pred, add + f->bb[succ].ninsn + i); + } + 
sprintf (ii->disasm, "cmov (join BB)"); + } + } + + f->bb[pred].type |= f->bb[succ].type; + i = 0; + assert (f->bb[pred].next[0] >= 0); + switch (type) { + case 0: + if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0]; + if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0]; + assert (f->bb[succ].next[1] < 0); + break; + case 1: + f->bb[pred].next[0] = f->bb[succ].next[0]; + f->bb[pred].next[1] = f->bb[succ].next[1]; + break; + } + if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1; + f->bb[succ].type = BB_DEAD; + + /* Set max count */ + if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt; + f->bb[pred].ninsn = ninsn; + free (f->bb[pred].insn); f->bb[pred].insn = NULL; + free (f->bb[succ].insn); f->bb[succ].insn = NULL; + f->bb[pred].insn = insn; + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred; + if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred; + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + /* Check if we are referencing successor BB -> relocate to second part of + the new block */ + if (REF_BB (f->bb[i].insn[j].op[k]) == succ) { + if (type == 0 && f->bb[i].insn[j].op[j] == REF (succ, ninsn - 1)) + f->bb[i].insn[j].op[j] = REF (pred, f->bb[pred].ninsn); + else { + int t = f->bb[i].insn[j].op[k]; + int ndest = REF (pred, REF_I (t) + add); + + /* We've found a reference to succ. 
block, being removed, relocate */ + if (add_cond && i != succ && !(i == pred && j >= ninsn - MAX_REGS)) { + //printf ("%x!\n", t); + //printf ("%x!\n", f->INSN(ndest).op[0]); + /* interblock dependency should have physical register attached */ + assert (f->INSN(ndest).opt[0] == OPT_DEST | OPT_REGISTER); + assert (f->INSN(ndest).op[0] >= 0); + f->bb[i].insn[j].op[k] = REF (pred, add + f->bb[succ].ninsn + f->INSN(ndest).op[0]); + } else f->bb[i].insn[j].op[k] = ndest; + } + } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) { + if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) { + f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])]; + } + } + } + } + + if (DEBUG > 3) print_cuc_bb (f, "join"); +} + +/* Optimize basic blocks */ +void optimize_bb (cuc_func *f) +{ + int i, j; +remove_lrbb: + /* we can remove lrbb instructions from blocks with just one predecessor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */ + for (j = 0; j < f->bb[i].ninsn; j++) + if (f->bb[i].insn[j].index == II_LRBB) { + cuc_insn *t; + debug (4, "-lrbb %i.%i\n", i, j); + + /* Change to add LRBB, 0, 0 */ + change_insn_type (&f->bb[i].insn[j], II_ADD); + f->bb[i].insn[j].type &= ~IT_VOLATILE; + t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1]; + f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST; + f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */ + f->bb[i].insn[j].opt[3] = OPT_NONE; + + /* If the predecessor still has a conditional jump instruction, we must be careful. + This could only have occured when we found out that next[0] == next[1] and have + joined them. 
Now we will link lrbb and correct the situation */ + if (t->type & IT_BRANCH) { /* We must set a reference to branch result */ + f->bb[i].insn[j].opt[1] = t->opt[1]; + f->bb[i].insn[j].op[1] = t->op[1]; + change_insn_type (t, II_NOP); + } + } + } + } + + /* Type 0 joining + 1. link between pred & succ + 2. no memory accesses in succ + 3. optional pred's second successors + 4. max. one succ's successors */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0 /* one predecessor */ + && f->bb[i].next[1] < 0 /* max. one successor */ + && f->bb[i].nmemory == 0) { /* and no memory acceses */ + join_bb (f, f->bb[i].prev[0], i, 0); + goto remove_lrbb; + } + + /* Type 1 joining + 1. link between pred & succ + 2. no other pred's successors + 3. no other succ's predecessors */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0 /* one predecessor */ + && f->bb[f->bb[i].prev[0]].next[0] >= 0 && f->bb[f->bb[i].prev[0]].next[1] < 0) { /* one successor */ + join_bb (f, f->bb[i].prev[0], i, 1); + goto remove_lrbb; + } + +#if 1 + /* Type 2 joining + 1. link between pred & succ + 2. succ has exactly one predeccessor + 3. pred & succ share common successor + 4. 
optional succ's second successor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */ + int p = f->bb[i].prev[0]; + if (f->bb[p].next[0] == i && f->bb[p].next[1] == f->bb[p].next[1]) + join_bb (f, f->bb[i].prev[0], i, 2); + goto remove_lrbb; + } +#endif +} + +/* Removes BBs marked as dead */ +void remove_dead_bb (cuc_func *f) +{ + int i, j, k, d = 0; + + for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) { + if (f->bb[i].insn) free (f->bb[i].insn); + f->bb[i].insn = NULL; + reloc[i] = -1; + } else { + reloc[i] = d; + f->bb[d++] = f->bb[i]; + } + f->num_bb = d; + + /* relocate initial blocks */ + for (i = 0; i < f->num_init_bb; i++) + f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]]; + + /* repair references */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] >= 0) assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0); + if (f->bb[i].prev[1] >= 0) assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0); + if (f->bb[i].next[0] >= 0) assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0); + if (f->bb[i].next[1] >= 0) assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0); + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1; + + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_BB && (signed)f->bb[i].insn[j].op[k] >= 0) + assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0); + else if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int t = f->bb[i].insn[j].op[k]; + assert (reloc[REF_BB(t)] >= 0); + f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t)); + } + } +} + +/* Recursive calculation of dependencies */ +static int reg_dep_rec (cuc_func *f, int cur) +{ + int i, j; + cuc_insn *insn = f->bb[cur].insn; + + //printf ("\n %i", cur); + /* Spread only, do not 
loop */ + if (f->bb[cur].tmp) return; + f->bb[cur].tmp = 1; + //printf ("! "); + + for (i = 0; i < f->bb[cur].ninsn; i++) { + /* Check for destination operand(s) */ + for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST) + if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) { + //printf ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i)); + assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */ + f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i); + } + } + + if (f->bb[cur].next[0] >= 0) reg_dep_rec (f, f->bb[cur].next[0]); + if (f->bb[cur].next[1] >= 0) reg_dep_rec (f, f->bb[cur].next[1]); +} + +/* Detect register dependencies */ +void reg_dep (cuc_func *f) +{ + int i, b, c; + + /* Set dead blocks */ + for (b = 0; b < f->num_bb; b++) { + f->bb[b].tmp = 0; + for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1; + } + + /* Start with first block and set dependecies of all reachable blocks */ + /* At the same time set last_used_regs */ + reg_dep_rec (f, 0); + + for (i = 0; i < f->num_bb; i++) + if (f->bb[i].tmp) f->bb[i].tmp = 0; + else f->bb[i].type |= BB_DEAD; + + /* Detect loops; mark BBs where loops must be broken */ + for (c = 0; c < f->num_bb; c++) { + int min = 3, minb; + + /* search though all non-visited for minimum number of unvisited predecessors */ + for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) { + int tmp = 0; + if (f->bb[b].prev[0] >= 0 && !f->bb[f->bb[b].prev[0]].tmp) tmp++; + if (f->bb[b].prev[1] >= 0 && !f->bb[f->bb[b].prev[1]].tmp) tmp++; + if (tmp < min) { + minb = b; + min = tmp; + if (tmp == 0) break; /* We already have the best one */ + } + } + b = minb; + f->bb[b].tmp = 1; /* Mark visited */ + debug (3, "minb %i min %i\n", minb, min); + if (min) { /* We just broke the loop */ + f->bb[b].type |= BB_INLOOP; + } + } + + /* Set real predecessors in cmov instructions to previous blocks */ + for (b = 0; b < f->num_bb; b++) + for (i = 1; i < MAX_REGS - 1; i++) { + int 
pa, pb; + assert (f->bb[b].insn[i].index == II_CMOV); + assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST); + assert (f->bb[b].insn[i].op[0] == i); + if (f->bb[b].prev[0] < 0) pa = -1; + else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i]; + if (f->bb[b].prev[1] < 0) pb = -1; + else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i]; + + /* We do some very simple optimizations right away to make things more readable */ + if (pa < 0 && pb < 0) { + /* Was not used at all */ + change_insn_type (&f->bb[b].insn[i], II_ADD); + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pa < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pb < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else { + int t = REF (b, 0); /* lrbb should be first instruction */ + assert (f->INSN(t).index == II_LRBB); + + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + + f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF; + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + + /* Update op[3] -- flag register */ + assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER); + assert (f->bb[b].insn[i].op[3] == LRBB_REG); + assert (t >= 0); + f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[3] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + } + } + + /* assign register references */ + for (b = 0; b < 
f->num_bb; b++) { + /* rebuild last used reg array */ + f->bb[b].last_used_reg[0] = -1; + if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0; + else f->bb[b].last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS - 1; i++) + f->bb[b].last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < f->bb[b].ninsn; i++) { + int k; + /* Check for source operands first */ + for (k = 0; k < MAX_OPERANDS; k++) { + if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) + if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) { + int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]]; + + if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */ + f->bb[b].insn[i].opt[k] = OPT_CONST; + f->bb[b].insn[i].op[k] = 0; + } else if (t >= 0) { + f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[k] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + //f->INSN(t).op[0] = -1; + } + } else if (f->bb[b].insn[i].opt[k] & OPT_REF) { + //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */ + f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED; + } + } + + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST) + if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)f->bb[b].insn[i].op[k] >= 0) { + int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]]; + assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */ + f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i); + } + } + } + + /* Remove all unused lrbb */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP); + + /* SSAs with final register value are marked as outputs */ + assert (f->bb[f->num_bb - 1].type & BB_END); + for (i = 0; i < MAX_REGS; i++) if (!call_saved[i]) { + int t = f->bb[f->num_bb - 1].last_used_reg[i]; + /* Mark them volatile, so 
optimizer does not remove them */ + if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT; + } +} + +/* Scans sequence of BBs and set bb[].cnt */ +void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename) +{ + FILE *fi, *fo; + struct mprofentry_struct *buf; + const int bufsize = 256; + unsigned long *bb_start; + unsigned long *bb_end; + int b, i, r; + int curbb, prevbb = -1; + unsigned long addr = -1; + unsigned long prevaddr = -1; + + assert (fi = fopen (mp_filename, "rb")); + assert (fo = fopen (bb_filename, "wb+")); + + assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + for (b = 0; b < f->num_bb; b++) { + bb_start[b] = f->start_addr + f->bb[b].first * 4; + bb_end[b] = f->start_addr + f->bb[b].last * 4; + //printf ("%i %x %x\n", b, bb_start[b], bb_end[b]); + f->bb[0].cnt = 0; + } + + buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize); + assert (buf); + + //printf ("BBSEQ:\n"); + do { + r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi); + //printf ("r%i : ", r); + for (i = 0; i < r; i++) { + if (buf[i].type & MPROF_FETCH) { + //printf ("%x, ", buf[i].addr); + if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) { + assert (buf[i].type & MPROF_32); + prevaddr = addr; + addr = buf[i].addr; + for (b = 0; b < f->num_bb; b++) + if (bb_start[b] <= addr && addr <= bb_end[b]) break; + assert (b < f->num_bb); + curbb = b; + if (prevaddr + 4 != addr) prevbb = -1; + } else curbb = -1; + +#warning TODO: do not count interrupts + if (curbb != prevbb && curbb >= 0) { + fwrite (&curbb, sizeof (unsigned long), 1, fo); + //printf (" [%i] ", curbb); + f->bb[curbb].cnt++; + prevbb = curbb; + } + } + } + //printf ("\n"); + } while (r == bufsize); + //printf ("\n"); + + fclose (fi); + fclose (fo); + free (buf); + free (bb_end); + free (bb_start); + + /* Initialize basic block 
relocations */ + f->num_init_bb = f->num_bb; + //printf ("num_init_bb = %i\n", f->num_init_bb); + assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb)); + for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b; +} + +/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */ +void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll) +{ + FILE *fi; + const int bufsize = 256; + int i, r; + int *buf; + int cnt = 0; + int times = preroll - 1 + unroll; + + assert (fi = fopen (bb_filename, "rb")); + for (i = 0; i < times; i++) counts[i] = 0; + assert (buf = (int *) malloc (sizeof (int) * bufsize)); + + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + /* count consecutive acesses */ + if (f->init_bb_reloc[buf[i]] == b) { + counts[cnt]++; + if (++cnt >= times) cnt = preroll - 1; + } else cnt = 0; + } + } while (r == bufsize); + + log ("Counts %i,%i :", preroll, unroll); + for (i = 0; i < times; i++) log ("%x ", counts[i]); + log ("\n"); + + fclose (fi); + free (buf); +} + +/* relocate all accesses inside of BB b to back/fwd */ +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} + +/* Unroll loop b unroll times and return new function. Original + function is unmodified. 
*/ +static cuc_func *unroll_loop (cuc_func *f, int b, int unroll) +{ + int b1, t, i, j, prevb, prevart_b; + cuc_func *n = dup_func (f); + cuc_bb *ob = &f->bb[b]; + cuc_insn *ii; + + assert (unroll > 1); + //printf ("unroll BB%i x %i (num_bb %i)\n", b, unroll, n->num_bb); + unroll--; + assert (n->num_bb + unroll * 2 < MAX_BB); + + prevb = b; + prevart_b = b; + /* Duplicate the BB */ + for (t = 0; t < unroll; t++) { + cuc_bb *pb = &n->bb[prevart_b]; + /* Add new block and set links */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~(BB_END | BB_INLOOP); + + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) { + n->bb[prevb].next[0] = b1; + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[1] = b1 + 1; + } else if (n->bb[prevb].next[1] == b) { + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[0] = b1 + 1; + n->bb[prevb].next[1] = b1; + } else assert (0); + + /* Set predecessor */ + n->bb[b1].prev[0] = prevb; + n->bb[b1].prev[1] = -1; + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b1], b, b1, prevb); + + /* add artificial block, just to join accesses */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + n->bb[b1].cnt = 0; + + for (i = 0; i < ob->ninsn - 1; i++) { + ii = &n->bb[b1].insn[i]; + if (ob->insn[i].opt[0] & OPT_DEST) { + change_insn_type (ii, II_CMOV); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF; + ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF; + + /* Take left one, if we should have finished the first iteration*/ + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + assert (pb->insn[pb->ninsn - 1].type & IT_COND); + ii->op[3] 
= REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF; + } + ii->dep = NULL; + ii->type = 0; + } else { + change_insn_type (ii, II_NOP); + } + } + + /* Add sfor instruction at the end, prioritizing flags */ + ii = &n->bb[b1].insn[ob->ninsn - 1]; + change_insn_type (ii, II_SFOR); + ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST; + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[1] = pb->insn[pb->ninsn - 1].op[1]; + ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[1] = REF (prevart_b, pb->ninsn - 1); + ii->opt[1] = OPT_REF; + } + if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1]; + ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[2] = REF (b1 - 1, pb->ninsn - 1); + ii->opt[2] = OPT_REF; + } + ii->opt[3] = OPT_NONE; + ii->type = IT_COND; + + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~(BB_END | BB_INLOOP); + n->bb[b1].prev[0] = prevart_b; + n->bb[b1].prev[1] = b1 - 1; + n->bb[b1].next[0] = ob->next[0] == b ? 
ob->next[1] : ob->next[0]; + n->bb[b1].next[1] = -1; + + prevb = b1 - 1; + prevart_b = b1; + } + if (ob->type & BB_END) { + n->bb[prevart_b].type |= BB_END; + n->bb[b].type &= ~BB_END; + } + + //print_cuc_bb (n, "unroll1"); + /* repair BB after loop, to point back to latest artificial BB */ + b1 = n->bb[prevart_b].next[0]; + if (b1 >= 0) { + if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = b1; + else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = b1; + else assert (0); + } + + /* Relink back to start of the loop */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b; + else assert (0); + + /* Set predecessor */ + if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb; + else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb; + else assert (0); + + //print_cuc_bb (n, "unroll2"); + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b], b, b, prevb); + + /* Relocate all other blocks to point to latest prevart_b */ + for (i = 0; i < f->num_bb; i++) + if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b); + + return n; +} + +/* Preroll loop b preroll times and return new function. Original + function is unmodified. 
*/ +static cuc_func *preroll_loop (cuc_func *f, int b, int preroll) +{ + int b1, t, i, j, prevb, prevart_b; + cuc_func *n = dup_func (f); + cuc_bb *ob = &f->bb[b]; + cuc_insn *ii; + + assert (preroll > 1); + //printf ("preroll BB%i x %i (num_bb %i)\n", b, preroll, n->num_bb); + preroll--; + assert (n->num_bb + preroll * 2 < MAX_BB); + + prevb = b; + prevart_b = b; + /* Duplicate the BB */ + for (t = 0; t < preroll; t++) { + cuc_bb *pb = &n->bb[prevart_b]; + /* Add new block and set links */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~(BB_END | BB_INLOOP); + + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) { + n->bb[prevb].next[0] = b1; + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[1] = b1 + 1; + } else if (n->bb[prevb].next[1] == b) { + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[0] = b1 + 1; + n->bb[prevb].next[1] = b1; + } else assert (0); + + /* Set predecessor */ + n->bb[b1].prev[0] = prevb; + n->bb[b1].prev[1] = -1; + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b1], b, b1, prevb); + + /* add artificial block, just to join accesses */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + n->bb[b1].cnt = 0; + + for (i = 0; i < ob->ninsn - 1; i++) { + ii = &n->bb[b1].insn[i]; + if (ob->insn[i].opt[0] & OPT_DEST) { + change_insn_type (ii, II_CMOV); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF; + ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF; + + /* Take left one, if we should have finished the first iteration*/ + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + assert (pb->insn[pb->ninsn - 1].type & IT_COND); + 
ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF; + } + ii->dep = NULL; + ii->type = 0; + } else { + change_insn_type (ii, II_NOP); + } + } + + /* Add sfor instruction at the end, prioritizing flags */ + ii = &n->bb[b1].insn[ob->ninsn - 1]; + change_insn_type (ii, II_SFOR); + ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST; + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[1] = pb->insn[pb->ninsn - 1].op[1]; + ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[1] = REF (prevart_b, pb->ninsn - 1); + ii->opt[1] = OPT_REF; + } + if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1]; + ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[2] = REF (b1 - 1, pb->ninsn - 1); + ii->opt[2] = OPT_REF; + } + ii->opt[3] = OPT_NONE; + ii->type = IT_COND; + + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~(BB_END | BB_INLOOP); + n->bb[b1].prev[0] = prevart_b; + n->bb[b1].prev[1] = b1 - 1; + n->bb[b1].next[0] = ob->next[0] == b ? 
ob->next[1] : ob->next[0]; + n->bb[b1].next[1] = -1; + + prevb = b1 - 1; + prevart_b = b1; + } + if (ob->type & BB_END) { + n->bb[prevart_b].type |= BB_END; + n->bb[b].type &= ~BB_END; + } + + //print_cuc_bb (n, "preroll1"); + /* repair BB after loop, to point back to latest artificial BB */ + b1 = n->bb[prevart_b].next[0]; + if (b1 >= 0) { + if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = b1; + else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = b1; + else assert (0); + } + + /* Relink to itself */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb; + else assert (0); + n->bb[prevb].prev[1] = prevb; + + if (n->bb[b].prev[0] == b) { + n->bb[b].prev[0] = n->bb[b].prev[1]; + n->bb[b].prev[1] = -1; + } else if (n->bb[b].prev[1] == b) { + n->bb[b].prev[1] = -1; + } + + //print_cuc_bb (n, "preroll2"); + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b], b, b, prevb); + + /* Relocate all other blocks to point to latest prevart_b */ + for (i = 0; i < f->num_bb; i++) + if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b); + + return n; +} + +/* Unroll loop b unroll times and return new function. Original + function is unmodified. 
*/
/* preunroll_loop -- combined preroll + unroll of loop block b.
 *
 *   f            function to transform; it is only read and duplicated here
 *   b            index of the loop basic block in f
 *   preroll      preroll factor, applied through preroll_loop when > 1
 *   unroll       unroll factor, applied through unroll_loop when > 1
 *   bb_filename  BB sequence file handed on to count_bb_seq
 *
 * Returns a newly built function: the prerolled and/or unrolled variant,
 * or a plain dup_func copy of f when neither factor exceeds 1.  */
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
{
  int b1, i;
  /* Number of counters count_bb_seq fills in (its own `times' value) */
  int ncounts = preroll - 1 + unroll;
  cuc_func *n, *t;
  int *counts;

  if (preroll > 1) {
    t = preroll_loop (f, b, preroll);
    /* preroll_loop appends (copy, artificial join) pairs, so the last copy of
       the loop body -- the one it relinks to itself -- sits at num_bb - 2 */
    b1 = t->num_bb - 2;
    if (unroll > 1) {
      //print_cuc_bb (t, "preunroll1");
      n = unroll_loop (t, b1, unroll);
      free_func (t);            /* drop the intermediate prerolled function */
    } else n = t;
  } else {
    b1 = b;
    if (unroll > 1)
      n = unroll_loop (f, b1, unroll);
    else n = dup_func (f);      /* nothing to transform: duplicate the input function */
  }

  /* Assign new count to functions.  The allocation is kept outside assert ()
     so that it is still performed when NDEBUG compiles assertions out. */
  counts = (int *) malloc (sizeof (int) * ncounts);
  assert (counts);
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
  for (i = 0; i < ncounts; i++) {
    /* counts[0] belongs to the original block b; the following entries go to
       the duplicated copies, which are appended two blocks apart starting at
       index f->num_bb */
    if (i == 0) b1 = b;
    else b1 = f->num_bb + (i - 1) * 2;
    n->bb[b1].cnt = counts[i];
  }

  //print_cuc_bb (n, "preunroll");
  free (counts);
  return n;
}
trunk/or1ksim/cuc/bb.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/insn.c =================================================================== --- trunk/or1ksim/cuc/insn.c (nonexistent) +++ trunk/or1ksim/cuc/insn.c (revision 879) @@ -0,0 +1,80 @@ +/* insn.c -- OpenRISC Custom Unit Compiler, instruction support + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include +#include "cuc.h" +#include "insn.h" + +/* Table of known instructions. Watch out for indexes I_*! 
*/ +const cuc_known_insn known[II_LAST + 1] = { +{"add", 1, "assign \1 = \2 + \3;"}, +{"sub", 0, "assign \1 = \2 - \3;"}, +{"and", 1, "assign \1 = \2 & \3;"}, +{"or", 1, "assign \1 = \2 | \3;"}, +{"xor", 1, "assign \1 = \2 ^ \3;"}, +{"mul", 1, "assign \1 = \2 * \3;"}, + +{"srl", 0, "assign \1 = \2 >> \3;"}, +{"sll", 0, "assign \1 = \2 << \3;"}, +{"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\ + | \2 >> \3;"}, + +{"lb", 0, "always @(posedge clk or posedge rst)"}, +{"lh", 0, "always @(posedge clk or posedge rst)"}, +{"lw", 0, "always @(posedge clk or posedge rst)"}, +{"sb", 0, "/* mem8[\2] = \1 */"}, +{"sh", 0, "/* mem16[\2] = \1 */"}, +{"sw", 0, "/* mem32[\2] = \1 */"}, + +{"sfeq", 1, "assign \1 = \2 == \3;"}, +{"sfne", 1, "assign \1 = \2 != \3;"}, +{"sfle", 0, "assign \1 = \2 <= \3;"}, +{"sflt", 0, "assign \1 = \2 < \3;"}, +{"sfgt", 0, "assign \1 = \2 > \3;"}, +{"sfge", 0, "assign \1 = \2 >= \3;"}, +{"sfor", 1, "assign \1 = \2 || \3;"}, +{"bf", 0, ""}, + +{"lrbb", 0,"always @(posedge clk or posedge rst)"}, +{"cmov", 0,"assign \1 = \4 ? 
\2 : \3;"}, +{"reg", 0, "always @(posedge clk or posedge rst)"}, + +{"nop", 0, NULL}}; + +/* Find known instruction and attach them to insn */ +void change_insn_type (cuc_insn *i, int index) +{ + int j; + assert (index >= 0 && index <= II_LAST); + i->index = index; + if (i->index == II_NOP) { + for (j = 0; j < MAX_OPERANDS; j++) i->opt[j] = OPT_NONE; + i->type = 0; + i->dep = NULL; + } +} + +/* Returns instruction name */ +const char *cuc_insn_name (cuc_insn *ii) { + if (ii->index < 0 || ii->index > II_LAST) return "???"; + else return known[ii->index].name; +} Index: trunk/or1ksim/cuc/verilog.c =================================================================== --- trunk/or1ksim/cuc/verilog.c (nonexistent) +++ trunk/or1ksim/cuc/verilog.c (revision 879) @@ -0,0 +1,583 @@ +/* verilog.c -- OpenRISC Custom Unit Compiler, verilog generator + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include "cuc.h" +#include "insn.h" + +/* Find index of load/store */ +int find_ls_index (cuc_func *f, int ref) +{ + int c = 0; + int i; + int load = II_IS_LOAD (f->INSN(ref).index); + for (i = 0; i < f->nmsched; i++) { + if (f->msched[i] == ref) break; + if (load && (!(f->mtype[i] & MT_WRITE)) + || !load && (f->mtype[i] & MT_WRITE)) c++; + } + return c; +} + +/* Print out dependencies as verilog expression */ +void print_deps (FILE *fo, cuc_func *f, int b, dep_list *t, int registered) +{ + if (t) { + int first = 0; + while (t) { + assert (f->INSN(t->ref).type & IT_MEMORY); + fprintf (fo, "%s%c_end[%i]", first ? " && " : "", + II_IS_LOAD (f->INSN(t->ref).index) ? 'l' : 's', find_ls_index (f, t->ref)); + first = 1; + t = t->next; + } + } else { + if (registered) fprintf (fo, "bb_start_r[%i]", b); + else fprintf (fo, "bb_start[%i]", b); + } +} + +char *print_op_v (cuc_func *f, char *s, int ref, int j) +{ + unsigned long op = f->INSN(ref).op[j]; + unsigned long opt = f->INSN(ref).opt[j]; + switch (opt & ~OPT_DEST) { + case OPT_NONE: assert (0); break; + case OPT_CONST: sprintf (s, "32'h%x", op); break; + case OPT_REGISTER: + if (opt & OPT_DEST) sprintf (s, "t%x_%x", REF_BB(ref), REF_I(ref)); + else sprintf (s, "r%i_%c", op, opt & OPT_DEST ? 
'o' : 'i'); + break; + case OPT_REF: sprintf (s, "t%x_%x", REF_BB(op), REF_I(op)); break; + } + return s; +} + +/* Prints out specified instruction */ +void print_insn_v (FILE *fo, cuc_func *f, int b, int i) +{ + cuc_insn *ii = &f->bb[b].insn[i]; + char *s = known[ii->index].rtl; + char tmp[200] = ""; + while (*s) { + if (*s <= MAX_OPERANDS) { + char t[30]; + sprintf (tmp, "%s%s", tmp, print_op_v (f, t, REF(b, i), *s - 1)); + } else if (*s == '\b') sprintf (tmp, "%s%i", b); + else sprintf (tmp, "%s%c", tmp, *s); + s++; + } + fprintf (fo, "%-40s /* %s */\n", tmp, ii->disasm); + if (ii->type & IT_MEMORY) { + int j, nls = find_ls_index (f, REF (b, i)); + if (II_IS_LOAD (ii->index)) { + int nm; + for (nm = 0; nm < f->nmsched; nm++) if (f->msched[nm] == REF (b, i)) break; + assert (nm < f->nmsched); + + fprintf (fo, " if (rst) t%x_%x <= #1 32'h0;\n", b, i); + fprintf (fo, " else if (l_end[%i]) t%x_%x <= #1 ", nls, b, i); + switch (f->mtype[nm] & (MT_WIDTH | MT_SIGNED)) { + case 1: fprintf (fo, "lwb_dat_i & 32'hff;\n"); + break; + case 2: fprintf (fo, "lwb_dat_i & 32'hffff;\n"); + break; + case 4 | MT_SIGNED: + case 4: fprintf (fo, "lwb_dat_i;\n"); + break; + case 1 | MT_SIGNED: + fprintf (fo, "{24{lwb_dat_i[7]}, lwb_dat_i[7:0]};\n"); + break; + case 2 | MT_SIGNED: + fprintf (fo, "{16{lwb_dat_i[15]}, lwb_dat_i[15:0]};\n"); + break; + default: assert (0); + } + } + } else if (ii->index == II_LRBB) { + fprintf (fo, " if (rst) t%x_%x <= #1 1'b0;\n", b, i); + assert (f->bb[b].prev[0] >= 0); + fprintf (fo, " else if (bb_start[%i]) t%x_%x <= #1 bb_stb[%i];\n", b, b, i, f->bb[b].prev[0]); + } else if (ii->index == II_REG) { + fprintf (fo, " if (rst) t%x_%x <= #1 32'h0;\n", b, i); + assert (ii->opt[1] == OPT_REF); + fprintf (fo, " else if ("); + if (f->bb[b].mdep) print_deps (fo, f, b, f->bb[b].mdep, 0); + else fprintf (fo, "bb_stb[%i]", b); + fprintf (fo, ") t%x_%x <= #1 t%x_%x;\n", b, i, + REF_BB (ii->op[1]), REF_I (ii->op[1])); + } +} + +/* Outputs binary number */ +char 
*bin_str (unsigned long x, int len) +{ + static char bx[33]; + char *s = bx; + while (len > 0) *s++ = '0' + ((x >> --len) & 1); + *s = '\0'; + return bx; +} + +/* Returns index of branch instruction inside a block b */ +int branch_index (cuc_bb *bb) +{ + int i; + for (i = bb->ninsn - 1; i >= 0; i--) + if (bb->insn[i].type & IT_BRANCH) return i; + return -1; +} + +/* Generates verilog file out of insn dataflow */ +void output_verilog (cuc_func *f, char *filename) +{ + FILE *fo; + int used_regs[MAX_REGS] = {0}; + int lur[MAX_REGS] = {0}; + int b, i, j; + int ci = 0, co = 0; + int nloads = 0, nstores = 0; + char tmp[256]; + cuc_bb *end_bb = NULL; + int end_bb_no = -1; + sprintf (tmp, "%s.v", filename); + + if ((fo = fopen (tmp, "wt+")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", tmp); + exit (1); + } + + /* Determine register usage */ + for (i = 0; i < MAX_REGS; i++) lur[i] = -1; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) + if (f->bb[b].insn[i].opt[j] & OPT_DEST) lur[f->bb[b].insn[i].op[j]] = REF (b, i); + else used_regs[f->bb[b].insn[i].op[j]] = 1; + if (f->bb[b].type & BB_END) end_bb = &f->bb[end_bb_no = b]; + } + assert (end_bb && end_bb->type & BB_END); + + /* output header */ + fprintf (fo, "/* %s -- generated by OpenRISC Custom Unit Compiler (c) OpenCores */\n", tmp); + fprintf (fo, "module %s (clk, rst,\n", filename); + fprintf (fo, " lwb_adr_o, lwb_dat_i, lwb_cycstb_o,\n"); + fprintf (fo, " lwb_sel_o, lwb_linbrst_o, lwb_ack_i,\n"); + fprintf (fo, " swb_adr_o, swb_dat_o, swb_cycstb_o,\n"); + fprintf (fo, " swb_sel_o, swb_linbrst_o, swb_ack_i,\n"); + + fprintf (fo, "/* inputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (used_regs[i]) { + fprintf (fo, "r%i_i, ", i); + ci++; + } + if (!ci) fprintf (fo, "/* NONE */"); + + fprintf (fo, "\n/* outputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (lur[i] >= 0 && 
!f->saved_regs[i]) { + fprintf (fo, "r%i_o, ", i); + co++; + } + + if (!co) fprintf (fo, "/* NONE */"); + fprintf (fo, "\n start_i, end_o);\n\n"); + + fprintf (fo, "input clk, rst;\n"); + fprintf (fo, "input start_i;\t/* Module starts when set to 1 */ \n"); + fprintf (fo, "output end_o;\t/* Set when module finishes, cleared upon start_i == 1 */\n\n"); + fprintf (fo, "/* Bus signals */\n"); + fprintf (fo, "output lwb_cycstb_o, swb_cycstb_o;\n"); + fprintf (fo, "input lwb_ack_i, swb_ack_i;\n"); + fprintf (fo, "output [3:0] lwb_sel_o, swb_sel_o;\n"); + fprintf (fo, "output [31:0] lwb_adr_o, swb_adr_o;\n"); + fprintf (fo, "output lwb_linbrst_o, swb_linbrst_o;\n"); + fprintf (fo, "input [31:0] lwb_dat_i;\n"); + fprintf (fo, "output [31:0] swb_dat_o;\n\n"); + + fprintf (fo, "reg lwb_cycstb_o, swb_cycstb_o;\n"); + fprintf (fo, "reg [31:0] lwb_adr_o, swb_adr_o;\n"); + fprintf (fo, "reg [3:0] lwb_sel_o, swb_sel_o;\n"); + fprintf (fo, "reg [31:0] swb_dat_o;\n"); + fprintf (fo, "reg lwb_linbrst_o, swb_linbrst_o;\n"); + + if (ci || co) fprintf (fo, "\n/* module ports */\n"); + if (ci) { + int first = 1; + fprintf (fo, "input [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (used_regs[i]) { + fprintf (fo, "%sr%i_i", first ? " " : ", ", i); + first = 0; + } + fprintf (fo, ";\n"); + } + + if (co) { + int first = 1; + fprintf (fo, "output [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (lur[i] >= 0 && !f->saved_regs[i]) { + fprintf (fo, "%sr%i_o", first ? " " : ", ", i); + first = 0; + } + fprintf (fo, ";\n"); + } + + /* Count loads & stores */ + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_WRITE) nstores++; + else nloads++; + + /* Output internal registers for loads */ + if (nloads) { + int first = 1; + fprintf (fo, "\n/* internal registers for loads */\n"); + for (i = 0; i < f->nmsched; i++) + if (!(f->mtype[i] & MT_WRITE)) { + fprintf (fo, "%st%x_%x", first ? 
"reg [31:0] " : ", ", + REF_BB(f->msched[i]), REF_I(f->msched[i])); + first = 0; + } + if (!first) fprintf (fo, ";\n"); + } + + fprintf (fo, "\n/* 'zero or one' hot state machines */\n"); + if (nloads) fprintf (fo, "reg [%i:0] l_stb; /* loads */\n", nloads - 1); + if (nstores) fprintf (fo, "reg [%i:0] s_stb; /* stores */\n", nstores - 1); + fprintf (fo, "reg [%i:0] bb_stb; /* basic blocks */\n", f->num_bb - 1); + + { + int first = 1; + int num = 0; + fprintf (fo, "\n/* basic block condition wires */\n"); + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_COND + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + fprintf (fo, "%st%x_%x", first ? "wire " : ", ", b, i); + if (num >= 10) { + fprintf (fo, ";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) fprintf (fo, ";\n"); + + fprintf (fo, "\n/* forward declaration of normal wires */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (!(f->bb[b].insn[i].type & (IT_COND | IT_BRANCH)) + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + /* Exclude loads */ + if (f->bb[b].insn[i].type & IT_MEMORY && II_IS_LOAD (f->bb[b].insn[i].index)) continue; + fprintf (fo, "%st%x_%x", first ? "wire [31:0] " : ", ", b, i); + if (num >= 10) { + fprintf (fo, ";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) fprintf (fo, ";\n"); + + fprintf (fo, "\n/* forward declaration registers */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index == II_REG + && f->bb[b].insn[i].index != II_LRBB) { + fprintf (fo, "%st%x_%x", first ? 
"reg [31:0] " : ", ", b, i); + if (num >= 10) { + fprintf (fo, ";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) fprintf (fo, ";\n"); + + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index == II_LRBB) { + fprintf (fo, "%st%x_%x", first ? "reg " : ", ", b, i); + if (num >= 10) { + fprintf (fo, ";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) fprintf (fo, ";\n"); + } + + if (nloads || nstores) fprintf (fo, "\n/* dependencies */\n"); + if (nloads) fprintf (fo, "wire [%i:0] l_end = l_stb & {%i{lwb_ack_i}};\n", + nloads - 1, nloads); + if (nstores) fprintf (fo, "wire [%i:0] s_end = s_stb & {%i{swb_ack_i}};\n", + nstores - 1, nstores); + + fprintf (fo, "\n/* last dependency */\n"); + fprintf (fo, "wire end_o = bb_stb[%i]", end_bb_no); + if (end_bb->mdep) { + fprintf (fo, " && "); + print_deps (fo, f, end_bb_no, end_bb->mdep, 0); + } + /* Is there a loop right at end? */ + if (end_bb->next[0] >= 0) { + int bidx = branch_index (end_bb); + char t[30]; + print_op_v (f, t, REF (end_bb_no, bidx), 1); + fprintf (fo, " && !%s", t); + } + fprintf (fo, ";\n"); + + fprintf (fo, "\n/* Basic block triggers */\n"); + fprintf (fo, "wire [%2i:0] bb_start = {\n", f->num_bb - 1); + for (b = f->num_bb - 1; b >= 0; b--) { + fprintf (fo, " /* bb_start[%2i] */ ", b); + if (f->bb[b].prev[0] < 0) fprintf (fo, "start_i"); + else { + cuc_bb *prev = &f->bb[f->bb[b].prev[0]]; + int t; + if (prev->mdep) { + print_deps (fo, f, f->bb[b].prev[0], prev->mdep, 0); + fprintf (fo, " && "); + } + fprintf (fo, "bb_stb[%i]", f->bb[b].prev[0]); + if (prev->next[0] >= 0 && prev->next[1] >= 0) { + int bidx = branch_index (&f->bb[f->bb[b].prev[0]]); + assert (bidx >= 0); + fprintf (fo, " && "); + t = prev->next[0] == b; + fprintf (fo, "%st%x_%x", t ? 
"" : "!", f->bb[b].prev[0], bidx); + } + if (f->bb[b].prev[1] >= 0) { + prev = &f->bb[f->bb[b].prev[1]]; + fprintf (fo, "\n || "); + if (prev->mdep) { + print_deps (fo, f, f->bb[b].prev[1], prev->mdep, 0); + fprintf (fo, " && "); + } + fprintf (fo, "bb_stb[%i]", f->bb[b].prev[1]); + if (prev->next[0] >= 0 && prev->next[1] >= 0) { + int bidx = branch_index (&f->bb[f->bb[b].prev[1]]); + assert (bidx >= 0); + fprintf (fo, " && "); + t = prev->next[0] == b; + fprintf (fo, "%st%x_%x", t ? "" : "!", f->bb[b].prev[1], bidx); + } + } + } + if (b == 0) fprintf (fo, "};\n"); + else fprintf (fo, ",\n"); + } + + fprintf (fo, "\n/* Register the bb_start */\n"); + fprintf (fo, "reg [%2i:0] bb_start_r;\n\n", f->num_bb - 1); + fprintf (fo, "always @(posedge rst or posedge clk)\n"); + fprintf (fo, "begin\n"); + fprintf (fo, " if (rst || end_o) bb_start_r <= #1 %i'b0;\n", f->num_bb); + fprintf (fo, " else bb_start_r <= #1 bb_start;\n"); + fprintf (fo, "end\n"); + + fprintf (fo, "\n/* Logic */\n"); + /* output body */ + for (b = 0; b < f->num_bb; b++) { + fprintf (fo, "\t\t/* BB%i */\n", b); + for (i = 0; i < f->bb[b].ninsn; i++) + print_insn_v (fo, f, b, i); + fprintf (fo, "\n"); + } + + if (co) { + fprintf (fo, "\n/* Outputs */\n"); + for (i = 0; i < MAX_REGS; i++) + if (lur[i] >= 0 && !f->saved_regs[i]) + fprintf (fo, "assign r%i_o = t%x_%x;\n", i, REF_BB(lur[i]), + REF_I(lur[i])); + } + + if (nstores) { + int cur_store = 0; + fprintf (fo, "\n/* Memory stores */\n"); + fprintf (fo, "always @(posedge clk or posedge rst)\nbegin\n"); + fprintf (fo, " if (rst) swb_dat_o <= #1 32'h0;\n"); + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_WRITE) { + char t[30]; + fprintf (fo, " else if (s_stb[%i]) swb_dat_o <= #1 %s;\n", cur_store++, + print_op_v (f, t, f->msched[i], 0)); + //printf ("msched[%i] = %x (mtype %x) %x\n", i, f->msched[i], f->mtype[i], f->INSN(f->msched[i]).op[0]); + } + fprintf (fo, "end\n"); + } + + if (nloads) { + int cur_load = 0; + fprintf (fo, "\n/* Load state 
machine */\n"); + fprintf (fo, "always @(posedge clk or posedge rst)\n"); + fprintf (fo, "begin\n"); + fprintf (fo, " if (rst) begin\n"); + fprintf (fo, " l_stb <= #1 %i'h0;\n", nloads); + fprintf (fo, " lwb_cycstb_o <= #1 1'b0;\n"); + fprintf (fo, " lwb_sel_o[3:0] <= #1 4'b0000;\n"); + fprintf (fo, " lwb_linbrst_o <= #1 1'b0;\n"); + fprintf (fo, " lwb_adr_o <= #1 32'h0;\n"); + fprintf (fo, " end else begin\n"); + printf ("loads \n"); + for (i = 0; i < f->nmsched; i++) if (!(f->mtype[i] & MT_WRITE)) { + char t[30]; + dep_list *dep = f->INSN(f->msched[i]).dep; + printf ("msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + fprintf (fo, " if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + fprintf (fo, ") begin\n"); + while (dep) { + assert (f->INSN(dep->ref).type & IT_MEMORY); + fprintf (fo, " %c_end[%i] <= #1 1'b0;\n", + II_IS_LOAD (f->INSN(dep->ref).index) ? 'l' : 's', find_ls_index (f, dep->ref)); + dep = dep->next; + } + fprintf (fo, " l_stb[%i] <= #1 1'b1;\n", cur_load++); + fprintf (fo, " lwb_cycstb_o <= #1 1'b1;\n"); + fprintf (fo, " lwb_sel_o[3:0] <= #1 4'b"); + switch (f->mtype[i] & MT_WIDTH) { + case 1: fprintf (fo, "0001 << (%s & 32h'3);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 2: fprintf (fo, "0011 << ((%s & 32h'1) << 1);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 4: fprintf (fo, "1111;\n"); break; + default: assert (0); + } + fprintf (fo, " lwb_linbrst_o <= #1 1'b%i;\n", + (f->mtype[i] & MT_BURST) && !(f->mtype[i] & MT_BURSTE) ? 
1 : 0); + fprintf (fo, " lwb_adr_o <= #1 t%x_%x & ~32'h3;\n", + REF_BB(f->INSN(f->msched[i]).op[1]), REF_I(f->INSN(f->msched[i]).op[1])); + fprintf (fo, " end\n"); + } + fprintf (fo, " if (l_end[%i]) begin\n", nloads - 1); + fprintf (fo, " l_stb <= #1 %i'h0;\n", nloads); + fprintf (fo, " lwb_cycstb_o <= #1 1'b0;\n"); + fprintf (fo, " lwb_sel_o[3:0] <= #1 4'b0000;\n"); + fprintf (fo, " lwb_linbrst_o <= #1 1'b0;\n"); + fprintf (fo, " lwb_adr_o <= #1 32'h0;\n"); + fprintf (fo, " end\n"); + fprintf (fo, " end\n"); + fprintf (fo, "end\n"); + } + + if (nstores) { + int cur_store = 0; + fprintf (fo, "\n/* Store state machine */\n"); + fprintf (fo, "always @(posedge clk or posedge rst)\n"); + fprintf (fo, "begin\n"); + fprintf (fo, " if (rst) begin\n"); + fprintf (fo, " s_stb <= #1 %i'h0;\n", nstores); + fprintf (fo, " swb_cycstb_o <= #1 1'b0;\n"); + fprintf (fo, " swb_sel_o[3:0] <= #1 4'b0000;\n"); + fprintf (fo, " swb_linbrst_o <= #1 1'b0;\n"); + fprintf (fo, " swb_adr_o <= #1 32'h0;\n"); + fprintf (fo, " end else begin\n"); + printf ("stores \n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_WRITE) { + char t[30]; + dep_list *dep = f->INSN(f->msched[i]).dep; + printf ("msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + fprintf (fo, " if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + fprintf (fo, ") begin\n"); + while (dep) { + assert (f->INSN(dep->ref).type & IT_MEMORY); + fprintf (fo, " %c_end[%i] <= #1 1'b0;\n", + II_IS_LOAD (f->INSN(dep->ref).index) ? 
'l' : 's', find_ls_index (f, dep->ref)); + dep = dep->next; + } + fprintf (fo, " s_stb[%i] <= #1 1'b1;\n", cur_store++); + fprintf (fo, " swb_cycstb_o <= #1 1'b1;\n"); + fprintf (fo, " swb_sel_o[3:0] <= #1 4'b"); + switch (f->mtype[i] & MT_WIDTH) { + case 1: fprintf (fo, "0001 << (%i & 32h'3);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 2: fprintf (fo, "0011 << ((%i & 32h'1) << 1);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 4: fprintf (fo, "1111;\n"); break; + default: assert (0); + } + fprintf (fo, " swb_linbrst_o <= #1 1'b%i;\n", + (f->mtype[i] & MT_BURST) && !(f->mtype[i] & MT_BURSTE) ? 1 : 0); + fprintf (fo, " swb_adr_o <= #1 t%x_%x & ~32'h3;\n", + REF_BB(f->INSN(f->msched[i]).op[1]), REF_I(f->INSN(f->msched[i]).op[1])); + fprintf (fo, " end\n"); + } + fprintf (fo, " if (s_end[%i]) begin\n", nstores - 1); + fprintf (fo, " s_stb <= #1 %i'h0;\n", nstores); + fprintf (fo, " swb_cycstb_o <= #1 1'b0;\n"); + fprintf (fo, " swb_sel_o[3:0] <= #1 4'b0000;\n"); + fprintf (fo, " swb_linbrst_o <= #1 1'b0;\n"); + fprintf (fo, " swb_adr_o <= #1 32'h0;\n"); + fprintf (fo, " end\n"); + fprintf (fo, " end\n"); + fprintf (fo, "end\n"); + } + + fprintf (fo, "\n/* Basic blocks state machine */\n"); + fprintf (fo, "always @(posedge clk or posedge rst)\n"); + fprintf (fo, "begin\n"); + fprintf (fo, " if (rst || end_o) begin\n"); + fprintf (fo, " bb_stb <= #1 %i'h%x;\n", f->num_bb, 0); + for (i = 0; i < f->num_bb; i++) { + fprintf (fo, " end else if (bb_start[%i]) begin\n", i); + fprintf (fo, " bb_stb <= #1 %i'h%x;\n", f->num_bb, 1 << i); + } + fprintf (fo, " end else if (end_o) begin\n"); + fprintf (fo, " bb_stb <= #1 %i'h%x;\n", f->num_bb, 0); + fprintf (fo, " end\n"); + fprintf (fo, "end\n"); + + /* output footer */ + fprintf (fo, "\nendmodule\n"); + + fclose (fo); +} +
trunk/or1ksim/cuc/verilog.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/memory.c =================================================================== --- trunk/or1ksim/cuc/memory.c (nonexistent) +++ trunk/or1ksim/cuc/memory.c (revision 879) @@ -0,0 +1,263 @@ +/* memory.c -- OpenRISC Custom Unit Compiler, memory optimization and scheduling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include "cuc.h" +#include "insn.h" + +/* Checks for memory conflicts between two instructions; returns 1 if detected + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +static int check_memory_conflict (cuc_func *f, cuc_insn *a, cuc_insn *b, int otype) +{ + switch (otype) { + case 0: /* exact */ + case 1: /* strong */ + return 1; + case 2: /* weak */ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + int aw, bw; + assert ((aw = II_MEM_WIDTH (a->index)) >= 0); + assert ((bw = II_MEM_WIDTH (b->index)) >= 0); + + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 1; + + /* Check if they overlap */ + if (a->op[2] >= b->op[2] && a->op[2] < b->op[2] + bw) return 1; + if (b->op[2] >= a->op[2] && b->op[2] < a->op[2] + aw) return 1; + return 0; + } else return 1; + case 3: /* none */ + return 0; + default: + assert (0); + } + return 1; +} + +/* Adds memory dependencies based on ordering type: + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +void add_memory_dep (cuc_func *f, int otype) +{ + int b, i; + dep_list *all_mem = NULL; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + dep_list *tmp = all_mem; + while (tmp) { + //printf ("%x %x\n", REF (b,i), tmp->ref); + if (check_memory_conflict (f, &insn[i], &f->INSN(tmp->ref), otype)) + add_dep (&insn[i].dep, tmp->ref); + tmp = tmp->next; + } + add_dep (&all_mem, REF (b, i)); + } + } + dispose_list (&all_mem); +} + +/* returns nonzero if a < b */ +int mem_ordering_cmp (cuc_func *f, cuc_insn *a, cuc_insn *b) +{ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + 
&&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 0; + + /* Order linearly, we can then join them to bursts */ + return a->op[2] < b->op[2]; + } else return 0; +} + +/* Schedule memory accesses + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +void schedule_memory (cuc_func *f, int otype) +{ + int b, i, j; + f->nmsched = 0; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + f->msched[f->nmsched++] = REF (b, i); + if (otype == 2 || otype == 3) insn[i].type |= IT_FLAG1; /* mark unscheduled */ + } + } +#if 0 + for (i = 0; i < f->nmsched; i++) + printf ("[%i]%i%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + printf ("\n"); +#endif + + /* We can reorder just more loose types + We assume, that memory accesses are currently in valid (but not neccesserly) + optimal order */ + if (otype == 2 || otype == 3) { + for (i = 0; i < f->nmsched; i++) { + int best = i; + int tmp; + for (j = i + 1; j < f->nmsched; j++) if (REF_BB(f->msched[j]) == REF_BB(f->msched[best])) { + if (mem_ordering_cmp (f, &f->INSN (f->msched[j]), &f->INSN(f->msched[best]))) { + /* Check dependencies */ + dep_list *t = f->INSN(f->msched[j]).dep; + while (t) { + if (f->INSN(t->ref).type & IT_FLAG1) break; + t = t->next; + } + if (!t) best = j; /* no conflicts -> ok */ + } + } + + /* we have to shift instructions up, to maintain valid dependencies + and make space for best candidate */ + + /* make local copy */ + tmp = f->msched[best]; + for (j = best; j > i; j--) f->msched[j] = f->msched[j - 1]; + f->msched[i] = tmp; + f->INSN(f->msched[i]).type &= ~IT_FLAG1; /* mark scheduled */ + } + } + +#if 0 + for (i = 0; i < f->nmsched; i++) + printf ("[%i]%i%c ", f->msched[i], 
f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + printf ("\n"); +#endif + + for (i = 0; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i]); + f->mtype[i] = !II_IS_LOAD(a->index) ? MT_WRITE : 0; + f->mtype[i] |= II_MEM_WIDTH (a->index); + if (a->type & IT_SIGNED) f->mtype[i] |= MT_SIGNED; + } + + if (enable_bursts) { + //printf ("\n"); + for (i = 1; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + int aw = f->mtype[i - 1] & MT_WIDTH; + + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + /* Not in usual form? */ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) continue; + + //printf ("%i %i, ", a->op[2], b->op[2]); + + /* Check if they touch together */ + if (a->op[2] + aw == b->op[2]) { + /* yes => do burst */ + f->mtype[i - 1] &= ~MT_BURSTE; + f->mtype[i - 1] |= MT_BURST; + f->mtype[i] |= MT_BURST | MT_BURSTE; + } + } + } + } + +#if 0 + printf ("\n"); + for (i = 0; i < f->nmsched; i++) + printf ("[%i]%i%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + printf ("\n"); +#endif + + /* We don't need dependencies in non-memory instructions */ + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) if (!(insn[i].type & IT_MEMORY)) + dispose_list (&insn[i].dep); + } + + /* Reduce number of dependecies, keeping just direct dependencies, based on memory schedule */ + { + int lastl[2] = {-1, -1}, lasts[2] = {-1, -1}; + int last_load = -1, last_store = -1; + for (i = 0; i < f->nmsched; i++) { + int t = (f->mtype[i] & MT_WRITE) ? 
1 : 0; + int maxl = lastl[t]; + int maxs = lasts[t]; + dep_list *tmp = f->INSN(f->msched[i]).dep; + while (tmp) { + if (f->INSN(tmp->ref).type & IT_MEMORY && REF_BB(tmp->ref) == REF_BB(f->msched[i])) { + /* Search for the reference */ + for (j = 0; j < f->nmsched; j++) if (f->msched[j] == tmp->ref) break; + assert (j < f->nmsched); + if (f->mtype[j] & MT_WRITE) { + if (maxs < j) maxs = j; + } else { + if (maxl < j) maxl = j; + } + } + tmp = tmp->next; + } + dispose_list (&f->INSN(f->msched[i]).dep); + if (f->mtype[i] & MT_WRITE) { + maxs = last_store; + last_store = i; + } else { + maxl = last_load; + last_load = i; + } + + if (maxl > lastl[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxl]); + lastl[t] = maxl; + } + if (maxs > lasts[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxs]); + lasts[t] = maxs; + } + //printf ("%i(%i)> ml %i(%i) ms %i(%i) lastl %i %i lasts %i %i last_load %i last_store %i\n", i, f->msched[i], maxl, f->msched[maxl], maxs, f->msched[maxs], lastl[0], lastl[1], lasts[0], lasts[1], last_load, last_store); + + /* What we have to wait to finish this BB? */ + if (i + 1 >= f->nmsched || REF_BB(f->msched[i + 1]) != REF_BB(f->msched[i])) { + if (last_load > lastl[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_load]); + lastl[t] = last_load; + } + if (last_store > lasts[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_store]); + lasts[t] = last_store; + } + } + } + } +}
trunk/or1ksim/cuc/memory.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/Makefile.am =================================================================== --- trunk/or1ksim/cuc/Makefile.am (nonexistent) +++ trunk/or1ksim/cuc/Makefile.am (revision 879) @@ -0,0 +1,25 @@ +# Makefile -- Makefile for cpu architecture independent simulation +# Copyright (C) 2002 Marko Mlinar, markom@opencores.org +# +# This file is part of OpenRISC 1000 Architectural Simulator. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +noinst_LIBRARIES = libcuc.a + +libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \ + verilog.c timings.c insn.c insn.h + Index: trunk/or1ksim/cuc/timings.c =================================================================== --- trunk/or1ksim/cuc/timings.c (nonexistent) +++ trunk/or1ksim/cuc/timings.c (revision 879) @@ -0,0 +1,273 @@ +/* timings.c -- OpenRISC Custom Unit Compiler, timing and size estimation + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include +#include +#include "cuc.h" +#include "insn.h" + +/* average memory delays in cycles {read single, read burst, write single, write burst} */ +static const int mdelay[4] = {4, 1, 3, 1}; + +double cycle_duration; +double max_bb_delay; + +static cuc_timing_table *timing_table; + +static double insn_time (cuc_insn *ii) +{ + if (ii->opt[2] & OPT_CONST) + return timing_table[ii->index].delayi; + else return timing_table[ii->index].delay; +} + +/* Returns dataflow tree height in cycles */ +static double max_delay (cuc_func *f, int b) +{ + double max_d = 0.; + double *d; + cuc_bb *bb = &f->bb[b]; + int i, j; + d = (double *) malloc (sizeof (double) * bb->ninsn); + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) { + double t = d[REF_I (op)]; + if (t > md) md = t; + } + } + d[i] = md + insn_time (&bb->insn[i]); + if (d[i] > max_d) max_d = d[i]; + } + free (d); + //printf ("max_d%i=%f\n", b, max_d); + return max_d; +} + +/* Calculates memory delay of a single run of a basic block */ +static int memory_delay (cuc_func *f, int b) +{ + int i; + int d = 0; + for (i = 0; i < 
f->nmsched; i++) + if (REF_BB (f->msched[i]) == b) { + if (f->mtype[i] & MT_WRITE) { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += mdelay[2]; + else d += mdelay[3]; + } else { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += mdelay[0]; + else d += mdelay[1]; + } + } + //printf ("md%i=%i\n", b, d); + return d; +} + +/* Cuts the tree and marks registers */ +void cut_tree (cuc_func *f, int b, double sd) +{ + int i, j; + double *depths; + cuc_bb *bb = &f->bb[b]; + depths = (double *) malloc (sizeof (double) * bb->ninsn); + + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + int mg = 0; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) { + double t = depths[REF_I (op)]; + if (f->INSN(op).type & IT_CUT) { + if (f->INSN(op).tmp + 1 >= mg) { + if (f->INSN(op).tmp + 1 > mg) md = 0.; + mg = f->INSN(op).tmp + 1; + if (t > md) md = t; + } + } else { + if (f->INSN(op).tmp >= mg) { + if (f->INSN(op).tmp > mg) md = 0.; + mg = f->INSN(op).tmp; + if (t > md) md = t; + } + } + } + } + //printf ("%2x md%.1f ", i, md); + md += insn_time (&bb->insn[i]); + //printf ("md%.1f mg%i %.1f\n", md, mg, sd); + bb->insn[i].tmp = mg; + if (md > sd) { + bb->insn[i].type |= IT_CUT; + if (md > cycle_duration) + log ("WARNING: operation t%x_%x may need to be registered inbetween\n", b, i); + depths[i] = 0.; + } else depths[i] = md; + } + free (depths); +} + +/* How many cycles we need now to get through the BB */ +static int new_bb_cycles (cuc_func *f, int b, int cut) +{ + long d; + double x = max_delay (f, b); + d = ceil (x / cycle_duration); + if (d < 1) d = 1; + if (cut && x > cycle_duration) cut_tree (f, b, x / d); + + if (x / d > max_bb_delay) max_bb_delay = x / d; + return memory_delay (f, b) + d; +} + +/* Cuts the tree and marks registers */ +void mark_cut (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; 
i++) + f->bb[b].insn[i].tmp = 0; /* Set starting groups */ + if (no_multicycle) + for (b = 0; b < f->num_bb; b++) + new_bb_cycles (f, b, 1); +} + +/* Returns basic block circuit area */ +static double bb_size (cuc_bb *bb) +{ + int i; + double d = 0.; + for (i = 0; i < bb->ninsn; i++) { + if (bb->insn[i].opt[2] & OPT_CONST) + d = d + timing_table[bb->insn[i].index].sizei; + else d = d + timing_table[bb->insn[i].index].size; + } + return d; +} + +/* Recalculates bb[].cnt values, based on generated profile file */ +void recalc_cnts (cuc_func *f, char *bb_filename) +{ + int i, r, b, prevbb = -1, prevcnt = 0; + int buf[256]; + const int bufsize = 256; + FILE *fi = fopen (bb_filename, "rb"); + + assert (fi); + + /* initialize counts */ + for (b = 0; b < f->num_bb; b++) f->bb[b].cnt = 0; + + /* read control flow from file and set counts */ + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + b = f->init_bb_reloc[buf[i]]; + if (b < 0) continue; + /* Were we in the loop? */ + if (b == prevbb) { + prevcnt++; + } else { + /* End the block */ + if (prevbb >= 0) f->bb[prevbb].cnt += prevcnt / f->bb[prevbb].unrolled + 1; + prevcnt = 0; + prevbb = b; + } + } + } while (r == bufsize); + + /* NOTE(review): the run that is still open when the file ends is never + added to its block's cnt -- confirm this is intended */ + fclose (fi); +} + +/* Analyses current version of design and places results into timings structure */ +void analyse_timings (cuc_func *f, cuc_timings *timings) +{ + long new_time = 0; + double size = 0.; + int b; + + max_bb_delay = 0.; + for (b = 0; b < f->num_bb; b++) { + new_time += new_bb_cycles (f, b, 0) * f->bb[b].cnt; + size = size + bb_size (&f->bb[b]); + } + timings->new_time = new_time; + timings->size = size; + log ("Max circuit delay %.2fns; max circuit clock speed %.1fMHz\n", + max_bb_delay, 1000. / max_bb_delay); +} + +/* Loads in the specified timings table. + Allocates the global timing_table (II_LAST + 1 entries) and fills it from + FILENAME. Every record is a mnemonic from known[] followed by four numbers, + stored as size, sizei, delay and delayi; records whose first token starts + with '#' are skipped up to the end of the line. + Asserts that the file could be opened, that every mnemonic is known and + that every entry of the table received non-negative values. */ +void load_timing_table (char *filename) +{ + int i; + FILE *fi; + + log ("Loading timings from %s\n", filename); + log ("Using clock delay %.2fns (frequency %.0fMHz)\n", cycle_duration, 1000.
/ cycle_duration); + /* Open outside of assert () so that the call is not compiled out with NDEBUG */ + fi = fopen (filename, "rt"); + assert (fi); + + timing_table = (cuc_timing_table *)malloc ((II_LAST + 1) * sizeof (cuc_timing_table)); + assert (timing_table); + /* Negative values mark entries that have not been read from the file yet */ + for (i = 0; i <= II_LAST; i++) { + timing_table[i].size = -1.; + timing_table[i].sizei = -1.; + timing_table[i].delay = -1.; + timing_table[i].delayi = -1.; + } + + while (!feof(fi)) { + char tmp[256]; + /* Stays out of range when the mnemonic is not found in known[] */ + int index = II_LAST + 1; + /* Field width keeps the token inside tmp[] */ + if (fscanf (fi, "%255s", tmp) != 1) break; + if (tmp[0] == '#') { + while (!feof (fi) && fgetc (fi) != '\n'); + continue; + } + for (i = 0; i <= II_LAST; i++) + if (strcmp (known[i].name, tmp) == 0) { + index = i; + break; + } + assert (index <= II_LAST); + i = index; + if (fscanf (fi, "%lf%lf%lf%lf\n", &timing_table[i].size, + &timing_table[i].sizei, &timing_table[i].delay, &timing_table[i].delayi) != 4) break; + /*printf ("!%s size %f,%f delay %f,%f\n", known[i].name, timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + /* Was everything initialized? */ + for (i = 0; i <= II_LAST; i++) { + assert (timing_table[i].size >= 0 && timing_table[i].sizei >= 0 + && timing_table[i].delay >= 0 && timing_table[i].delayi >= 0); + /*printf ("%s size %f,%f delay %f,%f\n", known[i], timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + fclose (fi); +} +
trunk/or1ksim/cuc/timings.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/cuc.c =================================================================== --- trunk/or1ksim/cuc/cuc.c (nonexistent) +++ trunk/or1ksim/cuc/cuc.c (revision 879) @@ -0,0 +1,828 @@ +/* cuc.c -- OpenRISC Custom Unit Compiler + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Main file, including code optimization and command prompt */ + +#include +#include +#include +#include +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" +#include "profiler.h" + +FILE *flog; + +/* Last used registers by software convention */ +const int call_saved[MAX_REGS] = { + 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 1, 1}; + +/* Prints out instructions */ +void print_insns (cuc_insn *insn, int ninsn, int verbose) +{ + int i, j; + for (i = 0; i < ninsn; i++) { + dep_list *l = insn[i].dep; + printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? 
':' : '?', cuc_insn_name (&insn[i])); + if (verbose) { + printf ("%-20s insn = %08x, index = %i, type = %04x ", + insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); + } else printf ("type = %04x ", insn[i].type); + for (j = 0; j < MAX_OPERANDS; j++) { + if (insn[i].opt[j] & OPT_DEST) printf ("*"); + switch (insn[i].opt[j] & ~OPT_DEST) { + case OPT_NONE: break; + case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break; + case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break; + case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break; + case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break; + case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break; + case OPT_LRBB: printf ("LRBB, "); break; + default: + fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n", + cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); + assert (0); + } + } + if (l) { + printf ("\n\tdep:"); + while (l) { + printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref)); + l = l->next; + } + } + printf ("\n"); + } +} + +void add_dep (dep_list **list, int dep) +{ + dep_list *ndep; + dep_list **tmp = list; + + while (*tmp) { + if ((*tmp)->ref == dep) return; /* already there */ + tmp = &((*tmp)->next); + } + ndep = (dep_list *)malloc (sizeof (dep_list)); + ndep->ref = dep; + ndep->next = NULL; + *tmp = ndep; +} + +void dispose_list (dep_list **list) +{ + while (*list) { + dep_list *tmp = *list; + *list = tmp->next; + free (tmp); + } +} + +void add_data_dep (cuc_func *f) +{ + int b, i, j; + dep_list *tmp; + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) { + fflush (stdout); + if (insn[i].opt[j] & OPT_REF) { + /* Copy list from predecessor */ + dep_list *l = f->INSN(insn[i].op[j]).dep; + while (l) { + add_dep (&insn[i].dep, l->ref); + l = l->next; + } + /* add predecessor */ + add_dep (&insn[i].dep, insn[i].op[j]); + } + } + } +} + +/* returns nonzero, if instruction was 
simplified */ +int apply_edge_condition (cuc_insn *ii) +{ + unsigned int c = ii->op[2]; + + /* Algebraic identities on the instruction's operands. A simplified + instruction is rewritten as ADD (SUB for multiplication by -1); where + the result equals operand 1, operand 1 is left untouched and only + operand 2 is replaced by constant 0. */ + if (ii->index == II_AND) { + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_OR) { + if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_SUB) { + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_MUL) { + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 1) { + /* x * 1 == x */ + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_SUB); + ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + return 1; + } + } else if (ii->index == II_SRL) { + if (ii->opt[2] & OPT_CONST && c == 0) { + /* x >> 0 == x */ + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_SLL) { + if (ii->opt[2] & OPT_CONST && c == 0) { + /* x << 0 == x */ + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); +
ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_SRA) { + if (ii->opt[2] & OPT_CONST && c == 0) { + /* x >> 0 == x */ + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } else if (ii->index == II_CMOV) { + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + } + return 0; +} + +/* Optimizes dataflow tree. + Repeats the rewrites below over all instructions of all basic blocks not + marked BB_DEAD until a full pass makes no change: operand swap for + commutative instructions, promotion through "ADD x, 0", removal of + self-referencing CMOVs, apply_edge_condition (), constant folding, link + removal and add/sub joining. */ +void optimize_tree (cuc_func *f) +{ + int b, i, j; + int modified; + + do { + modified = 0; + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + /* We tend to have the third parameter const if instruction is commutative */ + if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST) + && known[ii->index].comutative) { + unsigned long t = ii->opt[1]; + ii->opt[1] = ii->opt[2]; + ii->opt[2] = t; + t = ii->op[1]; + ii->op[1] = ii->op[2]; + ii->op[2] = t; + modified = 1; debug (2, "%08x:<>\n", REF(b, i)); + } + + /* Try to do the promotion */ + /* We have two consecutive expressions, containing constants, + * if previous is a simple expression we can handle it simply: */ + for (j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_REF) { + cuc_insn *t = &f->INSN(ii->op[j]); + if (f->INSN(ii->op[j]).index == II_ADD + && f->INSN(ii->op[j]).opt[2] & OPT_CONST + && f->INSN(ii->op[j]).op[2] == 0 + && !(ii->type & IT_MEMORY && t->type & IT_MEMADD) + && !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) { + /* do not promote through add-mem, and branches */ + modified = 1; debug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]); + ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1]; + } + } + + /* In case of x = cmov x, y; or x = cmov y, x; we have + asynchronous loop -> remove it */ + if (ii->index == II_CMOV) { + /* Operand to replace by constant 0; named so that it does not shadow f */ + int
sel = 0; + if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) sel = 1; + if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) sel = 2; + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) sel = 2; + if (sel) { + change_insn_type (ii, II_ADD); + debug (2, "%8x:cmov %i\n", REF(b, i), sel); + ii->opt[sel] = OPT_CONST; + ii->op[sel] = 0; + ii->opt[3] = OPT_NONE; + modified = 1; + continue; + } + } + + /* Do nothing to volatile instructions */ + if (ii->type & IT_VOLATILE) continue; + + /* Check whether we can simplify the instruction */ + if (apply_edge_condition (ii)) { + modified = 1; + continue; + } + /* We cannot do anything more if at least one is not constant */ + if (!(ii->opt[2] & OPT_CONST)) continue; + + if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ + unsigned long value; + int ok = 1; + /* Was constant expression already? */ + if (ii->index == II_ADD && !ii->op[2]) continue; + + if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; + else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; + else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; + else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; + else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; + else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; + else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; + else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; + else ok = 0; + if (ok) { + change_insn_type (ii, II_ADD); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = value; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + modified = 1; debug (2, "%8x:const\n", REF (b, i)); + } + } else if (ii->opt[1] & OPT_REF) { + cuc_insn *prev = &f->INSN(ii->op[1]); + /* Is this just a link?
*/ + if (ii->index == II_ADD + && !(ii->type & IT_MEMADD) && ii->op[2] == 0) { + int b1, i1, j1; + debug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]); + for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) + for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) + for (j1 = 0; j1 < MAX_OPERANDS; j1++) + if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) + && f->bb[b1].insn[i1].op[j1] == REF(b, i)) { + debug (2, "%x ", REF (b1, i1)); + f->bb[b1].insn[i1].op[j1] = ii->op[1]; + } + debug (2, "\n"); + change_insn_type (ii, II_NOP); + } else if (prev->opt[2] & OPT_CONST) { + /* Handle some common cases */ + /* add - add joining: (a + p) + c == a + (c + p) */ + if (ii->index == II_ADD && prev->index == II_ADD) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; debug (2, "%8x: add-add\n", REF(b, i)); + } else /* add - sub joining: (a - p) + c == a - (p - c) */ + if (ii->index == II_ADD && prev->index == II_SUB) { + /* Retype the instruction being joined, i.e. ii itself */ + change_insn_type (ii, II_SUB); + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] = prev->op[2] - ii->op[2]; + modified = 1; debug (2, "%8x: add-sub\n", REF(b, i)); + } else /* sub - add joining: (a + p) - c == a - (c - p) */ + if (ii->index == II_SUB && prev->index == II_ADD) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] -= prev->op[2]; + modified = 1; debug (2, "%8x: sub-add\n", REF(b, i)); + } + } + } + } + } + } while (modified); +} + +/* Remove nop instructions */ +void remove_nops (cuc_func *f) +{ + int b; + for (b = 0; b < f->num_bb; b++) { + int c, d = 0, i, j; + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].index != II_NOP) { + reloc [i] = d; + insn[d++] = insn[i]; + } else { + reloc[i] = d; /* For jumps only */ + } + f->bb[b].ninsn = d; + + /* Relocate references from all basic blocks */ + for (c = 0; c < f->num_bb; c++) + for (i = 0; i < f->bb[c].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if ((f->bb[c].insn[i].opt[j] & OPT_REF) + && REF_BB(f->bb[c].insn[i].op[j]) == b) + f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I
(f->bb[c].insn[i].op[j])]); + } +} + +/* Remove unused assignments */ +void remove_dead (cuc_func *f) +{ + int b, i, j; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT))) + f->bb[b].insn[i].type |= IT_UNUSED; + + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF) { + f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED; + } + } + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) { + change_insn_type (&f->bb[b].insn[i], II_NOP); + } + + remove_nops (f); +} + +/* Removes trivial register assignments */ +void remove_trivial_regs (cuc_func *f) +{ + int b, i; + for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i]; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) { + if (insn[i].index == II_ADD + && insn[i].opt[0] & OPT_REGISTER + && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] + && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { + if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; + change_insn_type (&insn[i], II_NOP); + } + } + } + if (DEBUG > 2) { + printf ("saved regs "); + for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]); + printf ("\n"); + } + remove_nops (f); +} + +/* relocate all accesses inside of BB b to back/fwd */ +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} + +/* split the BB, based on the group numbers in .tmp */ +void expand_bb (cuc_func *f, int b) +{ + int n = f->num_bb; + int mg = 
0; + int b1, i, j; + + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; + + /* Create copies */ + for (b1 = 1; b1 <= mg; b1++) { + assert (f->num_bb < MAX_BB); + cpy_bb (&f->bb[f->num_bb], &f->bb[b]); + f->num_bb++; + } + + /* Relocate */ + for (b1 = 0; b1 < f->num_bb; b1++) + for (i = 0; i < f->bb[b1].ninsn; i++) { + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b1].insn[i].opt[j] & OPT_REF) { + int t = f->bb[b1].insn[i].op[j]; + if (REF_BB(t) == b && f->INSN(t).tmp != 0) + f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); + } + } + + /* Delete unused instructions */ + for (j = 0; j <= mg; j++) { + if (j == 0) b1 = b; + else b1 = n + j - 1; + for (i = 0; i < f->bb[b1].ninsn; i++) { + if (f->bb[b1].insn[i].tmp != j) + change_insn_type (&f->bb[b1].insn[i], II_NOP); + f->bb[b1].insn[i].tmp = 0; + } + if (j < mg) { + f->bb[b1].next[0] = n + j; + f->bb[b1].next[1] = -1; + f->bb[n + j].prev[0] = b1; + f->bb[n + j].prev[1] = -1; + } else { + i = f->bb[b1].next[0]; + f->bb[n + j].prev[0] = j == 1 ? 
b : b1 - 1; + f->bb[n + j].prev[1] = -1; + if (i >= 0) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + i = f->bb[b1].next[1]; + if (i >= 0) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + } + } +} + +/* Latch outputs in loops */ +void add_latches (cuc_func *f) +{ + int b, i, j; + + //print_cuc_bb (f, "ADD_LATCHES a"); + /* Cuts the tree and marks registers */ + mark_cut (f); + + /* Split BBs with more than one group */ + for (b = 0; b < f->num_bb; b++) expand_bb (f, b); + remove_nops (f); + //print_cuc_bb (f, "ADD_LATCHES 0"); + + /* Convert accesses in BB_INLOOP type block to latched */ + for (b = 0; b < f->num_bb; b++) { + int j; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { + int t = f->bb[b].insn[i].op[j]; + /* If we are pointing to a INLOOP block from outside, or forward + (= previous loop iteration) we must register that data */ + if ((f->bb[REF_BB(t)].type & BB_INLOOP || no_multicycle) + && !(f->INSN(t).type & (IT_BRANCH | IT_COND)) + && (REF_BB(t) != b || REF_I(t) >= i)) { + f->INSN(t).type |= IT_LATCHED; + } + } + } + //print_cuc_bb (f, "ADD_LATCHES 1"); + + /* Add latches at the end of blocks as needed */ + for (b = 0; b < f->num_bb; b++) { + int nreg = 0; + cuc_insn *insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; + if (nreg) { + insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); + j = 0; + for (i = 0; i < f->bb[b].ninsn; i++) { + insn[i] = f->bb[b].insn[i]; + if (insn[i].type & IT_LATCHED) { + cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; + change_insn_type (ii, II_REG); + ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; + ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; + ii->opt[2] = ii->opt[3] = OPT_NONE; + ii->dep = NULL; + ii->type = IT_VOLATILE; + sprintf (ii->disasm, "reg %i_%i", 
b, i); + } + } + f->bb[b].ninsn += nreg; + free (f->bb[b].insn); + f->bb[b].insn = insn; + } + } + //print_cuc_bb (f, "ADD_LATCHES 2"); + + /* Repair references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + /* If destination instruction is latched, use register instead */ + if (f->bb[b].insn[i].opt[j] == OPT_REF + && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { + int b1, i1; + b1 = REF_BB (f->bb[b].insn[i].op[j]); + //debug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); + if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { + for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { + assert (f->bb[b1].insn[i1].index == II_REG); + if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { + f->bb[b].insn[i].op[j] = REF (b1, i1); + break; + } + } + } + } +} + +/* Evaluates one preroll/unroll variant of basic block B. + Runs the optimization and scheduling passes on the function returned by + preunroll_loop (f, b, i, j, bb_filename), stores the resulting figures in + TIMINGS together with B, the unroll factor J and the preroll factor I, + and frees that function again. Returns TIMINGS. */ +cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j) +{ + cuc_func *func; + debug (2, "BB%i unroll %i times preroll %i times\n", b, j, i); + func = preunroll_loop (f, b, i, j, bb_filename); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_PREUNROLL"); + + log ("Optimizing.\n"); + optimize_tree (func); + /* Only the dumps are disabled here; the passes themselves must always run, + so no "if (DEBUG > 6)" may be left in front of them */ + //print_cuc_bb (func, "AFTER_OPT_TREE1"); + remove_nops (func); + //print_cuc_bb (func, "NO_NOPS"); + remove_dead (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD1"); + optimize_bb (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_OPT_BB"); + remove_dead_bb (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD_BB"); + optimize_tree (func); + if (DEBUG > 3) print_cuc_bb (func, "AFTER_OPT_TREE"); + remove_dead (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD"); + remove_trivial_regs (func); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_TRIVIAL"); + add_memory_dep (func, memory_order); + if (DEBUG > 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); + add_data_dep (func); + if (DEBUG > 8) print_cuc_bb (func, "AFTER_DATA_DEP"); + schedule_memory (func,
memory_order); + if (DEBUG > 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); + add_latches (func); + if (DEBUG > 0) print_cuc_bb (func, "AFTER_LATCHES"); + + analyse_timings (func, timings); + debug (2, "new_time = %i, old_time = %i, size = %f\n", + timings->new_time, func->orig_time, timings->size); + log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n", + timings->new_time, func->orig_time, timings->size); + //output_verilog (func, argv[1]); + free_func (func); + timings->b = b; + timings->unroll = j; + timings->preroll = i; + return timings; +} + +/* Comparator ordering timings by ascending new_time; returns -1, 0 or 1 */ +int tim_comp (cuc_timings *a, cuc_timings *b) +{ + if (a->new_time < b->new_time) return -1; + else if (a->new_time > b->new_time) return 1; + else return 0; +} + +cuc_func *analyse_function (char *module_name, long orig_time, + unsigned long start_addr, unsigned long end_addr) +{ + cuc_timings timings; + cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); + cuc_func *saved; + int b, i, j; + char tmp1[256]; + char tmp2[256]; + + func->orig_time = orig_time; + func->start_addr = start_addr; + func->end_addr = end_addr; + + sprintf (tmp1, "%s.bin", module_name); + cuc_load (tmp1); + + log ("Detecting basic blocks\n"); + detect_bb (func); + if (DEBUG > 2) print_cuc_insns ("WITH_BB_LIMITS", 0); + + //sprintf (tmp1, "%s.bin.mp", module_name); + sprintf (tmp2, "%s.bin.bb", module_name); + generate_bb_seq (func, config.sim.mprof_fn, tmp2); + + build_bb (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_BUILD_BB"); + reg_dep (func); + + log ("Detecting dependencies\n"); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_REG_DEP"); + optimize_tree (func); + log ("Optimizing.\n"); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE1"); + remove_nops (func); + if (DEBUG > 6) print_cuc_bb (func, "NO_NOPS"); + remove_dead (func); + if (DEBUG > 6) print_cuc_bb (func, "AFTER_DEAD1"); + optimize_bb (func); + if (DEBUG > 6) print_cuc_bb (func, "AFTER_OPT_BB"); + remove_dead_bb (func); + if (DEBUG > 2) print_cuc_bb (func,
"AFTER_DEAD_BB"); + optimize_tree (func); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_OPT_TREE"); + remove_dead (func); + if (DEBUG > 5) print_cuc_bb (func, "AFTER_DEAD"); + remove_trivial_regs (func); + if (DEBUG > 2) print_cuc_bb (func, "AFTER_TRIVIAL"); + + assert (saved = dup_func (func)); + + add_memory_dep (func, memory_order); + if (DEBUG > 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); + add_data_dep (func); + if (DEBUG > 8) print_cuc_bb (func, "AFTER_DATA_DEP"); + schedule_memory (func, memory_order); + if (DEBUG > 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); + add_latches (func); + if (DEBUG > 0) print_cuc_bb (func, "AFTER_LATCHES"); + analyse_timings (func, &timings); + + output_verilog (func, module_name); + free_func (func); + +#if 1 + /* detect and unroll simple loops */ + for (b = 0; b < saved->num_bb; b++) { + cuc_timings t[MAX_UNROLL * MAX_PREROLL]; + cuc_timings *ut; + cuc_timings *cut = &t[0]; + int nt = 1; + double csize; + + saved->bb[b].ntim = 0; + saved->bb[b].tim = NULL; + + /* Is it a loop? 
*/ + if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; + t[0] = timings; + t[0].b = b; + t[0].preroll = 1; + t[0].unroll = 1; + + sprintf (tmp1, "%s.bin.bb", module_name); + i = 1; + do { + cuc_timings *pt; + cuc_timings *cpt = cut; + j = 1; + + do { + pt = cpt; + cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i); + } while (j <= MAX_PREROLL && pt->new_time >= cpt->new_time); + i++; + ut = cut; + cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i); + } while (i <= MAX_UNROLL && ut->new_time >= cut->new_time); + + /* Sort the timings */ + if (DEBUG > 3) + for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time); + + qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); + + /* Delete timings, that have worst time and bigger size than other */ + j = 1; + csize = t[0].size; + for (i = 1; i < nt; i++) + if (t[i].size < csize) t[j++] = t[i]; + nt = j; + + printf ("A\n"); + for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc %.1f\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); + + saved->bb[b].ntim = nt; + assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt)); + + /* Copy options in reverse order -- smallest first */ + for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i]; + } +#endif + return saved; +} + +/* Dumps specified function to file (hex) */ +unsigned long extract_function (char *out_fn, unsigned long start_addr) +{ + FILE *fo; + unsigned long a = start_addr; + int x = 0; + assert (fo = fopen (out_fn, "wt+")); + + do { + unsigned long d = evalsim_mem32 (a); + int index = insn_decode (d); + assert (index >= 0); + if (x) x++; + if (strcmp (insn_name (index), "l.jr") == 0) x = 1; + a += 4; + fprintf (fo, "%08x\n", d); + } while (x < 2); + + fclose (fo); + return a - 4; +} + +static cuc_func *func[MAX_FUNCS]; + +void main_cuc (char *filename) +{ + int i; + char tmp1[256]; + + printf ("Entering Data Fusion command 
prompt\n"); + printf ("Using profile file \"%s\" and memory profile file \"%s\"\n", config.sim.prof_fn, config.sim.mprof_fn); + sprintf (tmp1, "%s.log", filename); + assert (flog = fopen (tmp1, "wt+")); + + /* Loads in the specified timings table */ + load_timing_table ("virtex.tim"); + + prof_set (1, 0); + assert (prof_acquire (config.sim.prof_fn) == 0); + + for (i = 0; i < prof_nfuncs; i++) + printf ("%-24s addr %08x cycles %i (%3.1f%%)\n", prof_func[i].name, + prof_func[i].addr, prof_func[i].cum_cycles, + 100. * prof_func[i].cum_cycles / prof_cycles); + + cycle_duration = 40.; + + /* Try all functions except "total" */ + for (i = 0; i < prof_nfuncs - 1; i++) { + long orig_time; + unsigned long start_addr, end_addr; + orig_time = prof_func[i].cum_cycles; + start_addr = prof_func[i].addr; + + /* Extract the function from the binary */ + sprintf (tmp1, "%s.bin", prof_func[i].name); + end_addr = extract_function (tmp1, start_addr); + + log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr); + func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr); + } + + /* Dispose memory */ + for (i = 0; i < prof_nfuncs -1; i++) + if (func[i]) free_func (func[i]); + + fclose (flog); +} +
trunk/or1ksim/cuc/cuc.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: trunk/or1ksim/cuc/insn.h =================================================================== --- trunk/or1ksim/cuc/insn.h (nonexistent) +++ trunk/or1ksim/cuc/insn.h (revision 879) @@ -0,0 +1,101 @@ +/* insn.h -- OpenRISC Custom Unit Compiler, internal instruction definitions + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#ifndef _DF_INSN_ +#define _DF_INSN_ + +#include "cuc.h" + +/* Instruction types */ +#define II_ADD 0 +#define II_SUB 1 +#define II_AND 2 +#define II_OR 3 +#define II_XOR 4 +#define II_MUL 5 +#define II_SRL 6 +#define II_SLL 7 +#define II_SRA 8 +#define II_LB 9 +#define II_LH 10 +#define II_LW 11 +#define II_SB 12 +#define II_SH 13 +#define II_SW 14 +#define II_SFEQ 15 +#define II_SFNE 16 +#define II_SFLE 17 +#define II_SFLT 18 +#define II_SFGE 19 +#define II_SFGT 20 +#define II_SFOR 21 +#define II_BF 22 +#define II_LRBB 23 +#define II_CMOV 24 +#define II_REG 25 +#define II_NOP 26 +#define II_LAST 26 + +/* misc flags */ +#define II_MASK 0x0fff +#define II_MEM 0x1000 +#define II_SIGNED 0x2000 + +/* Nonzero if instruction type x is one of the loads */ +#define II_IS_LOAD(x) ((x) == II_LB || (x) == II_LH || (x) == II_LW) +/* Access width in bytes of load/store instruction type x (byte 1, half word 2, + word 4); -1 if x is not a memory access */ +#define II_MEM_WIDTH(x) ((x) == II_LB || (x) == II_SB ? 1 :\ + (x) == II_LH || (x) == II_SH ? 2 :\ + (x) == II_LW || (x) == II_SW ? 4 : -1) + +/* Clock period in ns */ +extern double cycle_duration; + +/* List of known instructions and their rtl representation */ +typedef struct { + char *name; + int comutative; + char *rtl; +} cuc_known_insn; + +extern const cuc_known_insn known[II_LAST + 1]; + +/* Timing table -- same indexes as known table */ +typedef struct { + double delay; + double size; + double delayi; + double sizei; +} cuc_timing_table; + +/* Conversion links */ +typedef struct { + const char *from; + const int to; +} cuc_conv; + +/* Find known instruction and attach them to insn */ +void change_insn_type (cuc_insn *i, int index); + +/* Returns instruction name */ +const char *cuc_insn_name (cuc_insn *ii); + +/* Loads in the specified timings table */ +void load_timing_table (char *filename); + +#endif /* _DF_INSN_ */ + Index: trunk/or1ksim/cuc/Makefile =================================================================== --- trunk/or1ksim/cuc/Makefile (nonexistent) +++ trunk/or1ksim/cuc/Makefile (revision 879) @@ -0,0 +1,337 @@ +# Generated automatically from Makefile.in by configure.
+# Makefile.in generated automatically by automake 1.4-p5 from Makefile.am + +# Copyright (C) 1994, 1995-8, 1999, 2001 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +# Makefile -- Makefile for cpu architecture independent simulation +# Copyright (C) 2002 Marko Mlinar, markom@opencores.org +# +# This file is part of OpenRISC 1000 Architectural Simulator. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + + +SHELL = /bin/sh + +srcdir = . +top_srcdir = .. 
+prefix = /usr/local +exec_prefix = ${prefix} + +bindir = ${exec_prefix}/bin +sbindir = ${exec_prefix}/sbin +libexecdir = ${exec_prefix}/libexec +datadir = ${prefix}/share +sysconfdir = ${prefix}/etc +sharedstatedir = ${prefix}/com +localstatedir = ${prefix}/var +libdir = ${exec_prefix}/lib +infodir = ${prefix}/info +mandir = ${prefix}/man +includedir = ${prefix}/include +oldincludedir = /usr/include + +DESTDIR = + +pkgdatadir = $(datadir)/or1ksim +pkglibdir = $(libdir)/or1ksim +pkgincludedir = $(includedir)/or1ksim + +top_builddir = .. + +ACLOCAL = aclocal +AUTOCONF = autoconf +AUTOMAKE = automake +AUTOHEADER = autoheader + +INSTALL = /usr/bin//install -c +INSTALL_PROGRAM = ${INSTALL} $(AM_INSTALL_PROGRAM_FLAGS) +INSTALL_DATA = ${INSTALL} -m 644 +INSTALL_SCRIPT = ${INSTALL_PROGRAM} +transform = s,^,or32-uclinux-,; + +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_alias = i686-pc-linux-gnu +build_triplet = i686-pc-linux-gnu +host_alias = i686-pc-linux-gnu +host_triplet = i686-pc-linux-gnu +target_alias = or32-uclinux +target_triplet = or32-unknown-uclinux-gnu +AR = ar +ARFLAGS = cr +BUILD_DIR = /home/markom/sim +CC = gcc +CFLAGS = -g -O2 -DOR32 +CPU_ARCH = or32 +INCLUDES = -I${top_srcdir} -I${top_srcdir}/cpu/common -I${top_srcdir}/cpu/or1k -I${top_srcdir}/cpu/or32 -I${top_srcdir}/cache -I${top_srcdir}/mmu -I${top_srcdir}/bpb -I${top_srcdir}/peripheral -I${top_srcdir}/tick -I${top_srcdir}/pm -I${top_srcdir}/pic -I${top_srcdir}/debug -I${top_srcdir}/vapi -I${top_srcdir}/support +LOCAL_CFLAGS = +LOCAL_DEFS = +LOCAL_LDFLAGS = +MAKEINFO = makeinfo +MAKE_SHELL = /bin/sh +PACKAGE = or1ksim +RANLIB = ranlib +SUMVERSION = +TERMCAP_LIB = +VERSION = 1.3 +host = i686-pc-linux-gnu +host_cpu = i686 +host_os = linux-gnu + +noinst_LIBRARIES = libcuc.a + +libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \ + verilog.c timings.c insn.c insn.h + +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs 
+CONFIG_HEADER = ../config.h +CONFIG_CLEAN_FILES = +LIBRARIES = $(noinst_LIBRARIES) + + +DEFS = -DHAVE_CONFIG_H -I. -I$(srcdir) -I.. +CPPFLAGS = +LDFLAGS = +LIBS = +libcuc_a_LIBADD = +libcuc_a_OBJECTS = cuc.o load.o bb.o memory.o verilog.o timings.o \ +insn.o +COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ +DIST_COMMON = Makefile.am Makefile.in + + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) + +TAR = gtar +GZIP_ENV = --best +DEP_FILES = .deps/bb.P .deps/cuc.P .deps/insn.P .deps/load.P \ +.deps/memory.P .deps/timings.P .deps/verilog.P +SOURCES = $(libcuc_a_SOURCES) +OBJECTS = $(libcuc_a_OBJECTS) + +all: all-redirect +.SUFFIXES: +.SUFFIXES: .S .c .o .s +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && $(AUTOMAKE) --gnu cuc/Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(BUILT_SOURCES) + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + + +mostlyclean-noinstLIBRARIES: + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +distclean-noinstLIBRARIES: + +maintainer-clean-noinstLIBRARIES: + +.s.o: + $(COMPILE) -c $< + +.S.o: + $(COMPILE) -c $< + +mostlyclean-compile: + -rm -f *.o core *.core + +clean-compile: + +distclean-compile: + -rm -f *.tab.c + +maintainer-clean-compile: + +libcuc.a: $(libcuc_a_OBJECTS) $(libcuc_a_DEPENDENCIES) + -rm -f libcuc.a + $(AR) cru libcuc.a $(libcuc_a_OBJECTS) $(libcuc_a_LIBADD) + $(RANLIB) libcuc.a + +tags: TAGS + +ID: $(HEADERS) $(SOURCES) $(LISP) + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + here=`pwd` && cd $(srcdir) \ + && mkid -f$$here/ID $$unique $(LISP) + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) $(LISP) + tags=; \ + 
here=`pwd`; \ + list='$(SOURCES) $(HEADERS)'; \ + unique=`for i in $$list; do echo $$i; done | \ + awk ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ + || (cd $(srcdir) && etags $(ETAGS_ARGS) $$tags $$unique $(LISP) -o $$here/TAGS) + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + -rm -f TAGS ID + +maintainer-clean-tags: + +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) + +subdir = cuc + +distdir: $(DISTFILES) + here=`cd $(top_builddir) && pwd`; \ + top_distdir=`cd $(top_distdir) && pwd`; \ + distdir=`cd $(distdir) && pwd`; \ + cd $(top_srcdir) \ + && $(AUTOMAKE) --include-deps --build-dir=$$here --srcdir-name=$(top_srcdir) --output-dir=$$top_distdir --gnu cuc/Makefile + @for file in $(DISTFILES); do \ + d=$(srcdir); \ + if test -d $$d/$$file; then \ + cp -pr $$d/$$file $(distdir)/$$file; \ + else \ + test -f $(distdir)/$$file \ + || ln $$d/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $$d/$$file $(distdir)/$$file || :; \ + fi; \ + done + +DEPS_MAGIC := $(shell mkdir .deps > /dev/null 2>&1 || :) + +-include $(DEP_FILES) + +mostlyclean-depend: + +clean-depend: + +distclean-depend: + -rm -rf .deps + +maintainer-clean-depend: + +%.o: %.c + @echo '$(COMPILE) -c $<'; \ + $(COMPILE) -Wp,-MD,.deps/$(*F).pp -c $< + @-cp .deps/$(*F).pp .deps/$(*F).P; \ + tr ' ' '\012' < .deps/$(*F).pp \ + | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \ + >> .deps/$(*F).P; \ + rm .deps/$(*F).pp + +%.lo: %.c + @echo '$(LTCOMPILE) -c $<'; \ + $(LTCOMPILE) -Wp,-MD,.deps/$(*F).pp -c $< + @-sed -e 's/^\([^:]*\)\.o[ ]*:/\1.lo \1.o :/' \ + < .deps/$(*F).pp > .deps/$(*F).P; \ + tr ' ' '\012' < .deps/$(*F).pp \ + | sed -e 's/^\\$$//' -e '/^$$/ d' -e '/:$$/ d' -e 's/$$/ :/' \ + >> .deps/$(*F).P; \ + rm -f .deps/$(*F).pp +info-am: +info: info-am +dvi-am: +dvi: dvi-am +check-am: all-am +check: check-am +installcheck-am: +installcheck: installcheck-am +install-exec-am: +install-exec: install-exec-am + 
+install-data-am: +install-data: install-data-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am +install: install-am +uninstall-am: +uninstall: uninstall-am +all-am: Makefile $(LIBRARIES) +all-redirect: all-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install +installdirs: + + +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + -rm -f config.cache config.log stamp-h stamp-h[0-9]* + +maintainer-clean-generic: +mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ + mostlyclean-tags mostlyclean-depend mostlyclean-generic + +mostlyclean: mostlyclean-am + +clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-depend \ + clean-generic mostlyclean-am + +clean: clean-am + +distclean-am: distclean-noinstLIBRARIES distclean-compile \ + distclean-tags distclean-depend distclean-generic \ + clean-am + +distclean: distclean-am + +maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-depend maintainer-clean-generic \ + distclean-am + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." 
+ +maintainer-clean: maintainer-clean-am + +.PHONY: mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ +clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir mostlyclean-depend \ +distclean-depend clean-depend maintainer-clean-depend info-am info \ +dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ +install-exec install-data-am install-data install-am install \ +uninstall-am uninstall all-redirect all-am all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: Index: trunk/or1ksim/configure.in =================================================================== --- trunk/or1ksim/configure.in (revision 878) +++ trunk/or1ksim/configure.in (revision 879) @@ -224,7 +224,7 @@ AC_SUBST(INCLUDES) AC_OUTPUT([Makefile bpb/Makefile cache/Makefile cpu/Makefile - cpu/common/Makefile cpu/or32/Makefile + cpu/common/Makefile cpu/or32/Makefile cuc/Makefile cpu/or1k/Makefile cpu/dlx/Makefile debug/Makefile support/Makefile mmu/Makefile peripheral/Makefile tick/Makefile pm/Makefile pic/Makefile debug/Makefile vapi/Makefile],
/trunk/or1ksim/README
2,32 → 2,12
What is this stuff?
===================
 
This is OpenRISC 1000 and DLX architectural simulator. It was written by
Damjan Lampret and it is free software. See the file COPYING for copying
permission. To contact the author, send mail to <lampret@opencores.org>.
This is the OpenRISC 1000 architectural simulator. See the file COPYING
for copying permission. To contact the authors, see the AUTHORS file.
 
I use it to define OR1K system architecture. An implementation simulator
for OR1K will be also available, probably in Mar/2000 or later.
 
Initially this software was not meant to be released to public because it
was developed just to analyze program flow of GCC generated assembly code.
With the time it became bigger and was able to generate statistics about
superscalar issuing of multiple instructions. I've used it as a test simulator
to test OR1K GCC port. Perhaps some day I will (or perhaps someone else would
like to do that ??) clean-up the code and reorganize it.
 
This simulator loads an assembly file for one of the both architectures
and it simulates the operation of instructions. Because it was meant to be used
only to test characteristics of various RISC architectures and various GCC
optimization methods, it has a bit strange memory model. It is abstract and
physical at the same time. I can't really explain, just check the sources if
interested. Some other things are strange or incomplete too (like
C library emulation, currently supports only printf via simprintf).
and it simulates the operation of instructions.
 
MMU directory is not functional. Someday it will be filled with code for
virtual memory simulation.
 
 
Installation
============
 
40,49 → 20,22
warnings. There is no "make install". Just use it from default location
or copy it to your bin directory (usually something like /usr/local/bin
or ~/bin).
This program hasn't been written with security in mind. It has many static
buffers and it does not check the size of input strings (user commands
or whatever). So don't setuid it. If it kills your dog, don't blame it on me.
 
To select DLX simulation, change the target to the configure script to 'dlx'
and recompile everything (do 'make all' again).
 
GNU Tools
=========
 
To build GNU tools:
Instructions on how to build the GNU tools can be found at www.opencores.org
 
cvs -d :pserver:cvs@cvs.opencores.org:/home/oc/cvs -z9 co -d work or1k
mkdir b-b
cd b-b
../binutils/configure --target=or32-rtems --prefix=/opt/or32-rtems &&
make all install
export PATH=/opt/or32-rtems/bin:$PATH
cd ../gcc
./contrib/egcs_update --touch
ln -s ../newlib/newlib .
cd ..
mkdir b-gcc
cd b-gcc
../gcc/configure --target=or32-rtems \
--with-gnu-as --with-gnu-ld --with-newlib --verbose \
--enable-threads --prefix=/opt/or32-rtems --enable-languages="c" &&
\
make all install
 
 
Simulator test
==============
 
Edit the sim.cfg file in order to configure your system.
 
Issue 'sim testbench/dhry.or32' or 'sim testbench/dhry.dlx' to
test simulator. See testbench/README for details about Dhrystone 2.1
benchmark.
test simulator. See testbench/README for more details about running.
 
For microkernel test (exception test) undefine ONLY_VIRTUAL_MACHINE (you want
exceptions, right !) in cpu/or1k/except.h and recompile simulator. A compiled
and linked version should already exist in testbench/uos. Just issue
'sim testbench/uos/uos.or32' from the top level sim directory. Currently only
OR32 is supported by UOS.
Also run sim with the --help option for a list of command line options, and
use the help command in command mode to list the available commands.
 
OpenRISC and open cores
=======================
93,8 → 46,3
will run GNU/Linux.
For more information visit us at http://www.opencores.org.
 
--
 
29/Feb/2000, Damjan Lampret email:lampret@opencores.org
03/Mar/2000, Johan Rydberg email:johan.rydberg@netinsight.net
04/May/2000, Jimmy Chen-Min Chen email:jimmy@ee.nctu.edu.tw
/trunk/or1ksim/Makefile.am
4,14 → 4,16
#
#
 
SUBDIRS = cpu bpb support cache mmu peripheral tick pm pic debug vapi
SUBDIRS = cpu bpb support cache mmu peripheral tick pm pic debug vapi cuc
 
bin_PROGRAMS = sim
 
sim_SOURCES = toplevel.c sim-config.c sim-config.h profiler.c mprofiler.c profiler.h mprofiler.h
sim_SOURCES = toplevel.c sim-config.c sim-config.h profiler.c \
mprofiler.c profiler.h mprofiler.h
sim_LDADD = cpu/common/libcommon.a cpu/$(CPU_ARCH)/libarch.a \
cpu/or1k/libor1k.a support/libsupport.a mmu/libmmu.a \
bpb/libbpb.a cache/libcache.a peripheral/libperipheral.a \
tick/libtick.a pm/libpm.a pic/libpic.a debug/libdebug.a \
vapi/libvapi.a
vapi/libvapi.a cuc/libcuc.a
 
sim_LDFLAGS = #-lreadline
/trunk/or1ksim/toplevel.c
60,7 → 60,7
#include "atahost.h"
 
/* CVS revision number. */
const char rcsrev[] = "$Revision: 1.86 $";
const char rcsrev[] = "$Revision: 1.87 $";
 
/* Continuous run versus single step tracing switch. */
int cont_run;
160,6 → 160,7
printf("debug - toggles simulator debug mode\n");
mp_help ();
prof_help ();
printf("cuc - enters Custom Unit Compiler command prompt\n");
#endif
printf("help - available commands (this list)\n");
}
676,6 → 677,7
if (strcmp(item1, "info") == 0) /* configuration info */
sim_info ();
else
#if !FAST_SIM
if (strcmp (item1, "profiler") == 0) { /* run profiler utility */
char *argv[10];
int argc = tokenize_line (linestr, argv, 10);
686,7 → 688,9
int argc = tokenize_line (linestr, argv, 10);
main_mprofiler (argc, argv);
} else
#if !FAST_SIM
if (strcmp (item1, "cuc") == 0) { /* run Custom Unit Compiler */
main_cuc (runtime.sim.filename);
} else
if (strcmp(item1, "set") == 0) { /* configuration info */
char *s = linestr;
int i;
/trunk/gdb-5.0/include/opcode/or32.h
213,5 → 213,14
Return the size of the instruction. */
int disassemble_index (unsigned long insn, int index);
 
/* FOR INTERNAL USE ONLY */
/* Extends the immediate IMM with either zero- or sign-extension and
   returns the extended value.  Which extension applies is figured out
   from the letter description L; when it is sign extension, the correct
   sign bit position is determined as well. */
unsigned long extend_imm(unsigned long imm, char l);
 
/* Extracts value from opcode INSN and returns it.
   NOTE(review): PARAM_CH presumably selects the operand letter to extract
   and ENC_INITIAL the instruction's encoding string -- confirm against the
   definition in or32.c. */
unsigned long or32_extract(char param_ch, char *enc_initial, unsigned long insn);
 
#endif
 
/trunk/gdb-5.0/opcodes/or32.h
213,5 → 213,14
Return the size of the instruction. */
int disassemble_index (unsigned long insn, int index);
 
/* FOR INTERNAL USE ONLY */
/* Extends the immediate IMM with either zero- or sign-extension and
   returns the extended value.  Which extension applies is figured out
   from the letter description L; when it is sign extension, the correct
   sign bit position is determined as well. */
unsigned long extend_imm(unsigned long imm, char l);
 
/* Extracts value from opcode INSN and returns it.
   NOTE(review): PARAM_CH presumably selects the operand letter to extract
   and ENC_INITIAL the instruction's encoding string -- confirm against the
   definition in or32.c. */
unsigned long or32_extract(char param_ch, char *enc_initial, unsigned long insn);
 
#endif
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.