OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/tags/stable_0_2_0_rc2/or1ksim/cuc
    from Rev 1611 to Rev 1765
    Reverse comparison

Rev 1611 → Rev 1765

/Makefile.in
0,0 → 1,430
# Makefile.in generated by automake 1.9.5 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Makefile -- Makefile for cpu architecture independent simulation
# Copyright (C) 2002 Marko Mlinar, markom@opencores.org
#
# This file is part of OpenRISC 1000 Architectural Simulator.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
 
SOURCES = $(libcuc_a_SOURCES)
 
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
top_builddir = ..
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
INSTALL = @INSTALL@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = cuc
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.in
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
LIBRARIES = $(noinst_LIBRARIES)
libcuc_a_AR = $(AR) $(ARFLAGS)
libcuc_a_LIBADD =
# Object files archived into libcuc.a: one per .c file named in
# libcuc_a_SOURCES (the headers listed there produce no object).
am_libcuc_a_OBJECTS = cuc.$(OBJEXT) load.$(OBJEXT) bb.$(OBJEXT) \
memory.$(OBJEXT) verilog.$(OBJEXT) timings.$(OBJEXT) \
insn.$(OBJEXT) adv.$(OBJEXT)
libcuc_a_OBJECTS = $(am_libcuc_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(libcuc_a_SOURCES)
DIST_SOURCES = $(libcuc_a_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AR = @AR@
ARFLAGS = @ARFLAGS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_DIR = @BUILD_DIR@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPU_ARCH = @CPU_ARCH@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DYNAMIC_EXECUTION_FALSE = @DYNAMIC_EXECUTION_FALSE@
DYNAMIC_EXECUTION_TRUE = @DYNAMIC_EXECUTION_TRUE@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
GENERATE_NEEDED_FALSE = @GENERATE_NEEDED_FALSE@
GENERATE_NEEDED_TRUE = @GENERATE_NEEDED_TRUE@
INCLUDES = @INCLUDES@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LOCAL_CFLAGS = @LOCAL_CFLAGS@
LOCAL_DEFS = @LOCAL_DEFS@
LOCAL_LDFLAGS = @LOCAL_LDFLAGS@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MAKE_SHELL = @MAKE_SHELL@
OBJEXT = @OBJEXT@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
SUMVERSION = @SUMVERSION@
TERMCAP_LIB = @TERMCAP_LIB@
VERSION = @VERSION@
ac_ct_CC = @ac_ct_CC@
ac_ct_RANLIB = @ac_ct_RANLIB@
ac_ct_STRIP = @ac_ct_STRIP@
am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
datadir = @datadir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
prefix = @prefix@
program_transform_name = @program_transform_name@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
# libcuc.a is built by `all' (via $(LIBRARIES)) but is not installed:
# the install-exec-am and install-data-am targets in this file are empty.
noinst_LIBRARIES = libcuc.a
libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \
verilog.c timings.c insn.c insn.h adv.c
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .o .obj
# Regenerate Makefile.in when Makefile.am or a configure input changes.
# For each prerequisite newer than the target ($?): if its name occurs in
# $(am__configure_deps), hand off to the top-level `am--refresh' target
# and stop (exit 0 on success, exit 1 on failure).  If no prerequisite
# matched, re-run automake for cuc/Makefile from $(top_srcdir).
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cuc/Makefile'; \
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu cuc/Makefile
# Rebuild Makefile from Makefile.in.  If config.status is among the newer
# prerequisites ($?), delegate to the top-level `am--refresh' target;
# otherwise re-run ./config.status for just $(subdir)/Makefile, passing
# $(am__depfiles_maybe) as an extra argument.
# .PRECIOUS keeps make from deleting Makefile if the recipe is interrupted.
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
 
# config.status is not rebuilt here: the work is delegated to the
# `am--refresh' target of the Makefile in $(top_builddir).
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
# configure and aclocal.m4 are likewise regenerated only through the
# `am--refresh' target of the Makefile in $(top_builddir).
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
# Remove the built library.  The `test -z' guard skips rm when
# $(noinst_LIBRARIES) is empty; the leading `-' ignores any failure.
clean-noinstLIBRARIES:
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
# Build the static library.  Any existing archive is deleted first, so
# $(libcuc_a_AR) always creates a fresh one instead of updating in place;
# $(RANLIB) is then run on the result.
libcuc.a: $(libcuc_a_OBJECTS) $(libcuc_a_DEPENDENCIES)
-rm -f libcuc.a
$(libcuc_a_AR) libcuc.a $(libcuc_a_OBJECTS) $(libcuc_a_LIBADD)
$(RANLIB) libcuc.a
 
# Delete every object file in the build directory.  This is a glob on
# *.$(OBJEXT), not just the objects listed in $(libcuc_a_OBJECTS).
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
# Delete any *.tab.c files in the build directory; failures are ignored.
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adv.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bb.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cuc.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insn.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timings.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verilog.Po@am__quote@
 
# Compile a .c file to a .o object.  The @...@ prefixes are configure
# substitutions that select which recipe lines are active:
#  - am__fastdepCC_TRUE: compile and write dependency info to
#    $(DEPDIR)/$*.Tpo in one step; the .Tpo is renamed to .Po only when
#    the compile succeeds, otherwise it is removed and the recipe fails.
#  - am__fastdepCC_FALSE: plain `$(COMPILE) -c', run through $(depcomp)
#    when the AMDEP_TRUE lines are also active.
.c.o:
@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
 
# Compile a .c file to a .obj object.  Same structure as the .c.o rule,
# except that the source path is first converted with $(CYGPATH_W).
.c.obj:
@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
uninstall-info-am:
 
# Build an ID database with mkid.  The file list is $(SOURCES), $(HEADERS),
# $(LISP) and $(TAGS_FILES); a name that is not a file in the build
# directory is prefixed with $(srcdir)/, and the awk script removes
# duplicate names before they are passed to mkid.
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
mkid -fID $$unique
tags: TAGS
 
# Run $(ETAGS) over the de-duplicated file list (built the same way as in
# the ID rule).  Nothing is run when $(ETAGS_ARGS) and the file list are
# both empty.
# NOTE(review): `here' is assigned but not used in this recipe, and
# $$empty_fix is never assigned here, so it expands to whatever the
# environment provides (normally nothing).
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique; \
fi
# `ctags' is the lower-case alias for CTAGS.  CTAGS runs $(CTAGS) over the
# de-duplicated file list (built the same way as in the ID rule) and is
# skipped when $(CTAGS_ARGS) and the file list are both empty.
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$tags $$unique
 
# Run gtags from $(top_srcdir), passing the absolute path of
# $(top_builddir) (captured in `here') as its final argument.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
# Copy every file named in $(DISTFILES) into $(distdir).
# NOTE(review): $(distdir) is not assigned in this Makefile.in; presumably
# the caller passes it on the make command line -- confirm.
#  - srcdirstrip / topsrcdirstrip: sed 's|.|.|g' replaces every character
#    with '.', producing a regex that matches any string of the same
#    length.  It is applied only inside the `case' arms that have already
#    matched the prefix, where it strips "$(srcdir)/" or rewrites
#    "$(top_srcdir)/" to "$(top_builddir)/".
#  - A file is taken from the build directory when it exists there,
#    otherwise from $(srcdir).
#  - Directories are copied recursively: the $(srcdir) copy first (when it
#    is a different directory), then the build-directory copy on top.
#  - Plain files are copied unless already present in $(distdir).
#  - Any failed copy aborts the recipe with exit 1.
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
list='$(DISTFILES)'; for file in $$list; do \
case $$file in \
$(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
$(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
esac; \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
if test "$$dir" != "$$file" && test "$$dir" != "."; then \
dir="/$$dir"; \
$(mkdir_p) "$(distdir)$$dir"; \
else \
dir=''; \
fi; \
if test -d $$d/$$file; then \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
# Standard entry points; each forwards to its `-am' counterpart.
# all-am does the real work of `all': it brings Makefile up to date and
# builds $(LIBRARIES).  installdirs has nothing to do in this directory.
check-am: all-am
check: check-am
all-am: Makefile $(LIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
# Re-invoke `make install' with INSTALL_PROGRAM and install_sh_PROGRAM
# replaced by $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s.  When
# $(STRIP) is non-empty, INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)' is
# passed as well.
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
 
# distclean: after distclean-am has run, also remove the dependency
# directory ./$(DEPDIR) and the generated Makefile itself.
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
info: info-am
 
info-am:
 
install-data-am:
 
install-exec-am:
 
install-info: install-info-am
 
install-man:
 
installcheck-am:
 
# maintainer-clean: everything distclean-am does plus the warning printed
# by maintainer-clean-generic, then remove ./$(DEPDIR) and Makefile.
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-info-am
 
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-noinstLIBRARIES ctags distclean distclean-compile \
distclean-generic distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-exec install-exec-am install-info \
install-info-am install-man install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
uninstall-am uninstall-info-am
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/cuc.h
0,0 → 1,332
/* cuc.h -- OpenRISC Custom Unit Compiler, main header file
* Copyright (C) 2002 Marko Mlinar, markom@opencores.org
*
* This file is part of OpenRISC 1000 Architectural Simulator.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 
#ifndef __DATAF_H__
#define __DATAF_H__
 
/* Maximum number of instructions per function; also the multiplier that
   REF() uses to pack a basic-block index and an instruction index */
#define MAX_INSNS 0x10000
#define MAX_BB 0x1000 /* size of cuc_func.bb[]; also the value of BBID_START */
#define MAX_REGS 34 /* the two highest indexes are FLAG_REG and LRBB_REG */
#define FLAG_REG (MAX_REGS - 2)
#define LRBB_REG (MAX_REGS - 1)
#define CUC_MAX_STACK 0x1000 /* if more, not converted */
#define MAX_PREROLL 32
#define MAX_UNROLL 32
 
#define IT_BRANCH 0x0001 /* Branch instruction */
#define IT_INDELAY 0x0002 /* Instruction is in delay slot */
#define IT_BBSTART 0x0004 /* BB start marker */
#define IT_BBEND 0x0008 /* BB end marker */
#define IT_OUTPUT 0x0010 /* this instruction holds final value of the register */
#define IT_SIGNED 0x0020 /* Instruction is signed */
#define IT_MEMORY 0x0040 /* Instruction does memory access */
#define IT_UNUSED 0x0080 /* dead instruction marker */
#define IT_FLAG1 0x0100 /* misc flags */
#define IT_FLAG2 0x0200
#define IT_VOLATILE 0x0400 /* Should not be moved/removed */
#define IT_MEMADD 0x0800 /* add before the load -- should not be removed */
#define IT_COND 0x1000 /* Conditional */
#define IT_LATCHED 0x2000 /* Output of this instruction is latched/registered */
#define IT_CUT 0x4000 /* After this instruction register is placed */
 
/* Operand type flags.  NOTE(review): presumably the values stored in
   cuc_insn.opt[] ("operand types") -- confirm against the users. */
#define OPT_NONE 0x00
#define OPT_CONST 0x01
#define OPT_REGISTER 0x02
#define OPT_REF 0x04
#define OPT_JUMP 0x08 /* Jump to an instruction index */
#define OPT_DEST 0x10 /* This operand is dest */
#define OPT_BB 0x20 /* Jump to BB */
#define OPT_LRBB 0x40 /* 0 if we came in from left BB, or 1 otherwise */
 
#define MT_WIDTH 0x007 /* These bits hold memory access width in bytes */
#define MT_BURST 0x008 /* burst start & end markers */
#define MT_BURSTE 0x010
#define MT_CALL 0x020 /* This is a call */
#define MT_LOAD 0x040 /* This memory access does a read */
#define MT_STORE 0x080 /* This memory access does a write */
#define MT_SIGNED 0x100 /* Signed memory access */
 
/* Memory ordering levels, from least strict (0) to most strict (3) */
#define MO_NONE 0 /* different memory ordering, even if there are dependencies,
burst can be made, width can change */
#define MO_WEAK 1 /* different memory ordering, if there cannot be dependencies,
burst can be made, width can change */
#define MO_STRONG 2 /* Same memory ordering, burst can be made, width can change */
#define MO_EXACT 3 /* Exactly the same memory ordering and widths */
 
/* Basic block type flags (bit mask); note that 0x04 is not assigned */
#define BB_INLOOP 0x01 /* This block is inside a loop */
#define BB_OPTIONAL 0x02
#define BB_DEAD 0x08 /* This block is inaccessible -> to be removed */
 
#define BBID_START MAX_BB /* Start BB pointer */
#define BBID_END (MAX_BB + 1) /* End BB pointer */
 
/* Various macros to minimize code size.
   REF(bb,i) packs a basic-block index and an instruction index into one
   integer (bb * MAX_INSNS + i); REF_BB and REF_I recover the two parts.
   INSN(ref) expands to bb[...].insn[...], so it can only be used where an
   array named `bb' is in scope at the point of use. */
#define REF(bb,i) (((bb) * MAX_INSNS) + (i))
#define REF_BB(r) ((r) / MAX_INSNS)
#define REF_I(r) ((r) % MAX_INSNS)
#define INSN(ref) bb[REF_BB(ref)].insn[REF_I(ref)]
#ifndef MIN
# define MIN(x,y) ((x) < (y) ? (x) : (y))
#endif
 
#ifndef MAX
# define MAX(x,y) ((x) > (y) ? (x) : (y))
#endif
 
/* Write a printf-style message to the log stream `flog' and flush it
   immediately.  The body is wrapped in do { } while (0) so that
   `log (...);' is exactly one statement; a bare { } block followed by `;'
   breaks when used as the unbraced body of an if that has an else. */
#define log(x...) do { fprintf (flog, x); fflush (flog); } while (0)
 
/* Print a printf-style debug message through PRINTF when the message
   level `x' is less than or equal to the global cuc_debug setting.
   The body is wrapped in do { } while (0) so that `cucdebug (...);' is
   exactly one statement and is safe as the unbraced body of an if/else. */
#define cucdebug(x,s...) do { if ((x) <= cuc_debug) PRINTF (s); } while (0)
 
#define CUC_WIDTH_ITERATIONS 256
 
/* Options */
/* Whether we are debugging cuc (0-9) */
extern int cuc_debug;
 
/* Temporary registers by software convention */
extern const int caller_saved[MAX_REGS];
 
/* Singly linked list of dependency references.  The code in bb.c walks it
   with `while (d) ... d = d->next', i.e. the last node has next == NULL.
   NOTE(review): `ref' is unsigned long here, while add_dep() takes the
   value to store as an int. */
typedef struct _dep_list_t {
unsigned long ref;
struct _dep_list_t *next;
} dep_list;
 
/* Shared list, if marked dead, entry is not used.
   Entries are chained through `next'; `from' points to another entry of
   the same type (NOTE(review): its exact meaning is not visible in this
   header -- confirm in the csm code). */
typedef struct _csm_list {
int ref;
int cnt;
int cmovs;
double size, osize;
int cmatch;
int dead;
int ninsn; /* Number of associated instructions */
struct _csm_list *from;
struct _csm_list *next;
} cuc_shared_list;
 
/* Shared resource item definition; cuc_timings.shared points to an array
   of these and csm_gen() receives one together with its element count */
typedef struct {
int ref;
int cmatch;
} cuc_shared_item;
 
/* Implementation specific timings */
typedef struct {
int b; /* Basic block # this timing is referring to */
int preroll; /* How many times was this BB pre/unrolled */
int unroll;
int nshared; /* presumably the number of entries in `shared' -- confirm */
cuc_shared_item *shared; /* List of shared resources */
int new_time;
double size;
} cuc_timings;
 
/* Instruction entity.
   NOTE: MAX_OPERANDS is not defined in this header; it must be provided
   by a header included before cuc.h. */
typedef struct {
int type; /* type of the instruction */
int index; /* Instruction index */
int opt[MAX_OPERANDS]; /* operand types */
unsigned long op[MAX_OPERANDS]; /* operand values */
dep_list *dep; /* instruction dependencies (linked list; cpy_bb() in bb.c
                  rebuilds it node by node when copying) */
unsigned long insn; /* Instruction opcode */
char disasm[40]; /* disassembled string */
unsigned long max; /* max result value */
int tmp; /* NOTE(review): looks like a scratch field -- confirm */
} cuc_insn;
 
/* Basic block entity */
typedef struct {
unsigned long type; /* Type of the bb */
int first, last; /* Where this block lies */
int prev[2], next[2]; /* neighbouring BB ids; print_bb_num() shows a
                         negative id as "*" and BBID_START/BBID_END as
                         START/END */
int tmp;
cuc_insn *insn; /* Instructions lie here (array of ninsn entries,
                   allocated with malloc in cpy_bb()) */
int ninsn; /* Number of instructions */
int last_used_reg[MAX_REGS];
dep_list *mdep; /* Last memory access dependencies */
int nmemory;
int cnt; /* how many times was this block executed */
int unrolled; /* how many times has been this block unrolled */
int ntim; /* Basic block options */
cuc_timings *tim; /* array of ntim entries (cpy_bb() allocates
                     sizeof (cuc_timings) * ntim) */
int selected_tim; /* Selected option, -1 if none */
} cuc_bb;
 
/* Function entity.
   NOTE: bb[] embeds MAX_BB basic blocks and msched[]/mtype[] hold
   MAX_INSNS ints each, so a cuc_func object is large. */
typedef struct _cuc_func {
/* Basic blocks */
int num_bb; /* number of entries of bb[] in use (loop bound in print_cuc_bb) */
cuc_bb bb[MAX_BB];
int saved_regs[MAX_REGS];/* Whether this register was saved */
int lur[MAX_REGS]; /* Location of last use */
int used_regs[MAX_REGS]; /* Nonzero if it was used */
/* Schedule of memory instructions */
int nmsched; /* number of entries of msched[] in use */
int msched[MAX_INSNS];
int mtype[MAX_INSNS];
 
/* initial bb and their relocations to new block numbers */
int num_init_bb;
int *init_bb_reloc;
int orig_time; /* time in cyc required for SW implementation */
int num_runs; /* Number times this function was run */
cuc_timings timings; /* Base timings */
unsigned long start_addr; /* Address of first instruction in function */
unsigned long end_addr; /* Address of last instruction in function */
int memory_order; /* Memory order */
 
int nfdeps; /* Function dependencies */
struct _cuc_func **fdeps;
 
int tmp;
} cuc_func;
 
/* Instructions from function */
extern cuc_insn insn[MAX_INSNS];
extern int num_insn;
extern int reloc[MAX_INSNS];
/* Log stream written by the log() macro.  NOTE: FILE requires <stdio.h>,
   which this header does not include itself. */
extern FILE *flog;
 
/* Loads from file into global array insn */
int cuc_load (char *in_fn);
 
/* Negates conditional instruction */
void negate_conditional (cuc_insn *ii);
 
/* Scans sequence of BBs and set bb[].cnt */
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename);
 
/* Prints out instructions */
void print_insns (int bb, cuc_insn *insn, int size, int verbose);
 
/* prints out bb string */
void print_bb_num (int num);
 
/* Print out basic blocks */
void print_cuc_bb (cuc_func *func, char *s);
 
/* Duplicates function */
cuc_func *dup_func (cuc_func *f);
 
/* Releases memory allocated by function */
void free_func (cuc_func *f);
 
/* Common subexpression matching -- resource sharing, analysis pass */
void csm (cuc_func *f);
 
/* Common subexpression matching -- resource sharing, generation pass */
void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared);
 
/* Set the BB limits */
void detect_bb (cuc_func *func);
 
/* Optimize basic blocks */
int optimize_bb (cuc_func *func);
 
/* Search and optimize complex cmov assignments */
int optimize_cmovs (cuc_func *func);
 
/* Optimizes dataflow tree */
int optimize_tree (cuc_func *func);
 
/* Remove nop instructions */
int remove_nops (cuc_func *func);
 
/* Removes dead instruction */
int remove_dead (cuc_func *func);
 
/* Removes trivial register assignments */
int remove_trivial_regs (cuc_func *f);
 
/* Determine inputs and outputs */
void set_io (cuc_func *func);
 
/* Removes BBs marked as dead */
int remove_dead_bb (cuc_func *func);
 
/* Common subexpression elimination */
int cse (cuc_func *f);
 
/* Detect register dependencies */
void reg_dep (cuc_func *func);
 
/* Cuts the tree and marks registers */
void mark_cut (cuc_func *f);
 
/* Pre-roll/unroll a loop and return a new function. Original
function is unmodified.
NOTE(review): `b' looks like the basic-block index of the loop, with
`preroll' and `unroll' giving the counts -- confirm against the definition. */
cuc_func *preunroll_loop (cuc_func *func, int b, int preroll, int unroll, char *bb_filename);
 
/* Clean memory and data dependencies */
void clean_deps (cuc_func *func);
 
/* Schedule memory accesses
0 - exact; 1 - strong; 2 - weak; 3 - none
NOTE(review): this numbering is the reverse of the MO_* constants defined
earlier in this header (MO_NONE 0 ... MO_EXACT 3); confirm which values
otype actually takes before relying on either. */
int schedule_memory (cuc_func *func, int otype);
 
/* Generates verilog file out of insn dataflow */
void output_verilog (cuc_func *func, char *filename, char *funcname);
 
/* Recalculates bb[].cnt values, based on generated profile file */
void recalc_cnts (cuc_func *f, char *bb_filename);
 
/* Calculate timings */
void analyse_timings (cuc_func *func, cuc_timings *timings);
 
/* Calculates facts, that are determined by conditionals */
void insert_conditional_facts (cuc_func *func);
 
/* Width optimization -- detect maximum values */
void detect_max_values (cuc_func *f);
 
/* Inserts n nops before insn 'ref' */
void insert_insns (cuc_func *f, int ref, int n);
 
/* Checks for some anomalies with references */
void cuc_check(cuc_func *f);
 
/* Adds memory dependencies based on ordering type */
void add_memory_dep(cuc_func *f, int otype);
 
/* Prints out instructions */
void print_cuc_insns(char *s, int verbose);
 
/* Build basic blocks */
void build_bb(cuc_func *f);
 
/* Latch outputs in loops */
void add_latches(cuc_func *f);
 
/* split the BB, based on the group numbers in .tmp */
void expand_bb (cuc_func *f, int b);
 
void generate_main(int nfuncs, cuc_func **f, char *filename);
 
/* Adds reference `dep' to the dependency list *list; cpy_bb() in bb.c
   calls it once per node to rebuild a copy of an existing list */
void add_dep(dep_list **list, int dep);
 
void dispose_list(dep_list **list);
 
void main_cuc(char *filename);
 
void add_data_dep(cuc_func *f);
#endif /* __DATAF_H__ */
cuc.h Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: bb.c =================================================================== --- bb.c (nonexistent) +++ bb.c (revision 1765) @@ -0,0 +1,1508 @@ +/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "sim-config.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "support/profile.h" + +/* prints out bb string */ +void print_bb_num (int num) +{ + if (num < 0) PRINTF ("*"); + else if (num == BBID_END) PRINTF ("END"); + else if (num == BBID_START) PRINTF ("START"); + else PRINTF ("%2x", num); +} + +/* Print out basic blocks */ +void print_cuc_bb (cuc_func *f, char *s) +{ + int i; + PRINTF ("------- %s -------\n", s); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].insn) PRINTF ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt); + else PRINTF ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last); + PRINTF (" type %02lx tmp %i ", f->bb[i].type, f->bb[i].tmp); + PRINTF ("next "); print_bb_num (f->bb[i].next[0]); + PRINTF (" "); print_bb_num (f->bb[i].next[1]); + PRINTF (" prev "); print_bb_num (f->bb[i].prev[0]); + PRINTF (" "); print_bb_num (f->bb[i].prev[1]); + PRINTF ("\n"); + + if (f->bb[i].insn) print_insns (i, f->bb[i].insn, f->bb[i].ninsn, 0); + } + if (f->nmsched) { + PRINTF ("\nmsched: "); + for (i = 0; i < f->nmsched; i++) + PRINTF ("%x ", f->msched[i]); + PRINTF ("\n\n\n"); + } else PRINTF ("\n"); + fflush (stdout); +} + +/* Copies src basic block into destination */ +void cpy_bb (cuc_bb *dest, cuc_bb *src) +{ + int i, j; + dep_list *d; + assert (dest != src); + *dest = *src; + assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn)); + for (i = 0; i < src->ninsn; i++) { + d = src->insn[i].dep; + dest->insn[i] = src->insn[i]; + dest->insn[i].dep = NULL; + while (d) { + add_dep (&dest->insn[i].dep, d->ref); + d = d->next; + } + } + + d = src->mdep; + dest->mdep = NULL; + while (d) { + add_dep (&dest->mdep, d->ref); + d = d->next; + } + if (src->ntim) { + assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim)); + for (i = 0; i < src->ntim; i++) { + 
dest->tim[i] = src->tim[i]; + if (src->tim[i].nshared) { + assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared)); + for (j = 0; j < src->tim[i].nshared; j++) + dest->tim[i].shared[j] = src->tim[i].shared[j]; + } + } + } +} + +/* Duplicates function: returns a newly malloc'ed copy of f. + * Basic blocks are deep-copied with cpy_bb; init_bb_reloc and fdeps get + * their own arrays (the functions fdeps points to are shared, not copied). + * f itself is left unmodified. Release the result with free_func. */ +cuc_func *dup_func (cuc_func *f) +{ + cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func)); + int b, i; + assert (n); + for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]); + n->num_bb = f->num_bb; + /* allocate outside of assert (), so the call survives NDEBUG builds */ + n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb); + assert (n->init_bb_reloc); + for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b]; + n->num_init_bb = f->num_init_bb; + for (i = 0; i < MAX_REGS; i++) { + n->saved_regs[i] = f->saved_regs[i]; + n->lur[i] = f->lur[i]; + n->used_regs[i] = f->used_regs[i]; + } + n->start_addr = f->start_addr; + n->end_addr = f->end_addr; + n->orig_time = f->orig_time; + n->nmsched = f->nmsched; + n->num_runs = f->num_runs; + for (i = 0; i < f->nmsched; i++) { + n->msched[i] = f->msched[i]; + n->mtype[i] = f->mtype[i]; + } + n->nfdeps = f->nfdeps; + if (f->nfdeps) { + /* the copy needs its own array; f->fdeps must stay untouched */ + n->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps); + assert (n->fdeps); + for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i]; + } else n->fdeps = NULL; /* never leave the pointer uninitialized */ + return n; +} + +/* Releases memory allocated by function */ +void free_func (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + dispose_list (&f->bb[b].insn[i].dep); + if (f->bb[b].insn) free (f->bb[b].insn); + for (i = 0; i < f->bb[b].ntim; i++) + if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared) + free (f->bb[b].tim[i].shared); + if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim); + } + free (f); +} + +/* Recalculates last_used_reg */ +void recalc_last_used_reg (cuc_func *f, int b) +{ + int i; + cuc_bb *bb = &f->bb[b]; + + /* rebuild last used reg array */ + if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0; + else bb->last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS 
- 1; i++) bb->last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < bb->ninsn; i++) { + int k; + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST) + if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)bb->insn[i].op[k] >= 0) { + bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i); + } + } +} + +/* Set the BB limits */ +void detect_bb (cuc_func *f) +{ + int i, j, end_bb = 0, eb = 0; + + /* Mark block starts/ends */ + for (i = 0; i < num_insn; i++) { + if (end_bb) insn[i].type |= IT_BBSTART; + end_bb = 0; + if (insn[i].type & IT_BRANCH) { + int jt = insn[i].op[0]; + insn[i].type |= IT_BBEND; + end_bb = 1; + if (jt < 0 || jt >= num_insn) { + fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm); + exit (1); + } + if (jt > 0) insn[jt - 1].type |= IT_BBEND; + insn[jt].type |= IT_BBSTART; + } + } + + /* Initialize bb array */ + insn[0].type |= IT_BBSTART; + insn[num_insn - 1].type |= IT_BBEND; + f->num_bb = 0; + for (i = 0; i < num_insn; i++) { + if (insn[i].type & IT_BBSTART) { + f->bb[f->num_bb].first = i; + f->bb[f->num_bb].cnt = 0; + } + /* Determine repetitions of a loop */ + if (insn[i].type & IT_BBEND) { + f->bb[f->num_bb].type = 0; + f->bb[f->num_bb].last = i; + f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1; + f->bb[f->num_bb].tmp = 0; + f->bb[f->num_bb].ntim = 0; + f->num_bb++; + assert (f->num_bb < MAX_BB); + } + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT"); + + /* Build forward connections between BBs */ + for (i = 0; i < f->num_bb; i++) + if (insn[f->bb[i].last].type & IT_BRANCH) { + int j; + assert (insn[f->bb[i].last].index == II_BF); + /* Find block this instruction jumps to */ + for (j = 0; j < f->num_bb; j++) + if (f->bb[j].first == insn[f->bb[i].last].op[0]) break; + assert (j < f->num_bb); + + /* Convert the jump address to BB link */ + insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB; + + /* Make 
a link */ + f->bb[i].next[0] = j; + if (++f->bb[j].tmp > 2) eb++; + f->bb[i].next[1] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */ + } else { + f->bb[i].next[0] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } + + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT"); + + /* Build backward connections, but first insert artificial blocks + * to handle more than 2 connections */ + cucdebug (6, "artificial %i %i\n", f->num_bb, eb); + end_bb = f->num_bb + eb; + for (i = f->num_bb - 1; i >= 0; i--) { + j = f->bb[i].tmp; + if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp; + f->bb[--end_bb] = f->bb[i]; + reloc[i] = end_bb; + while (j-- > 2) { + f->bb[--end_bb].first = f->bb[i].first; + f->bb[end_bb].last = -1; + f->bb[end_bb].next[0] = -1; + f->bb[end_bb].next[1] = -1; + f->bb[end_bb].tmp = 0; + f->bb[end_bb].cnt = f->bb[i].cnt; + f->bb[end_bb].ntim = 0; + } + } + f->num_bb += eb; + + /* relocate jump instructions */ + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_BB) + insn[i].op[j] = reloc[insn[i].op[j]]; + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc"); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 0) { + int t = reloc[f->bb[i].next[0]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[0] = t; + } + if (f->bb[i].next[1] >= 0) { + int t = reloc[f->bb[i].next[1]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[1] = t; + } + /* artificial blocks do not have relocations, hardcode them */ + if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1; + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT"); + + /* Uncoditional branched do not continue to next block */ + for (i = 0; i < f->num_bb; i++) { + 
cuc_insn *ii; + if (f->bb[i].last < 0) continue; + ii = &insn[f->bb[i].last]; + /* Unconditional branch? */ + if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) { + change_insn_type (ii, II_NOP); +#if 0 + if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1]; +#endif + f->bb[i].next[1] = -1; + } + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP"); + + /* Add backward connections */ + for (i = 0; i < f->num_bb; i++) + f->bb[i].prev[0] = f->bb[i].prev[1] = -1; + + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 0) { + int t = f->bb[i].next[0]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + if (f->bb[i].next[1] >= 0) { + int t = f->bb[i].next[1]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + } + /* Add START marker */ + assert (f->bb[0].prev[0] < 0); + f->bb[0].prev[0] = BBID_START; + + /* Add END marker */ + assert (f->bb[f->num_bb - 1].next[0] < 0); + assert (f->bb[f->num_bb - 1].next[1] < 0); + f->bb[f->num_bb - 1].next[0] = BBID_END; + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV"); +} + +/* We do a quick check if there are some anomalies with references */ +void cuc_check (cuc_func *f) +{ + int i, j = 0, k = 0; + cucdebug (1, "cuc_check\n"); + for (i = 0; i < f->num_bb; i++) { + if (!f->bb[i].insn && f->bb[i].ninsn) goto err; + for (j = 0; j < f->bb[i].ninsn; j++) { + cuc_insn *ii = &f->bb[i].insn[j]; + if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND && ii->opt[0] & OPT_DEST) { + k = 0; + assert (ii->opt[k] & OPT_REGISTER); + if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) { + cucdebug (1, "Invalid dest conditional type opt%x op%lx\n", ii->opt[0], ii->op[0]); + goto err; + } + } + for (k = 0; k < MAX_OPERANDS; k++) { + if (ii->opt[k] & OPT_REF) { + int t = ii->op[k]; + if (REF_BB(t) >= f->num_bb || REF_I (t) >= 
f->bb[REF_BB(t)].ninsn || + ((ii->index == II_CMOV || ii->index == II_ADD) && + (((f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3) || + ((!(f->INSN(t).type & IT_COND) && k == 3))))) { + cucdebug (1, "Conditional misused\n"); + goto err; + } + } + if (k && ii->opt[k] & OPT_DEST) { + cucdebug (1, "Destination only allowed for op0!\n"); + goto err; + } + } + } + } + return; +err: + cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k); + print_cuc_bb (f, "ANOMALY"); + cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k); + exit (1); +} + +/* Build basic blocks */ +void build_bb (cuc_func *f) +{ + int i, j, k; + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1; + else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1; + assert (f->bb[i].ninsn >= MAX_REGS - 1); + f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn); + assert (f->bb[i].insn); + f->bb[i].nmemory = 0; + f->bb[i].unrolled = 1; + + /* Save space for conditional moves, exclude r0, place lrbb instead */ + change_insn_type (&f->bb[i].insn[0], II_LRBB); + strcpy (f->bb[i].insn[0].disasm, "lrbb"); + f->bb[i].insn[0].type = IT_UNUSED | IT_COND; + f->bb[i].insn[0].dep = NULL; + f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST; + f->bb[i].insn[0].opt[1] = OPT_LRBB; + f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE; + for (j = 1; j < MAX_REGS - 1; j++) { + change_insn_type (&f->bb[i].insn[j], II_CMOV); + strcpy (f->bb[i].insn[j].disasm, "cmov"); + f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? 
IT_COND : 0; + f->bb[i].insn[j].dep = NULL; + f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER; + f->bb[i].insn[j].opt[0] |= OPT_DEST; + f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j; + f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER; + } + + /* Relocate instructions */ + for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) { + f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)]; + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int b1; + for (b1 = 0; b1 < i; b1++) + if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k] + && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break; + assert (b1 < f->num_bb); + f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1); + } + if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++; + } + } + cuc_check (f); +} + +/* Does simplification on blocks A, B, C: + A->B->C, A->C to just A->B->C */ +static void simplify_bb (cuc_func *f, int pred, int s1, int s2, int neg) +{ + cuc_insn *last; + int i; + if (cuc_debug >= 3) print_cuc_bb (f, "BEFORE_SIMPLIFY"); + cucdebug (3, "simplify %x->%x->%x (%i)\n", pred, s1, s2, neg); + assert (s2 != pred); /* Shouldn't occur => stupid */ + f->bb[pred].next[1] = -1; + f->bb[pred].next[0] = s1; + + if (f->bb[s2].prev[0] == pred) { + f->bb[s2].prev[0] = f->bb[s2].prev[1]; + f->bb[s2].prev[1] = -1; + } else if (f->bb[s2].prev[1] == pred) { + f->bb[s2].prev[1] = -1; + } else assert (0); + + last = &f->bb[pred].insn[f->bb[pred].ninsn - 1]; + assert (last->type & IT_BRANCH); + for (i = 0; i < f->bb[s2].ninsn; i++) { + cuc_insn *ii= &f->bb[s2].insn[i]; + if (ii->index == II_LRBB) { + change_insn_type (ii, II_CMOV); + ii->type = IT_COND; + ii->op[1] = neg ? 0 : 1; ii->opt[1] = OPT_CONST; + ii->op[2] = neg ? 
1 : 0; ii->opt[2] = OPT_CONST; + ii->op[3] = last->op[1]; ii->opt[3] = last->opt[1]; + } + } + change_insn_type (last, II_NOP); + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_SIMPLIFY"); +} + +/* type == 0; keep predecessor condition + * type == 1; keep successor condition + * type == 2; join loop unrolled blocks */ +static void join_bb (cuc_func *f, int pred, int succ, int type) +{ + int i, j, k, n1, n2, ninsn, add_cond = 0; + unsigned long cond_op = 0, cond_opt = 0; + cuc_insn *insn; + + if (cuc_debug) cuc_check (f); + cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type); + cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn); + if (cuc_debug >= 3) fflush (stdout); + + n1 = f->bb[pred].ninsn; + n2 = f->bb[succ].ninsn; + if (n1 <= 0 + || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1; + if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1; + if (type == 2) add_cond = 1; + + //assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */ + + ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? 
MAX_REGS : 0); + + insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn)); + for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i]; + /* when type == 0, we move the last (jump) instruction to the end */ + if (type == 0 || type == 2) { + /* Move first branch instruction to the end */ + assert (insn[n1 - 1].type & IT_BRANCH); + insn[ninsn - 1] = insn[n1 - 1]; + cond_op = insn[n1 - 1].op[1]; + cond_opt = insn[n1 - 1].opt[1]; + + /* Remove old branch */ + change_insn_type (&insn[n1 - 1], II_NOP); + } + /* Copy second block */ + for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i]; + + /* and when type == 2, we may need to add sfor instruction, to quit when either is true */ + if (type == 2) { + /* Move second branch instruction to the end */ + if (insn[n1 + n2 - 1].type & IT_BRANCH) { + insn[ninsn - 1] = insn[n1 + n2 - 1]; + + /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */ + insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF; + + /* Remove old one */ + change_insn_type (&insn[n1 + n2 - 1], II_NOP); + } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */ + } + +#if 1 + /* LRBB at start of succ BB is not valid anymore */ + if (n1 > 0 && insn[n1].index == II_LRBB) { + if (type == 1) { + /* We have two possibilities, how this could have happened: + 1. we just moved second predecessor of succ to pred, + pred now having two predecessors => everything is ok + 2. 
we just moved second predecessor of succ to pred, + now, having just one predecessor => LRBB is not needed anymore */ + if (f->bb[pred].prev[1] < 0) { /* handle second option */ + change_insn_type (&insn[n1], II_ADD); + insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST; + insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST; + insn[n1].opt[3] = OPT_NONE; + } + } else { + assert (0); /* not tested yet */ + change_insn_type (&insn[n1], II_NOP); + for (i = n1; i < ninsn; i++) + if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) { + assert (insn[i].opt[3] == OPT_REF); + insn[i].op[3] = cond_op; + insn[i].opt[3] = cond_opt; + if (f->bb[pred].next[0] != succ) { + unsigned long t; /* negate conditional -- exchange */ + assert (f->bb[pred].next[1] == succ); + t = insn[i].op[1]; + insn[i].op[1] = insn[i].op[2]; + insn[i].op[2] = t; + t = insn[i].opt[1]; + insn[i].opt[1] = insn[i].opt[2]; + insn[i].opt[2] = t; + } + } + } + } +#endif + + for (i = 0; i < ninsn; i++) reloc[i] = -1; + + /* Add conditional instructions if required */ + if (add_cond) { + recalc_last_used_reg (f, pred); + recalc_last_used_reg (f, succ); + + /* r0 -- add nop for it */ + change_insn_type (&insn[n1 + n2], II_NOP); + for (i = 1; i < MAX_REGS; i++) { + cuc_insn *ii = &insn[n1 + n2 + i]; + int a = f->bb[pred].last_used_reg[i]; + int b = f->bb[succ].last_used_reg[i]; + + /* We have deleted first branch instruction, now we must setup FLAG_REG, + to point to conditional */ + if (i == FLAG_REG) { + change_insn_type (ii, II_CMOV); + ii->type = i == FLAG_REG || i == LRBB_REG ? 
IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = cond_op; ii->opt[1] = cond_opt; + if (b >= 0) { + ii->op[2] = b; ii->opt[2] = OPT_REF; + } else { + ii->op[2] = cond_op; ii->opt[2] = cond_opt; + } + ii->op[3] = cond_op; ii->opt[3] = cond_opt; + reloc[REF_I(a)] = REF (pred, n1 + n2 + i); + } else if (b < 0) change_insn_type (ii, II_NOP); + else if (a < 0) { + change_insn_type (ii, II_ADD); + ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = b; ii->opt[1] = OPT_REF; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + } else if (b >= 0) { + change_insn_type (ii, II_CMOV); + ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = a; ii->opt[1] = OPT_REF; + ii->op[2] = b; ii->opt[2] = OPT_REF; + ii->op[3] = cond_op; ii->opt[3] = cond_opt; + reloc[REF_I(a)] = REF (pred, n1 + n2 + i); + } + sprintf (ii->disasm, "cmov (join BB)"); + } + } + + if (cuc_debug) cuc_check (f); + i = 0; + switch (type) { + case 0: + assert (f->bb[pred].next[0] >= 0); + if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0]; + if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0]; + break; + case 1: + assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END); + f->bb[pred].next[0] = f->bb[succ].next[0]; + f->bb[pred].next[1] = f->bb[succ].next[1]; + break; + case 2: + assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END); + f->bb[pred].next[0] = f->bb[succ].next[0]; + f->bb[pred].next[1] = f->bb[succ].next[1]; + break; + } + if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1]; + if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1; + + if (type == 0) assert (f->bb[succ].next[1] < 0); + + /* We just did something stupid -- we joined two predecessors 
into one; + succ may need the information from which block we came. We will repair + this by converting LRBB to CMOV */ + for (j = 0; j < 2; j++) { + int nb = f->bb[pred].next[j]; + int t; + + /* check just valid connections */ + if (nb < 0 || nb == BBID_END) continue; + + /* check type */ + if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1; + else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0; + else continue; + + /* check all LRBB instructions. */ + for (i = 0; i < f->bb[nb].ninsn; i++) + if (f->bb[nb].insn[i].index == II_LRBB) { + cuc_insn *lrbb =&f->bb[nb].insn[i]; + change_insn_type (lrbb, II_CMOV); + lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST; + lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST; + lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt; + lrbb->type |= IT_COND; + } + } + + f->bb[succ].type = BB_DEAD; + //PRINTF (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type); + /* remove branch instruction, if there is only one successor */ + if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) { + assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */ + change_insn_type (&insn[ninsn - 1], II_NOP); + } + + /* Set max count */ + if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt; + f->bb[pred].ninsn = ninsn; + f->bb[succ].ninsn = 0; + free (f->bb[pred].insn); f->bb[pred].insn = NULL; + free (f->bb[succ].insn); f->bb[succ].insn = NULL; + f->bb[pred].insn = insn; + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred; + if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred; + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + /* Check if we are referencing successor BB -> relocate to second 
part of + the new block */ + if (REF_BB (f->bb[i].insn[j].op[k]) == succ) { + int t = f->bb[i].insn[j].op[k]; + int ndest = REF (pred, REF_I (t) + n1); + //PRINTF ("%x: %x %x\n", REF(i, j), t, ndest); + + /* We've found a reference to succ. block, being removed, relocate */ + f->bb[i].insn[j].op[k] = ndest; + } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) { + if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) { + f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])]; + } + } + } + } + + if (cuc_debug) cuc_check (f); + if (cuc_debug >= 3) print_cuc_bb (f, "join"); +} + +/* Optimize basic blocks */ +int optimize_bb (cuc_func *f) +{ + int modified = 0; + int i, j; +remove_lrbb: + /* we can remove lrbb instructions from blocks with just one predecessor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */ + for (j = 0; j < f->bb[i].ninsn; j++) + if (f->bb[i].insn[j].index == II_LRBB) { + cuc_insn *t; + cucdebug (4, "-lrbb %x.%x\n", i, j); + + /* Change to add LRBB, 0, 0 */ + change_insn_type (&f->bb[i].insn[j], II_ADD); + f->bb[i].insn[j].type &= ~IT_VOLATILE; + f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST; + f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */ + f->bb[i].insn[j].opt[3] = OPT_NONE; + modified = 1; + if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) { + t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1]; + + /* If the predecessor still has a conditional jump instruction, we must be careful. + If next[0] == next[1] join them. 
Now we will link lrbb and correct the situation */ + if (t->type & IT_BRANCH) { /* We must set a reference to branch result */ + f->bb[i].insn[j].opt[1] = t->opt[1]; + f->bb[i].insn[j].op[1] = t->op[1]; + /* sometimes branch is not needed anymore */ + if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP); + } + } + } + } + } + + /* Ordering of joining types is cruical -- we should concat all directly connected BBs + together first, so when we do a type != 1 joining, we can remove LRBB, directly by + looking at number of its predeccessors */ + + /* Type 1 joining + 1. link between pred & succ + 2. no other pred's successors + 3. no other succ's predecessors, except if pred has max one */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + int p = f->bb[i].prev[0]; + if (p < 0 || p == BBID_START) continue; + /* one successor and max sum of 3 predecessors */ + if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0 + && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) { + /* First we will move all predecessors from succ to pred, and then we will do + real type 1 joining */ + if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) { + int p1 = f->bb[i].prev[1]; + /* joining is surely not worth another extra memory access */ + if (f->bb[p].nmemory) continue; + if (f->bb[p].prev[0] >= 0) { + assert (f->bb[p].prev[1] < 0); + f->bb[p].prev[1] = p1; + } else f->bb[p].prev[0] = p1; + if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p; + else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p; + else assert (0); + f->bb[i].prev[1] = -1; + } + assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */ + join_bb (f, p, i, 1); + modified = 1; + goto remove_lrbb; + } + } + + /* Type 0 joining + 1. link between pred & succ + 2. no memory accesses in succ + 3. optional pred's second successors + 4. max. 
one succ's successors */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START + && f->bb[i].prev[1] < 0 /* one predecessor */ + && f->bb[i].next[1] < 0 /* max. one successor */ + && f->bb[i].nmemory == 0) { /* and no memory acceses */ + join_bb (f, f->bb[i].prev[0], i, 0); + modified = 1; + goto remove_lrbb; + } + + /* Type 2 joining + 1. link between pred & succ + 2. succ has exactly one predeccessor + 3. pred & succ share common successor + 4. optional succ's second successor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */ + int p = f->bb[i].prev[0]; + if (p == BBID_START) continue; +#if 0 /* not yet supported */ + if (f->bb[p].next[0] == i + && (f->bb[i].next[1] == f->bb[p].next[1] + || f->bb[i].next[1] == f->bb[p].next[0])) { + join_bb (f, p, i, 2); + goto remove_lrbb; + } +#endif + if (f->bb[p].next[1] == i + && (f->bb[p].next[0] == f->bb[i].next[1] + || f->bb[p].next[0] == f->bb[i].next[0])) { + join_bb (f, p, i, 2); + modified = 1; + goto remove_lrbb; + } + } + + /* BB simplify: + 1. a block has exactly 2 successors A and B + 2. A has exactly one successor -- B + 3. A has no memory accesses + to: + flow always goes though A, LRBB is replaced by current block conditional + */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END + && f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) { + int a = f->bb[i].next[0]; + int b = f->bb[i].next[1]; + int neg = 0; + /* Exchange? 
*/ + if (f->bb[b].next[0] == a && f->bb[b].next[1] < 0) { + int t = a; + a = b; + b = t; + neg = 1; + } + /* Do the simplification if possible */ + if (f->bb[a].next[0] == b && f->bb[a].next[1] < 0 + && f->bb[a].nmemory == 0) { + simplify_bb (f, i, a, b, neg); + modified = 1; + goto remove_lrbb; + } + } + + return modified; +} + +/* Removes BBs marked as dead */ +int remove_dead_bb (cuc_func *f) +{ + int i, j, k, d = 0; + + for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) { + if (f->bb[i].insn) free (f->bb[i].insn); + f->bb[i].insn = NULL; + reloc[i] = -1; + } else { + reloc[i] = d; + f->bb[d++] = f->bb[i]; + } + if (f->num_bb == d) return 0; + f->num_bb = d; + + /* relocate initial blocks */ + for (i = 0; i < f->num_init_bb; i++) + f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]]; + + /* repair references */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + cucdebug (5, "%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]); + fflush (stdout); + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START) + assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0); + if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) + assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0); + if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END) + assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0); + if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) + assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0); + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1; + + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if ((f->bb[i].insn[j].opt[k] & OPT_BB) && + ((signed)f->bb[i].insn[j].op[k] >= 0)) { + if (f->bb[i].insn[j].op[k] != BBID_END) + assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0); + } else if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int t 
= f->bb[i].insn[j].op[k]; + assert (reloc[REF_BB(t)] >= 0); + f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t)); + } + } + return 1; +} + +/* Recursive calculation of dependencies */ +static void reg_dep_rec (cuc_func *f, int cur) +{ + int i, j; + cuc_insn *insn = f->bb[cur].insn; + + //PRINTF ("\n %i", cur); + /* Spread only, do not loop */ + if (f->bb[cur].tmp) return; + f->bb[cur].tmp = 1; + //PRINTF ("! "); + + for (i = 0; i < f->bb[cur].ninsn; i++) { + /* Check for destination operand(s) */ + for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST) + if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) { + //PRINTF ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i)); + assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */ + f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i); + } + } + + if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END) + reg_dep_rec (f, f->bb[cur].next[0]); + if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END) + reg_dep_rec (f, f->bb[cur].next[1]); +} + +/* Detect register dependencies */ +void reg_dep (cuc_func *f) +{ + int i, b, c; + + /* Set dead blocks */ + for (b = 0; b < f->num_bb; b++) { + f->bb[b].tmp = 0; + for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1; + } + + /* Start with first block and set dependecies of all reachable blocks */ + /* At the same time set last_used_regs */ + reg_dep_rec (f, 0); + + for (i = 0; i < f->num_bb; i++) + if (f->bb[i].tmp) f->bb[i].tmp = 0; + else f->bb[i].type |= BB_DEAD; + + /* Detect loops; mark BBs where loops must be broken */ + for (c = 0; c < f->num_bb; c++) { + int min = 3, minb = 0; + + /* search though all non-visited for minimum number of unvisited predecessors */ + for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) { + int tmp = 0; + if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START + && !f->bb[f->bb[b].prev[0]].tmp) tmp++; + if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] 
!= BBID_START + && !f->bb[f->bb[b].prev[1]].tmp) tmp++; + if (tmp < min) { + minb = b; + min = tmp; + if (tmp == 0) break; /* We already have the best one */ + } + } + b = minb; + f->bb[b].tmp = 1; /* Mark visited */ + cucdebug (3, "minb %i min %i\n", minb, min); + if (min) { /* We just broke the loop */ + f->bb[b].type |= BB_INLOOP; + } + } + + /* Set real predecessors in cmov instructions to previous blocks */ + for (b = 0; b < f->num_bb; b++) + for (i = 1; i < MAX_REGS - 1; i++) { + int pa, pb; + assert (f->bb[b].insn[i].index == II_CMOV); + assert (f->bb[b].insn[i].opt[0] == (OPT_REGISTER | OPT_DEST)); + assert (f->bb[b].insn[i].op[0] == i); + if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1; + else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i]; + if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1; + else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i]; + + /* We do some very simple optimizations right away to make things more readable */ + if (pa < 0 && pb < 0) { + /* Was not used at all */ + change_insn_type (&f->bb[b].insn[i], II_ADD); + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pa < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pb < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else { + int t = REF (b, 0); /* lrbb should be first instruction */ + assert (f->INSN(t).index == II_LRBB); + + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + assert 
(f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + + f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF; + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + + /* Update op[3] -- flag register */ + assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER); + assert (f->bb[b].insn[i].op[3] == LRBB_REG); + assert (t >= 0); + f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[3] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + } + } + + /* assign register references */ + for (b = 0; b < f->num_bb; b++) { + /* rebuild last used reg array */ + f->bb[b].last_used_reg[0] = -1; + if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0; + else f->bb[b].last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS - 1; i++) + f->bb[b].last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < f->bb[b].ninsn; i++) { + int k; + /* Check for source operands first */ + for (k = 0; k < MAX_OPERANDS; k++) { + if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) { + if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) { + int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]]; + + if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */ + f->bb[b].insn[i].opt[k] = OPT_CONST; + f->bb[b].insn[i].op[k] = 0; + } else if (t >= 0) { + f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[k] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + //f->INSN(t).op[0] = -1; + } + } else if (f->bb[b].insn[i].opt[k] & OPT_REF) { + //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */ + f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED; + } + } + } + + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST) + if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)f->bb[b].insn[i].op[k] >= 0) { + assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never 
be dest */ + f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i); + } + } + } + + /* Remove all unused lrbb */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP); + + /* SSAs with final register value are marked as outputs */ + assert (f->bb[f->num_bb - 1].next[0] == BBID_END); + for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) { + int t = f->bb[f->num_bb - 1].last_used_reg[i]; + /* Mark them volatile, so optimizer does not remove them */ + if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT; + } + { + int t = f->bb[f->num_bb - 1].last_used_reg[i]; + /* Mark them volatile, so optimizer does not remove them */ + if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT; + } +} + +/* split the BB, based on the group numbers in .tmp */ +void expand_bb (cuc_func *f, int b) +{ + int n = f->num_bb; + int mg = 0; + int b1, i, j; + + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; + + /* Create copies */ + for (b1 = 1; b1 <= mg; b1++) { + assert (f->num_bb < MAX_BB); + cpy_bb (&f->bb[f->num_bb], &f->bb[b]); + f->num_bb++; + } + + /* Relocate */ + for (b1 = 0; b1 < f->num_bb; b1++) + for (i = 0; i < f->bb[b1].ninsn; i++) { + dep_list *d = f->bb[b1].insn[i].dep; + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b1].insn[i].opt[j] & OPT_REF) { + int t = f->bb[b1].insn[i].op[j]; + if (REF_BB(t) == b && f->INSN(t).tmp != 0) + f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); + } + while (d) { + if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0) + d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref)); + d = d->next; + } + } + + /* Delete unused instructions */ + for (j = 0; j <= mg; j++) { + if (j == 0) b1 = b; + else b1 = n + j - 1; + for (i = 0; i < f->bb[b1].ninsn; i++) { + if (f->bb[b1].insn[i].tmp != j) + change_insn_type (&f->bb[b1].insn[i], II_NOP); + 
f->bb[b1].insn[i].tmp = 0; + } + if (j < mg) { + f->bb[b1].next[0] = n + j; + f->bb[b1].next[1] = -1; + f->bb[n + j].prev[0] = b1; + f->bb[n + j].prev[1] = -1; + } else { + i = f->bb[b1].next[0]; + f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1; + f->bb[n + j].prev[1] = -1; + if (i >= 0 && i != BBID_END) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + i = f->bb[b1].next[1]; + if (i >= 0 && i != BBID_END) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + } + } +} + +/* Scans sequence of BBs and set bb[].cnt */ +void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename) +{ + FILE *fi, *fo; + struct mprofentry_struct *buf; + const int bufsize = 256; + unsigned long *bb_start; + unsigned long *bb_end; + int b, i, r; + int curbb, prevbb = -1; + unsigned long addr = -1; + unsigned long prevaddr = -1; + int mssum = 0; + int mlsum = 0; + int mscnt = 0; + int mlcnt = 0; + int reopened = 0; + + /* Use already opened stream? 
*/ + if (runtime.sim.fmprof) { + fi = runtime.sim.fmprof; + reopened = 1; + rewind (fi); + } else assert (fi = fopen (mp_filename, "rb")); + assert (fo = fopen (bb_filename, "wb+")); + + assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + for (b = 0; b < f->num_bb; b++) { + bb_start[b] = f->start_addr + f->bb[b].first * 4; + bb_end[b] = f->start_addr + f->bb[b].last * 4; + //PRINTF ("%i %x %x\n", b, bb_start[b], bb_end[b]); + f->bb[0].cnt = 0; + } + + buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize); + assert (buf); + + //PRINTF ("BBSEQ:\n"); + do { + r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi); + //PRINTF ("r%i : ", r); + for (i = 0; i < r; i++) { + if (buf[i].type & MPROF_FETCH) { + //PRINTF ("%x, ", buf[i].addr); + if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) { + assert (buf[i].type & MPROF_32); + prevaddr = addr; + addr = buf[i].addr; + for (b = 0; b < f->num_bb; b++) + if (bb_start[b] <= addr && addr <= bb_end[b]) break; + assert (b < f->num_bb); + curbb = b; + if (prevaddr + 4 != addr) prevbb = -1; + } else curbb = -1; + + /* TODO: do not count interrupts */ + if (curbb != prevbb && curbb >= 0) { + fwrite (&curbb, sizeof (unsigned long), 1, fo); + //PRINTF (" [%i] ", curbb); + f->bb[curbb].cnt++; + prevbb = curbb; + } + } else { + if (verify_memoryarea(buf[i].addr)) { + if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->ops.delayw; + else mlcnt++, mlsum += cur_area->ops.delayr; + } + } + } + //PRINTF ("\n"); + } while (r == bufsize); + //PRINTF ("\n"); + + runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt; + runtime.cuc.mdelay[1] = (1. 
* mssum) / mscnt; + runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1; + f->num_runs = f->bb[0].cnt; + if (!reopened) fclose (fi); + fclose (fo); + free (buf); + free (bb_end); + free (bb_start); + + /* Initialize basic block relocations */ + f->num_init_bb = f->num_bb; + //PRINTF ("num_init_bb = %i\n", f->num_init_bb); + assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb)); + for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b; +} + +/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */ +void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll) +{ + FILE *fi; + const int bufsize = 256; + int i, r; + int *buf; + int cnt = 0; + int times = preroll - 1 + unroll; + + assert (fi = fopen (bb_filename, "rb")); + for (i = 0; i < times; i++) counts[i] = 0; + assert (buf = (int *) malloc (sizeof (int) * bufsize)); + + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + /* count consecutive acesses */ + if (f->init_bb_reloc[buf[i]] == b) { + counts[cnt]++; + if (++cnt >= times) cnt = preroll - 1; + } else cnt = 0; + } + } while (r == bufsize); + + log ("Counts %i,%i :", preroll, unroll); + for (i = 0; i < times; i++) log ("%x ", counts[i]); + log ("\n"); + + fclose (fi); + free (buf); +} + +/* relocate all accesses inside of BB b to back/fwd */ +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} + +/* Preroll if type == 1 or unroll if type == 0 loop in BB b `ntimes' times and return + new function. Original function is unmodified. 
*/ +static cuc_func *roll_loop (cuc_func *f, int b, int ntimes, int type) +{ + int b1, t, i, prevb, prevart_b; + cuc_func *n = dup_func (f); + cuc_bb *ob = &f->bb[b]; + cuc_insn *ii; + + assert (ntimes > 1); + cucdebug (3, "roll type = %i, BB%i x %i (num_bb %i)\n", type, b, ntimes, n->num_bb); + ntimes--; + assert (n->num_bb + ntimes * 2 < MAX_BB); + + prevb = b; + prevart_b = b; + + /* point to first artificial block */ + if (n->bb[b].next[0] != b) { + n->bb[b].next[0] = n->num_bb + 1; + } else if (n->bb[b].next[1] != b) { + n->bb[b].next[1] = n->num_bb + 1; + } + + /* Duplicate the BB */ + for (t = 0; t < ntimes; t++) { + cuc_bb *pb = &n->bb[prevart_b]; + /* Add new block and set links */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~BB_INLOOP; + print_cuc_bb (n, "prerollA"); + + printf ("prevb %i b1 %i prevart %i\n", prevb, b1, prevart_b); + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) { + n->bb[prevb].next[0] = b1; + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[1] = b1 + 1; + } else if (n->bb[prevb].next[1] == b) { + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[0] = b1 + 1; + n->bb[prevb].next[1] = b1; + } else assert (0); + + /* Set predecessor */ + n->bb[b1].prev[0] = prevb; + n->bb[b1].prev[1] = -1; + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b1], b, b1, prevb); + + /* add artificial block, just to join accesses */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + n->bb[b1].cnt = 0; + + for (i = 0; i < ob->ninsn - 1; i++) { + ii = &n->bb[b1].insn[i]; + if (ob->insn[i].opt[0] & OPT_DEST) { + change_insn_type (ii, II_CMOV); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF; + ii->op[2] = REF (b1 - 1, i); ii->opt[2] 
= OPT_REF; + + /* Take left one, if we should have finished the first iteration*/ + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + assert (pb->insn[pb->ninsn - 1].type & IT_COND); + ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF; + } + ii->dep = NULL; + ii->type = ob->insn[i].type & IT_COND; + } else { + change_insn_type (ii, II_NOP); + } + } + + /* Add conditional or instruction at the end, prioritizing flags */ + ii = &n->bb[b1].insn[ob->ninsn - 1]; + change_insn_type (ii, II_CMOV); + ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST; + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[1] = pb->insn[pb->ninsn - 1].op[1]; + ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[1] = REF (prevart_b, pb->ninsn - 1); + ii->opt[1] = OPT_REF; + } + if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1]; + ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[2] = REF (b1 - 1, pb->ninsn - 1); + ii->opt[2] = OPT_REF; + } + /* {z = x || y;} is same as {z = x ? x : y;} */ + ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1]; + ii->type = IT_COND; + + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~BB_INLOOP; + n->bb[b1].prev[0] = prevart_b; + n->bb[b1].prev[1] = b1 - 1; + n->bb[b1].next[0] = -1; + n->bb[b1].next[1] = -1; + + prevb = b1 - 1; + prevart_b = b1; + print_cuc_bb (n, "prerollB"); + } + + print_cuc_bb (n, "preroll0"); + n->bb[prevart_b].next[0] = ob->next[0] == b ? 
ob->next[1] : ob->next[0]; + + print_cuc_bb (n, "preroll1"); + /* repair BB after loop, to point back to latest artificial BB */ + b1 = n->bb[prevart_b].next[0]; + if (b1 >= 0 && b1 != BBID_END) { + if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b; + else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b; + else assert (0); + } + + if (type) { + /* Relink to itself */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb; + else assert (0); + n->bb[prevb].prev[1] = prevb; + + /* Set predecessor */ + if (n->bb[b].prev[0] == b) { + n->bb[b].prev[0] = n->bb[b].prev[1]; + n->bb[b].prev[1] = -1; + } else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = -1; + else assert (0); + } else { + /* Relink back to start of the loop */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b; + else assert (0); + + /* Set predecessor */ + if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb; + else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb; + else assert (0); + } + + print_cuc_bb (n, "preroll2"); + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b], b, b, prevb); + + /* Relocate all other blocks to point to latest prevart_b */ + for (i = 0; i < f->num_bb; i++) + if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b); + + return n; +} + +/* Unroll loop b unroll times and return new function. Original + function is unmodified. 
*/
/* preunroll_loop: builds a copy of F in which the loop in basic block B is
   first prerolled PREROLL times and then unrolled UNROLL times; a factor of
   1 or less skips the respective step.  The execution counts of B and of
   the loop body copies are then reassigned from the BB trace stored in
   BB_FILENAME (see count_bb_seq).  Returns the new function; when neither
   step is requested a plain duplicate of F is returned and no counts are
   recomputed.  */
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
{
  int b1, i;
  cuc_func *n, *t;
  int *counts;
  /* number of loop body instances for which counts are collected */
  const int ninst = preroll - 1 + unroll;

  if (preroll > 1) {
    t = roll_loop (f, b, preroll, 1);
    /* roll_loop appends pairs of (body copy, artificial join block), so
       num_bb - 2 is the last body copy; that is the loop to unroll */
    b1 = t->num_bb - 2;
    if (unroll > 1) {
      //print_cuc_bb (t, "preunroll1");
      n = roll_loop (t, b1, unroll, 0);
      free_func (t);
    } else n = t;
  } else {
    b1 = b;
    if (unroll > 1) n = roll_loop (f, b1, unroll, 0);
    else return dup_func (f);
  }

  /* Assign new counts to functions.  The allocation is deliberately kept
     outside of assert (): assert () expands to nothing when NDEBUG is
     defined, which would leave COUNTS uninitialized.  */
  counts = (int *)malloc (sizeof (int) * ninst);
  assert (counts != NULL);
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
  for (i = 0; i < ninst; i++) {
    /* instance 0 is the original block, later ones are the body copies,
       which were appended two blocks apart starting at f->num_bb */
    if (i == 0) b1 = b;
    else b1 = f->num_bb + (i - 1) * 2;
    n->bb[b1].cnt = counts[i];
  }

  //print_cuc_bb (n, "preunroll");
  free (counts);
  return n;
}
bb.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: insn.c =================================================================== --- insn.c (nonexistent) +++ insn.c (revision 1765) @@ -0,0 +1,1426 @@ +/* insn.c -- OpenRISC Custom Unit Compiler, instruction support + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <assert.h> + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + +/* Table of known instructions. Watch out for indexes II_*! 
*/ +const cuc_known_insn known[II_LAST + 1] = { +{"add", 1, "assign \1 = \2 + \3;"}, +{"sub", 0, "assign \1 = \2 - \3;"}, +{"and", 1, "assign \1 = \2 & \3;"}, +{"or", 1, "assign \1 = \2 | \3;"}, +{"xor", 1, "assign \1 = \2 ^ \3;"}, +{"mul", 1, "assign \1 = \2 * \3;"}, + +{"srl", 0, "assign \1 = \2 >> \3;"}, +{"sll", 0, "assign \1 = \2 << \3;"}, +{"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\ + | \2 >> \3;"}, + +{"lb", 0, "always @(posedge clk)"}, +{"lh", 0, "always @(posedge clk)"}, +{"lw", 0, "always @(posedge clk)"}, +{"sb", 0, "/* mem8[\2] = \1 */"}, +{"sh", 0, "/* mem16[\2] = \1 */"}, +{"sw", 0, "/* mem32[\2] = \1 */"}, + +{"sfeq", 1, "assign \1 = \2 == \3;"}, +{"sfne", 1, "assign \1 = \2 != \3;"}, +{"sfle", 0, "assign \1 = \2 <= \3;"}, +{"sflt", 0, "assign \1 = \2 < \3;"}, +{"sfge", 0, "assign \1 = \2 >= \3;"}, +{"sfgt", 0, "assign \1 = \2 > \3;"}, +{"bf", 0, ""}, + +{"lrbb", 0,"always @(posedge clk or posedge rst)"}, +{"cmov", 0,"assign \1 = \4 ? \2 : \3;"}, +{"reg", 0, "always @(posedge clk)"}, + +{"nop", 1, ""}, +{"call", 0, "/* function call */"}}; + +/* Find known instruction and attach them to insn */ +void change_insn_type (cuc_insn *i, int index) +{ + int j; + assert (index >= 0 && index <= II_LAST); + i->index = index; + if (i->index == II_NOP) { + for (j = 0; j < MAX_OPERANDS; j++) i->opt[j] = OPT_NONE; + i->type = 0; + i->dep = NULL; + i->disasm[0] = '\0'; + } +} + +/* Returns instruction name */ +const char *cuc_insn_name (cuc_insn *ii) { + if (ii->index < 0 || ii->index > II_LAST) return "???"; + else return known[ii->index].name; +} + +/* Prints out instructions */ +void print_insns (int bb, cuc_insn *insn, int ninsn, int verbose) +{ + int i, j; + for (i = 0; i < ninsn; i++) { + char tmp[10]; + dep_list *l = insn[i].dep; + sprintf (tmp, "[%x_%x]", bb, i); + PRINTF ("%-8s%c %-4s ", tmp, insn[i].index >= 0 ? 
':' : '?', cuc_insn_name (&insn[i])); + if (verbose) { + PRINTF ("%-20s insn = %08lx, index = %i, type = %04x ", + insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); + } else PRINTF ("type = %04x ", insn[i].type); + for (j = 0; j < MAX_OPERANDS; j++) { + if (insn[i].opt[j] & OPT_DEST) PRINTF ("*"); + switch (insn[i].opt[j] & ~OPT_DEST) { + case OPT_NONE: + break; + case OPT_CONST: + if (insn[i].type & IT_COND && (insn[i].index == II_CMOV + || insn[i].index == II_ADD)) + PRINTF ("%lx, ", insn[i].op[j]); + else + PRINTF ("0x%08lx, ", insn[i].op[j]); + break; + case OPT_JUMP: + PRINTF ("J%lx, ", insn[i].op[j]); + break; + case OPT_REGISTER: + PRINTF ("r%li, ", insn[i].op[j]); + break; + case OPT_REF: + PRINTF ("[%lx_%lx], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); + break; + case OPT_BB: + PRINTF ("BB "); + print_bb_num (insn[i].op[j]); + PRINTF (", "); + break; + case OPT_LRBB: + PRINTF ("LRBB, "); + break; + default: + fprintf (stderr, "Invalid operand type %s(%x_%x) = %x\n", + cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); + assert (0); + } + } + if (l) { + PRINTF ("\n\tdep:"); + while (l) { + PRINTF (" [%lx_%lx],", REF_BB (l->ref), REF_I (l->ref)); + l = l->next; + } + } + PRINTF ("\n"); + } +} + +void add_dep (dep_list **list, int dep) +{ + dep_list *ndep; + dep_list **tmp = list; + + while (*tmp) { + if ((*tmp)->ref == dep) return; /* already there */ + tmp = &((*tmp)->next); + } + ndep = (dep_list *)malloc (sizeof (dep_list)); + ndep->ref = dep; + ndep->next = NULL; + *tmp = ndep; +} + +void dispose_list (dep_list **list) +{ + while (*list) { + dep_list *tmp = *list; + *list = tmp->next; + free (tmp); + } +} + +void add_data_dep (cuc_func *f) +{ + int b, i, j; + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) { + fflush (stdout); + if (insn[i].opt[j] & OPT_REF) { + /* Copy list from predecessor */ + dep_list *l = f->INSN(insn[i].op[j]).dep; + while 
(l) { + add_dep (&insn[i].dep, l->ref); + l = l->next; + } + /* add predecessor */ + add_dep (&insn[i].dep, insn[i].op[j]); + } + } + } +} + +/* Inserts n nops before insn 'ref' */ +void insert_insns (cuc_func *f, int ref, int n) +{ + int b1, i, j; + int b = REF_BB(ref); + int ins = REF_I(ref); + + assert (b < f->num_bb); + assert (ins <= f->bb[b].ninsn); + assert (f->bb[b].ninsn + n < MAX_INSNS); + if (cuc_debug >= 8) print_cuc_bb (f, "PREINSERT"); + f->bb[b].insn = (cuc_insn *) realloc (f->bb[b].insn, + (f->bb[b].ninsn + n) * sizeof (cuc_insn)); + + /* Set up relocations */ + for (i = 0; i < f->bb[b].ninsn; i++) + if (i < ins) reloc[i] = i; + else reloc[i] = i + n; + + /* Move instructions, based on relocations */ + for (i = f->bb[b].ninsn - 1; i >= 0; i--) f->bb[b].insn[reloc[i]] = f->bb[b].insn[i]; + for (i = 0; i < n; i++) change_insn_type (&f->bb[b].insn[ins + i], II_NOP); + + f->bb[b].ninsn += n; + for (b1 = 0; b1 < f->num_bb; b1++) { + dep_list *d = f->bb[b1].mdep; + while (d) { + if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins) + d->ref = REF (b, REF_I (d->ref) + n); + d = d->next; + } + for (i = 0; i < f->bb[b1].ninsn; i++) { + d = f->bb[b1].insn[i].dep; + while (d) { + if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins) + d->ref = REF (b, REF_I (d->ref) + n); + d = d->next; + } + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b1].insn[i].opt[j] & OPT_REF && REF_BB (f->bb[b1].insn[i].op[j]) == b + && REF_I (f->bb[b1].insn[i].op[j]) >= ins) + f->bb[b1].insn[i].op[j] = REF (b, REF_I (f->bb[b1].insn[i].op[j]) + n); + } + } + for (i = 0; i < f->nmsched; i++) + if (REF_BB(f->msched[i]) == b) f->msched[i] = REF (b, reloc[REF_I (f->msched[i])]); + if (cuc_debug >= 8) print_cuc_bb (f, "POSTINSERT"); + cuc_check (f); +} + +/* returns nonzero, if instruction was simplified */ +int apply_edge_condition (cuc_insn *ii) +{ + unsigned int c = ii->op[2]; + + switch (ii->index) { + case II_AND: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + 
ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_OR: + if (ii->opt[2] & OPT_CONST && c == 0x0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0xffffffff; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SUB: + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_MUL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 1) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_SUB); + ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + return 1; + } else break; + case II_SRL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SLL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; 
+ } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SRA: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFEQ: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] == ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFNE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] != ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFLE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] <= ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_SFEQ); + } else break; + case II_SFLT: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] < ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + } break; + case II_SFGE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] >= ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + } else break; + case II_SFGT: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & 
OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] > ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_SFNE); + } else break; + case II_CMOV: + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->opt[3] & OPT_CONST) { + change_insn_type (ii, II_ADD); + if (ii->op[3]) { + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + } else { + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + } + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->type & IT_COND) { + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + if (ii->op[1] && !ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[3]; ii->opt[1] = ii->opt[3]; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->op[1] && ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (!ii->op[1] && !ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + } + if (ii->op[1] == ii->op[3] && ii->opt[1] == ii->opt[3]) { + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + return 1; + } + if (ii->op[2] == ii->op[3] && ii->opt[2] == ii->opt[3]) { + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } + break; + } + return 0; +} + +/* First primary input */ +static unsigned long tmp_op, tmp_opt; + +/* Recursive function that searches for primary inputs; + returns 0 if cmov can be replaced by add */ +static int cmov_needed (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + int j; + + cucdebug (4, " %x", ref); + /* mark visited, if already marked, we have a loop, ignore */ + if (ii->tmp) return 0; + 
ii->tmp = 1; + + /* handle normal movs separately */ + if (ii->index == II_ADD && !(ii->type & IT_VOLATILE) + && ii->opt[2] == OPT_CONST && ii->op[2] == 0) { + if (ii->opt[1] == OPT_REF) { + if (cmov_needed (f, ii->op[1])) { + ii->tmp = 0; + return 1; + } + } else { + if (tmp_opt == OPT_NONE) { + tmp_op = ii->op[1]; + tmp_opt = ii->opt[1]; + } else if (tmp_opt != ii->opt[1] || tmp_op != ii->op[1]) { + ii->tmp = 0; + return 1; + } + } + ii->tmp = 0; + return 0; + } + + /* Is this instruction CMOV? no => add to primary inputs */ + if ((ii->index != II_CMOV) || (ii->type & IT_VOLATILE)) { + if (tmp_opt == OPT_NONE) { + tmp_op = ref; + tmp_opt = OPT_REF; + ii->tmp = 0; + return 0; + } else if (tmp_opt != OPT_REF || tmp_op != ref) { + ii->tmp = 0; + return 1; + } else { + ii->tmp = 0; + return 0; + } + } + + for (j = 1; j < 3; j++) { + cucdebug (4, "(%x:%i)", ref, j); + if (ii->opt[j] == OPT_REF) { + if (cmov_needed (f, ii->op[j])) { + ii->tmp = 0; + return 1; + } + } else { + if (tmp_opt == OPT_NONE) { + tmp_op = ii->op[j]; + tmp_opt = ii->opt[j]; + } else if (tmp_opt != ii->opt[j] || tmp_op != ii->op[j]) { + ii->tmp = 0; + return 1; + } + } + } + + ii->tmp = 0; + return 0; +} + +/* Search and optimize complex cmov assignments */ +int optimize_cmovs (cuc_func *f) +{ + int modified = 0; + int b, i; + + /* Mark all instructions unvisited */ + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) + for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].tmp = 0; + + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + if (ii->index == II_CMOV && !(ii->type & IT_VOLATILE)) { + tmp_opt = OPT_NONE; + cucdebug (4, "\n"); + if (!cmov_needed (f, REF(b, i))) { + assert (tmp_opt != OPT_NONE); + change_insn_type (ii, II_ADD); + ii->op[1] = tmp_op; ii->opt[1] = tmp_opt; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + modified = 1; + } + } + } + } + return 
modified; +} + +/* returns number of instructions, using instruction ref */ +static int insn_uses (cuc_func *f, int ref) +{ + int b, i, j; + int cnt = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == ref) cnt++; + return cnt; +} + +/* handles some common CMOV, CMOV-CMOV cases; + returns nonzero if anything optimized */ +static int optimize_cmov_more (cuc_func *f, int ref) +{ + int t = 0; + cuc_insn *ii = &f->INSN(ref); + assert (ii->index == II_CMOV); + + /* In case of x = cmov x, y; or x = cmov y, x; we have + asynchroneous loop -> remove it */ + if ((ii->opt[1] & OPT_REF) && ii->op[1] == ref) t = 1; + if ((ii->opt[2] & OPT_REF) && ii->op[2] == ref) t = 2; + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) t = 2; + if (t) { + change_insn_type (ii, II_ADD); + cucdebug (2, "%8x:cmov %i\n", ref, t); + ii->opt[t] = OPT_CONST; + ii->op[t] = 0; + ii->opt[3] = OPT_NONE; + return 1; + } + if (!(ii->type & IT_COND)) { + for (t = 1; t <= 2; t++) { + /* cmov L, X, Y, C1 + cmov Z, L, Y, C2 + can be replaced with simpler: + cmov L, C1, C2, C2 + cmov Z, X, Y, L */ + if (ii->opt[t] == OPT_REF && f->INSN(ii->op[t]).index == II_CMOV) { + int r = ii->op[t]; + unsigned long x, xt, y, yt; + cuc_insn *prev = &f->INSN(r); + cuc_check (f); + cucdebug (3, "%x-%x\n", ref, r); + assert (!(prev->type & IT_COND)); + if (prev->op[3 - t] != ii->op[3 - t] || prev->opt[3 - t] != ii->opt[3 - t] + || insn_uses (f, r) > 1) continue; + cucdebug (3, "%x-%x cmov more\n", ref, r); + prev->type |= IT_COND; + x = prev->op[t]; xt = prev->opt[t]; + y = prev->op[3 - t]; yt = prev->opt[3 - t]; + prev->op[t] = ii->op[3]; prev->opt[t] = ii->opt[3]; /* C2 */ + ii->op[3] = r; ii->opt[3] = OPT_REF; /* L */ + prev->op[3 - t] = prev->op[3]; prev->opt[3 - t] = prev->opt[3]; /* C1 */ + prev->op[3] = prev->op[t]; prev->opt[3] = prev->opt[t]; /* C2 */ + ii->op[t] = x; ii->opt[t] = 
xt; /* X */ + ii->op[3 - t] = y; ii->opt[3 - t] = yt; /* Y */ + prev->op[0] = -1; prev->opt[0] = OPT_REGISTER | OPT_DEST; + cuc_check (f); + return 1; + } + } + } + + if (ii->opt[3] & OPT_REF) { + cuc_insn *prev = &f->INSN(ii->op[3]); + assert (prev->type & IT_COND); + if (prev->index == II_CMOV) { + /* negated conditional: + cmov x, 0, 1, y + cmov z, a, b, x + is replaced by + cmov z, b, a, y */ + if (prev->opt[1] & OPT_CONST && prev->opt[2] & OPT_CONST + && !prev->op[1] && prev->op[2]) { + unsigned long t; + t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t; + t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t; + ii->op[3] = prev->op[3]; ii->opt[3] = prev->opt[3]; + } + } else if (prev->index == II_ADD) { + /* add x, y, 0 + cmov z, a, b, x + is replaced by + cmov z, a, b, y */ + if (prev->opt[2] & OPT_CONST && prev->op[2] == 0) { + ii->op[3] = prev->op[1]; ii->opt[3] = prev->opt[1]; + return 1; + } + } + } + + /* Check if both choices can be pushed through */ + if (ii->opt[1] & OPT_REF && ii->opt[2] & OPT_REF + /* Usually doesn't make sense to move conditionals though => more area */ + && !(ii->type & IT_COND)) { + cuc_insn *a, *b; + a = &f->INSN(ii->op[1]); + b = &f->INSN(ii->op[2]); + if (a->index == b->index && !(a->type & IT_VOLATILE) && !(b->type & IT_VOLATILE)) { + int diff = -1; + int i; + for (i = 0; i < MAX_OPERANDS; i++) + if (a->opt[i] != b->opt[i] || !(a->op[i] == b->op[i] || a->opt[i] & OPT_REGISTER)) { + if (diff == -1) diff = i; else diff = -2; + } + /* If diff == -1, it will be eliminated by CSE */ + if (diff >= 0) { + cuc_insn tmp, cmov; + int ref2 = REF (REF_BB (ref), REF_I (ref) + 1); + insert_insns (f, ref, 1); + a = &f->INSN(f->INSN(ref2).op[1]); + b = &f->INSN(f->INSN(ref2).op[2]); + cucdebug (4, "ref = %x %lx %lx\n", ref, f->INSN(ref2).op[1], + f->INSN(ref2).op[2]); + if (cuc_debug >= 7) { + print_cuc_bb (f, "AAA"); + cuc_check (f); + } + tmp = *a; + cmov = f->INSN(ref2); + tmp.op[diff] = ref; tmp.opt[diff] = OPT_REF; + cmov.op[0] = 
-1; cmov.opt[0] = OPT_REGISTER | OPT_DEST; + cmov.op[1] = a->op[diff]; cmov.opt[1] = a->opt[diff]; + cmov.op[2] = b->op[diff]; cmov.opt[2] = b->opt[diff]; + change_insn_type (&cmov, II_CMOV); + cmov.type &= ~IT_COND; + cucdebug (4, "ref2 = %x %lx %lx\n", ref2, cmov.op[1], cmov.op[2]); + if (cmov.opt[1] & OPT_REF && cmov.opt[2] & OPT_REF + && f->INSN(cmov.op[1]).type & IT_COND) { + assert (f->INSN(cmov.op[2]).type & IT_COND); + cmov.type |= IT_COND; + } + f->INSN(ref) = cmov; + f->INSN(ref2) = tmp; + if (cuc_debug >= 6) print_cuc_bb (f, "BBB"); + cuc_check (f); + return 1; + } + } + } + return 0; +} + +/* Optimizes dataflow tree; repeats all local rewrites until a full pass changes nothing. + Returns nonzero if anything was modified. */ +int optimize_tree (cuc_func *f) +{ + int b, i, j; + int modified; + int gmodified = 0; + + do { + modified = 0; + if (cuc_debug) cuc_check (f); + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + /* We tend to have the third parameter const if instruction is commutative */ + if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)) { + int cond = ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE; + if (known[ii->index].comutative || cond) { + unsigned long t = ii->opt[1]; + ii->opt[1] = ii->opt[2]; + ii->opt[2] = t; + t = ii->op[1]; + ii->op[1] = ii->op[2]; + ii->op[2] = t; + modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i)); + if (cond) { + /* Operands were exchanged, so the relation must be mirrored, not negated: + a < b <=> b > a and a <= b <=> b >= a; equality tests are symmetric */ + if (ii->index == II_SFLE) ii->index = II_SFGE; + else if (ii->index == II_SFLT) ii->index = II_SFGT; + else if (ii->index == II_SFGE) ii->index = II_SFLE; + else if (ii->index == II_SFGT) ii->index = II_SFLT; + else assert (ii->index == II_SFEQ || ii->index == II_SFNE); + } + } + } + + /* Try to do the promotion */ + /* We have two consecutive expressions, containing constants, + * if previous is a simple expression we can handle it simply: */ + for 
(j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_REF) { + cuc_insn *t = &f->INSN(ii->op[j]); + if (f->INSN(ii->op[j]).index == II_ADD + && f->INSN(ii->op[j]).opt[2] & OPT_CONST + && f->INSN(ii->op[j]).op[2] == 0 + && !(ii->type & IT_MEMORY && t->type & IT_MEMADD)) { + /* do not promote through add-mem, and branches */ + modified = 1; + cucdebug (2, "%8x:promote%i %8lx %8lx\n", REF (b, i), j, ii->op[j], t->op[1]); + ii->op[j] = t->op[1]; + ii->opt[j] = t->opt[1]; + } + } + + /* handle some CMOV cases more deeply */ + if (ii->index == II_CMOV && optimize_cmov_more (f, REF (b, i))) { + modified = 1; + continue; + } + + /* Do nothing to volatile instructions */ + if (ii->type & IT_VOLATILE) continue; + + /* Check whether we can simplify the instruction */ + if (apply_edge_condition (ii)) { + modified = 1; + continue; + } + /* We cannot do anything more if at least one is not constant */ + if (!(ii->opt[2] & OPT_CONST)) continue; + + if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ + unsigned long value; + int ok = 1; + /* Was constant expression already? 
*/ + if (ii->index == II_ADD && !ii->op[2]) continue; + + if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; + else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; + else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; + else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; + else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; + else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; + else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; + else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; + else ok = 0; + if (ok) { + change_insn_type (ii, II_ADD); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = value; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + modified = 1; cucdebug (2, "%8x:const\n", REF (b, i)); + } + } else if (ii->opt[1] & OPT_REF) { + cuc_insn *prev = &f->INSN(ii->op[1]); + /* Is this just a move? */ + if (ii->index == II_ADD + && !(ii->type & IT_MEMADD) && ii->op[2] == 0) { + int b1, i1, j1; + cucdebug (2, "%8x:link %8lx: ", REF(b, i), ii->op[1]); + if (!(prev->type & (IT_OUTPUT | IT_VOLATILE))) { + assert (ii->opt[0] & OPT_DEST); + prev->op[0] = ii->op[0]; prev->opt[0] = ii->opt[0]; + prev->type |= ii->type & IT_OUTPUT; + for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) + for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) + for (j1 = 0; j1 < MAX_OPERANDS; j1++) + if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) + && f->bb[b1].insn[i1].op[j1] == REF(b, i)) { + cucdebug (2, "%x ", REF (b1, i1)); + f->bb[b1].insn[i1].op[j1] = ii->op[1]; + } + cucdebug (2, "\n"); + change_insn_type (ii, II_NOP); + } + } else if (prev->opt[2] & OPT_CONST) { + /* Handle some common cases */ + /* add - add joining */ + if (ii->index == II_ADD && prev->index == II_ADD) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i)); + } else /* add - sub joining */ + if (ii->index == II_ADD && 
prev->index == II_SUB) { + /* (x - c1) + c2 == x - (c1 - c2); retype the instruction being rewritten (ii) */ + change_insn_type (ii, II_SUB); + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] = prev->op[2] - ii->op[2]; + modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i)); + } else /* sub - add joining */ + if (ii->index == II_SUB && prev->index == II_ADD) { + /* (x + c1) - c2 == x - (c2 - c1) */ + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] -= prev->op[2]; + modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i)); + } else /* add - sfxx joining */ + if (prev->index == II_ADD && ( + ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE)) { + if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] -= prev->op[2]; + modified = 1; cucdebug (2, "%8x: add-sfxx\n", REF(b, i)); + } + } else /* sub - sfxx joining */ + if (prev->index == II_SUB && ( + ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE)) { + if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: sub-sfxx\n", REF(b, i)); + } + } + } + } + } + } + if (modified) gmodified = 1; + } while (modified); + return gmodified; +} + +/* Remove nop instructions */ +int remove_nops (cuc_func *f) +{ + int b; + int modified = 0; + for (b = 0; b < f->num_bb; b++) { + int c, d = 0, i, j; + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].index != II_NOP) { + reloc [i] = d; + insn[d++] = insn[i]; + } else { + reloc[i] = d; /* For jumps only */ + } + if (f->bb[b].ninsn != d) modified = 1; + f->bb[b].ninsn = d; + + /* Relocate references from all basic blocks */ + for (c = 0; c < f->num_bb; c++) + for (i = 0; i < f->bb[c].ninsn; i++) { + dep_list *d = f->bb[c].insn[i].dep; + for (j = 0; j < MAX_OPERANDS; j++) + if 
((f->bb[c].insn[i].opt[j] & OPT_REF) + && REF_BB(f->bb[c].insn[i].op[j]) == b) + f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); + + while (d) { + if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]); + d = d->next; + } + } + } + return modified; +} + +static void unmark_tree (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + cucdebug (5, "%x ", ref); + if (ii->type & IT_UNUSED) { + int j; + ii->type &= ~IT_UNUSED; + for (j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_REF) unmark_tree (f, ii->op[j]); + } +} + +/* Remove unused assignments */ +int remove_dead (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + f->bb[b].insn[i].type |= IT_UNUSED; + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + if (ii->type & IT_VOLATILE || ii->type & IT_OUTPUT + || (II_IS_LOAD (ii->index) && (f->memory_order == MO_NONE || f->memory_order == MO_WEAK)) + || II_IS_STORE (ii->index)) { + unmark_tree (f, REF (b, i)); + cucdebug (5, "\n"); + } + } + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) { + change_insn_type (&f->bb[b].insn[i], II_NOP); + } + + return remove_nops (f); +} + +/* Removes trivial register assignments */ +int remove_trivial_regs (cuc_func *f) +{ + int b, i; + for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = caller_saved[i]; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) { + if (insn[i].index == II_ADD + && insn[i].opt[0] & OPT_REGISTER + && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] + && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { + if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; + change_insn_type (&insn[i], II_NOP); + } + } + } + if (cuc_debug >= 2) { + PRINTF ("saved regs "); + for (i = 0; i < MAX_REGS; i++) PRINTF ("%i:%i ", 
i, f->saved_regs[i]); + PRINTF ("\n"); + } + return remove_nops (f); +} + +/* Determine inputs and outputs */ +void set_io (cuc_func *f) +{ + int b, i, j; + /* Determine register usage */ + for (i = 0; i < MAX_REGS; i++) { + f->lur[i] = -1; + f->used_regs[i] = 0; + } + if (cuc_debug > 5) print_cuc_bb (f, "SET_IO"); + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) { + if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i); + else f->used_regs[f->bb[b].insn[i].op[j]] = 1; + } + } +} + +/* relocate all accesses inside of BB b to back/fwd */ +#if 0 +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} +#endif + +/* Latch outputs in loops */ +void add_latches (cuc_func *f) +{ + int b, i, j; + + //print_cuc_bb (f, "ADD_LATCHES a"); + /* Cuts the tree and marks registers */ + mark_cut (f); + + /* Split BBs with more than one group */ + for (b = 0; b < f->num_bb; b++) expand_bb (f, b); + remove_nops (f); + //print_cuc_bb (f, "ADD_LATCHES 0"); + + /* Convert accesses in BB_INLOOP type block to latched */ + for (b = 0; b < f->num_bb; b++) { + int j; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { + int t = f->bb[b].insn[i].op[j]; + /* If we are pointing to a INLOOP block from outside, or forward + (= previous loop iteration) we must register that data */ + if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle) + && !(f->INSN(t).type & (IT_BRANCH | IT_COND)) + && (REF_BB(t) != b || REF_I(t) >= i)) { + f->INSN(t).type |= 
IT_LATCHED; + } + } + } + //print_cuc_bb (f, "ADD_LATCHES 1"); + + /* Add latches at the end of blocks as needed */ + for (b = 0; b < f->num_bb; b++) { + int nreg = 0; + cuc_insn *insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; + if (nreg) { + insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); + j = 0; + for (i = 0; i < f->bb[b].ninsn; i++) { + insn[i] = f->bb[b].insn[i]; + if (insn[i].type & IT_LATCHED) { + cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; + change_insn_type (ii, II_REG); + ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; + ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; + ii->opt[2] = ii->opt[3] = OPT_NONE; + ii->dep = NULL; + ii->type = IT_VOLATILE; + sprintf (ii->disasm, "reg %i_%i", b, i); + } + } + f->bb[b].ninsn += nreg; + free (f->bb[b].insn); + f->bb[b].insn = insn; + } + } + //print_cuc_bb (f, "ADD_LATCHES 2"); + + /* Repair references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + /* If destination instruction is latched, use register instead */ + if (f->bb[b].insn[i].opt[j] == OPT_REF + && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { + int b1, i1; + b1 = REF_BB (f->bb[b].insn[i].op[j]); + //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); + if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { + for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { + assert (f->bb[b1].insn[i1].index == II_REG); + if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { + f->bb[b].insn[i].op[j] = REF (b1, i1); + break; + } + } + } + } +} + +/* CSE -- common subexpression elimination */ +int cse (cuc_func *f) +{ + int modified = 0; + int b, i, j, b1, i1, b2, i2; + for (b1 = 0; b1 < f->num_bb; b1++) + for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) if (f->bb[b1].insn[i1].index != II_NOP + && f->bb[b1].insn[i1].index != II_LRBB && !(f->bb[b1].insn[i1].type & IT_MEMORY) + && !(f->bb[b1].insn[i1].type & IT_MEMADD)) + 
for (b2 = 0; b2 < f->num_bb; b2++) + for (i2 = 0; i2 < f->bb[b2].ninsn; i2++) + if (f->bb[b2].insn[i2].index != II_NOP && f->bb[b2].insn[i2].index != II_LRBB + && !(f->bb[b2].insn[i2].type & IT_MEMORY) && !(f->bb[b2].insn[i2].type & IT_MEMADD) + && (b1 != b2 || i2 > i1)) { + cuc_insn *ii1 = &f->bb[b1].insn[i1]; + cuc_insn *ii2 = &f->bb[b2].insn[i2]; + int ok = 1; + + /* Do we have an exact match? */ + if (ii1->index != ii2->index) continue; + if (ii2->type & IT_VOLATILE) continue; + + /* Check all operands also */ + for (j = 0; j < MAX_OPERANDS; j++) { + if (ii1->opt[j] != ii2->opt[j]) { + ok = 0; + break; + } + if (ii1->opt[j] & OPT_DEST) continue; + if (ii1->opt[j] != OPT_NONE && ii1->op[j] != ii2->op[j]) { + ok = 0; + break; + } + } + + if (ok) { + /* remove duplicated instruction and relink the references */ + cucdebug (3, "%x - %x are same\n", REF(b1, i1), REF(b2, i2)); + change_insn_type (ii2, II_NOP); + modified = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF + && f->bb[b].insn[i].op[j] == REF (b2, i2)) + f->bb[b].insn[i].op[j] = REF (b1, i1); + } + } + return modified; +} + +static int count_cmovs (cuc_insn *ii, int match) +{ + int c = 0, j; + if (match & 2) { + for (j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_DEST) c++; + } + if (match & 1) { + for (j = 0; j < MAX_OPERANDS; j++) + if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] & OPT_REF) c++; + } else { + for (j = 0; j < MAX_OPERANDS; j++) + if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] != OPT_NONE) c++; + } + return c; +} + +static void search_csm (int iter, cuc_func *f, cuc_shared_list *list); +static cuc_shared_list *main_list; +static int *iteration; + +/* CSM -- common subexpression matching -- resource sharing + We try to match tree of instruction inside a BB with as many + matches as possible. 
All possibilities are collected and + options, making situation worse are removed. + Builds the candidate list in main_list and stores per-BB estimates in f->bb[b].tim */ +void csm (cuc_func *f) +{ + int b, i, j; + int cnt; + cuc_shared_list *list; + cuc_timings timings; + + analyse_timings (f, &timings); + main_list = NULL; + for (b = 0; b < f->num_bb; b++) { + /* Allocate outside assert () so the call survives builds with NDEBUG */ + iteration = (int *)malloc (sizeof (int) * f->bb[b].ninsn); + assert (iteration); + for (i = 0; i < f->bb[b].ninsn; i++) { + int cnt = 0, cntc = 0; + double size = 0., sizec = 0.; + int j2 = 0; + for (j = 0; j < f->bb[b].ninsn; j++) + if (f->bb[b].insn[i].index == f->bb[b].insn[j].index) { + int ok = 1; + /* Non-reference operands must agree in both type and value to count as a constant match */ + for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[j].opt[j2] & OPT_REF)) + if (f->bb[b].insn[j].opt[j2] != f->bb[b].insn[i].opt[j2] + || f->bb[b].insn[j].op[j2] != f->bb[b].insn[i].op[j2]) { + ok = 0; + break; + } + if (ok) { + cntc++; + sizec = sizec + insn_size (&f->bb[b].insn[j]); + } else { + cnt++; + size = size + insn_size (&f->bb[b].insn[j]); + } + iteration[j] = 0; + } else iteration[j] = -1; + if (cntc > 1) { + list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)); + assert (list); + list->next = main_list; + list->from = NULL; + list->ref = REF (b, i); + list->cnt = cnt; + list->cmatch = 1; + list->cmovs = count_cmovs (&f->bb[b].insn[i], 3); + list->osize = sizec; + list->size = ii_size (f->bb[b].insn[i].index, 1); + main_list = list; + search_csm (0, f, list); + } + if (cnt > 1) { + list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)); + assert (list); + list->next = main_list; + list->from = NULL; + list->ref = REF (b, i); + list->cnt = cnt + cntc; + list->cmatch = 0; + list->cmovs = count_cmovs (&f->bb[b].insn[i], 2); + list->osize = size + sizec; + list->size = ii_size (f->bb[b].insn[i].index, 0); + main_list = list; + search_csm (0, f, list); + } + } + free (iteration); + } + + for (list = main_list; list; list = list->next) list->dead = 0; + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + + /* Now we will 
check the real size of the 'improvements'; if the size + actually increases, we abandom the option */ + for (list = main_list; list; list = list->next) + if (list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size >= list->osize) list->dead = 1; + + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + + /* Count number of instructions grouped */ + for (list = main_list; list; list = list->next) { + cuc_shared_list *l = list; + int c = 0; + while (l) { + c++; + if (f->INSN(l->ref).type & (IT_VOLATILE | IT_MEMORY | IT_MEMADD)) list->dead = 1; + l = l->from; + } + list->ninsn = c; + } + + cnt = 0; + for (list = main_list; list; list = list->next) + if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + +#if 1 + /* We can get a lot of options here, so we will delete duplicates */ + for (list = main_list; list; list = list->next) if (!list->dead) { + cuc_shared_list *l; + for (l = list->next; l; l = l->next) if (!l->dead) { + int ok = 1; + cuc_shared_list *t1 = list; + cuc_shared_list *t2 = l; + while (ok && t1 && t2) { + if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) { + /* If other operands are matching, we must check for them also */ + if (t1->cmatch) { + int j; + for (j = 0; j < MAX_OPERANDS; j++) + if (!(f->INSN(t1->ref).opt[j] & OPT_REF) || !(f->INSN(t2->ref).opt[j] & OPT_REF) + || f->INSN(t1->ref).opt[j] != f->INSN(t2->ref).opt[j] + || f->INSN(t1->ref).op[j] != f->INSN(t2->ref).op[j]) { + ok = 0; + break; + } + } + + /* This option is duplicate, remove */ + if (ok) t1->dead = 1; + } + t1 = t1->from; + t2 = t2->from; + } + } + } + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); +#endif + /* Print out */ + for (list = main_list; list; list = list->next) if (!list->dead) { + cuc_shared_list *l = list; + cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @", + cuc_insn_name 
(&f->INSN(list->ref)), list->cnt, list->ninsn, + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs); + while (l) { + cucdebug (1, "%c%x,", l->cmatch ? '.' : '!', l->ref); + l = l->from; + } + cucdebug (1, "\n"); + } + + /* Calculate estimated timings */ + for (b = 0; b < f->num_bb; b++) { + cnt = 0; + for (list = main_list; list; list = list->next) + if (!list->dead && REF_BB(list->ref) == b) cnt++; + + f->bb[b].ntim = cnt; + if (!cnt) { + f->bb[b].tim = NULL; + continue; + } + assert (f->bb[b].tim = (cuc_timings *)malloc (sizeof (cuc_timings) * cnt)); + + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) { + cuc_shared_list *l = list; + f->bb[b].tim[cnt].b = b; + f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1; + f->bb[b].tim[cnt].nshared = list->ninsn; + assert (f->bb[b].tim[cnt].shared = (cuc_shared_item *) + malloc (sizeof(cuc_shared_item) * list->ninsn)); + for (i = 0; i < list->ninsn; i++, l = l->from) { + f->bb[b].tim[cnt].shared[i].ref = l->ref; + f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch; + } + f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1); + f->bb[b].tim[cnt].size = timings.size + + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize; + cnt++; + } + } +} + +/* Recursive function for searching through instruction graph */ +static void search_csm (int iter, cuc_func *f, cuc_shared_list *list) +{ + int b, i, j, i1; + cuc_shared_list *l; + b = REF_BB(list->ref); + i = REF_I(list->ref); + + for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) { + int t = f->bb[b].insn[i].op[j]; + int cnt = 0, cntc = 0; + double size = 0., sizec = 0.; + + /* Mark neighbours */ + for (i1 = 0; i1 < f->bb[b].ninsn; i1++) { + if (iteration[i1] == iter && f->bb[b].insn[i1].opt[j] & OPT_REF) { + int t2 = f->bb[b].insn[i1].op[j]; + if (f->INSN(t).index == f->INSN(t2).index && f->INSN(t2).opt[j] & 
OPT_REF) { + int j2; + int ok = 1; + iteration[REF_I(t2)] = iter + 1; + /* Non-reference operands must agree in both type and value to count as a constant match */ + for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[i1].opt[j2] & OPT_REF)) + if (f->bb[b].insn[i1].opt[j2] != f->bb[b].insn[i].opt[j2] + || f->bb[b].insn[i1].op[j2] != f->bb[b].insn[i].op[j2]) { + ok = 0; + break; + } + if (ok) { + cntc++; + sizec = sizec + insn_size (&f->bb[b].insn[i1]); + } else { + cnt++; + size = size + insn_size (&f->bb[b].insn[i1]); + } + } + } + } + + if (cntc > 1) { + /* Allocate outside assert () so the call survives builds with NDEBUG */ + l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)); + assert (l); + l->next = main_list; + main_list = l; + l->from = list; + l->ref = t; + l->cnt = cnt; + l->cmatch = 1; + l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 1) - 1; + l->size = list->size + ii_size (f->bb[b].insn[i].index, 1); + l->osize = sizec; + search_csm (iter + 1, f, l); + } + if (cnt > 1) { + l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)); + assert (l); + l->next = main_list; + main_list = l; + l->from = list; + l->ref = t; + l->cnt = cnt + cntc; + l->cmatch = 0; + l->osize = size + sizec; + l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 0) - 1; + l->size = list->size + ii_size (f->bb[b].insn[i].index, 0); + search_csm (iter + 1, f, l); + } + + /* Unmark them back */ + for (i1 = 0; i1 < f->bb[b].ninsn; i1++) if (iteration[i1] > iter) iteration[i1] = -1; + } +} + +/* Displays shared instructions */ +void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared) +{ + int i, first = 1; + for (i = 0; i < nshared; i++) { + PRINTF ("%s%s%s", first ? "" : "-", cuc_insn_name (&rf->INSN(shared[i].ref)), + shared[i].cmatch ? "!" : ""); + first = 0; + } +} + +/* Common subexpression matching -- resource sharing, generation pass + + Situation here is much simpler than with analysis -- we know the instruction sequence + we are going to share, but we are going to do this on whole function, not just one BB. 
+ We can find sequence in reference function, as pointed from "shared" */ +void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared) +{ + int b, i, cnt = 0; + /* FIXME: some code here (2) */ + PRINTF ("Replacing: "); + print_shared (rf, shared, nshared); + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + } + + PRINTF ("\nFound %i matches.\n", cnt); +} + Index: verilog.c =================================================================== --- verilog.c (nonexistent) +++ verilog.c (revision 1765) @@ -0,0 +1,1035 @@ +/* verilog.c -- OpenRISC Custom Unit Compiler, verilog generator + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "profiler.h" +#include "sim-config.h" +#include "misc.h" + +/* Shortcut */ +#define GEN(x...) 
fprintf (fo, x) + +/* Find index of load/store/call */ +int find_lsc_index (cuc_func *f, int ref) +{ + int c = 0; + int i; + int load; + + if (f->INSN(ref).index == II_CALL) { + for (i = 0; i < f->nmsched; i++) { + if (f->msched[i] == ref) break; + if (f->mtype[i] & MT_CALL) c++; + } + } else { + load = II_IS_LOAD (f->INSN(ref).index); + for (i = 0; i < f->nmsched; i++) { + if (f->msched[i] == ref) break; + if ((load && (f->mtype[i] & MT_LOAD)) + || (!load && (f->mtype[i] & MT_STORE))) c++; + } + } + return c; +} + +/* Print out dependencies as verilog expression */ +void print_deps (FILE *fo, cuc_func *f, int b, dep_list *t, int registered) +{ + if (t) { + int first = 0; + while (t) { + if (f->INSN(t->ref).type & IT_MEMORY) { + GEN ("%s%c_end[%i]", first ? " && " : "", + II_IS_LOAD (f->INSN(t->ref).index) ? 'l' : 's', find_lsc_index (f, t->ref)); + } else if (f->INSN(t->ref).index == II_CALL) { + GEN ("%sf_end[%i]", first ? " && " : "", find_lsc_index (f, t->ref)); + } else { + PRINTF ("print_deps: err %lx\n", t->ref); + assert (0); + } + first = 1; + t = t->next; + } + } else { + if (registered) GEN ("bb_start_r[%i]", b); + else GEN ("bb_start[%i]", b); + } +} + +char *print_op_v (cuc_func *f, char *s, int ref, int j) +{ + unsigned long op = f->INSN(ref).op[j]; + unsigned long opt = f->INSN(ref).opt[j]; + switch (opt & ~OPT_DEST) { + case OPT_NONE: assert (0); break; + case OPT_CONST: if (f->INSN(ref).type & IT_COND && (f->INSN(ref).index == II_CMOV + || f->INSN(ref).index == II_ADD)) { + assert (op == 0 || op == 1); + sprintf (s, "1'b%lx", op); + } else sprintf (s, "32'h%lx", op); + break; + case OPT_REGISTER: + if (opt & OPT_DEST) sprintf (s, "t%x_%x", REF_BB(ref), REF_I(ref)); + else sprintf (s, "r%li_%c", op, opt & OPT_DEST ? 
'o' : 'i'); + break; +#if 0 + case OPT_FREG: assert (opt & OPT_DEST); + sprintf (s, "fr%i_o", op); + break; +#endif + case OPT_REF: sprintf (s, "t%lx_%lx", REF_BB(op), REF_I(op)); break; + } + return s; +} + +/* Prints out specified instruction */ +void print_insn_v (FILE *fo, cuc_func *f, int b, int i) +{ + cuc_insn *ii = &f->bb[b].insn[i]; + char *s = known[ii->index].rtl; + char tmp[200] = ""; + + assert (s); + while (*s) { + if (*s <= MAX_OPERANDS) { + char t[30]; + sprintf (tmp, "%s%s", tmp, print_op_v (f, t, REF(b, i), *s - 1)); + } else if (*s == '\b') sprintf (tmp, "%s%i", tmp, b); + else sprintf (tmp, "%s%c", tmp, *s); + s++; + } + GEN ("%-40s /* %s */\n", tmp, ii->disasm); + if (ii->type & IT_MEMORY) { + int nls = find_lsc_index (f, REF (b, i)); + if (II_IS_LOAD (ii->index)) { + int nm; + for (nm = 0; nm < f->nmsched; nm++) if (f->msched[nm] == REF (b, i)) break; + assert (nm < f->nmsched); + + GEN (" if (l_end[%i]) t%x_%x <= #Tp ", nls, b, i); + switch (f->mtype[nm] & (MT_WIDTH | MT_SIGNED)) { + case 1: GEN ("l_dat_i & 32'hff;\n"); + break; + case 2: GEN ("l_dat_i & 32'hffff;\n"); + break; + case 4 | MT_SIGNED: + case 4: GEN ("l_dat_i;\n"); + break; + case 1 | MT_SIGNED: + GEN ("{24{l_dat_i[7]}, l_dat_i[7:0]};\n"); + break; + case 2 | MT_SIGNED: + GEN ("{16{l_dat_i[15]}, l_dat_i[15:0]};\n"); + break; + default: assert (0); + } + } + } else if (ii->index == II_LRBB) { + GEN (" if (rst) t%x_%x <= #Tp 1'b0;\n", b, i); + assert (f->bb[b].prev[0] >= 0); + if (f->bb[b].prev[0] == BBID_START) + GEN (" else if (bb_start[%i]) t%x_%x <= #Tp start_i;\n", b, b, i); + else + GEN (" else if (bb_start[%i]) t%x_%x <= #Tp bb_stb[%i];\n", b, b, i, f->bb[b].prev[0]); + } else if (ii->index == II_REG) { + assert (ii->opt[1] == OPT_REF); + GEN (" if ("); + if (f->bb[b].mdep) print_deps (fo, f, b, f->bb[b].mdep, 0); + else GEN ("bb_stb[%i]", b); + GEN (") t%x_%x <= #Tp t%lx_%lx;\n", b, i, + REF_BB (ii->op[1]), REF_I (ii->op[1])); + } +} + +/* Outputs binary number */ +/* 
+static char *bin_str (unsigned long x, int len) +{ + static char bx[33]; + char *s = bx; + while (len > 0) *s++ = '0' + ((x >> --len) & 1); + *s = '\0'; + return bx; +} +*/ + +/* Returns index of branch instruction inside a block b */ +static int branch_index (cuc_bb *bb) +{ + int i; + for (i = bb->ninsn - 1; i >= 0; i--) + if (bb->insn[i].type & IT_BRANCH) return i; + return -1; +} + +static void print_turn_off_dep (FILE *fo, cuc_func *f, dep_list *dep) +{ + while (dep) { + assert (f->INSN(dep->ref).type & IT_MEMORY || f->INSN(dep->ref).index == II_CALL); + GEN (" %c_stb[%i] <= #Tp 1'b0;\n", f->INSN(dep->ref).index == II_CALL ? 'f' + : II_IS_LOAD (f->INSN(dep->ref).index) ? 'l' : 's', find_lsc_index (f, dep->ref)); + dep = dep->next; + } +} + +static int func_index (cuc_func *f, int ref) +{ + int i; + unsigned long addr; + assert (f->INSN(ref).index == II_CALL && f->INSN(ref).opt[0] & OPT_CONST); + addr = f->INSN(ref).op[0]; + for (i = 0; i < f->nfdeps; i++) + if (f->fdeps[i]->start_addr == addr) return i; + + assert (0); + return -1; +} + +/* Generates verilog file out of insn dataflow */ +void output_verilog (cuc_func *f, char *filename, char *funcname) +{ + FILE *fo; + int b, i, j; + int ci = 0, co = 0; + int nloads = 0, nstores = 0, ncalls = 0; + char tmp[256]; + sprintf (tmp, "%s.v", filename); + + log ("Generating verilog file \"%s\"\n", tmp); + PRINTF ("Generating verilog file \"%s\"\n", tmp); + if ((fo = fopen (tmp, "wt+")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", tmp); + exit (1); + } + + /* output header */ + GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp); + GEN (" (C) 2002 Opencores\n"); + GEN (" function \"%s\"\n", funcname); + GEN (" at %08lx - %08lx\n", f->start_addr, f->end_addr); + GEN (" num BBs %i */\n\n", f->num_bb); + + GEN ("`include \"timescale.v\"\n\n"); + GEN ("module %s (clk, rst,\n", filename); + GEN (" l_adr_o, l_dat_i, l_req_o,\n"); + GEN (" l_sel_o, l_linbrst_o, l_rdy_i,\n"); + GEN (" s_adr_o, s_dat_o, 
s_req_o,\n"); + GEN (" s_sel_o, s_linbrst_o, s_rdy_i,\n"); + + GEN ("/* inputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (f->used_regs[i]) { + GEN ("r%i_i, ", i); + ci++; + } + if (!ci) GEN ("/* NONE */"); + + GEN ("\n/* outputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) { + GEN ("r%i_o, ", i); + co++; + } + + if (!co) GEN ("/* NONE */"); + if (f->nfdeps) { + GEN ("\n/* f. calls */, fstart_o, %sfend_i, fr11_i, ", + log2_int (f->nfdeps) > 0 ? "fid_o, " : ""); + for (i = 0; i < 6; i++) GEN ("fr%i_o, ", i + 3); + } + GEN ("\n start_i, end_o, busy_o);\n\n"); + + GEN ("parameter Tp = 1;\n\n"); + + GEN ("input clk, rst;\n"); + GEN ("input start_i;\t/* Module starts when set to 1 */ \n"); + GEN ("output end_o;\t/* Set when module finishes, cleared upon start_i == 1 */\n"); + GEN ("output busy_o;\t/* Set when module should not be interrupted */\n"); + GEN ("\n/* Bus signals */\n"); + GEN ("output l_req_o, s_req_o;\n"); + GEN ("input l_rdy_i, s_rdy_i;\n"); + GEN ("output [3:0] l_sel_o, s_sel_o;\n"); + GEN ("output [31:0] l_adr_o, s_adr_o;\n"); + GEN ("output l_linbrst_o, s_linbrst_o;\n"); + GEN ("input [31:0] l_dat_i;\n"); + GEN ("output [31:0] s_dat_o;\n\n"); + + GEN ("reg l_req_o, s_req_o;\n"); + GEN ("reg [31:0] l_adr_o, s_adr_o;\n"); + GEN ("reg [3:0] l_sel_o, s_sel_o;\n"); + GEN ("reg [31:0] s_dat_o;\n"); + GEN ("reg l_linbrst_o, s_linbrst_o;\n"); + + if (ci || co) GEN ("\n/* module ports */\n"); + if (ci) { + int first = 1; + GEN ("input [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (f->used_regs[i]) { + GEN ("%sr%i_i", first ? " " : ", ", i); + first = 0; + } + GEN (";\n"); + } + + if (co) { + int first = 1; + GEN ("output [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) { + GEN ("%sr%i_o", first ? 
" " : ", ", i); + first = 0; + } + GEN (";\n"); + } + + if (f->nfdeps) { + GEN ("\n/* Function calls */\n"); + GEN ("output [31:0] fr3_o"); + for (i = 1; i < 6; i++) GEN (", fr%i_o", i + 3); + GEN (";\n"); + GEN ("input [31:0] fr11_i;\n"); + if (log2_int(f->nfdeps) > 0) GEN ("output [%i:0] fid_o;\n", log2_int(f->nfdeps)); + GEN ("output fstart_o;\n"); + GEN ("input fend_i;\n"); + } + + /* Count loads & stores */ + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_STORE) nstores++; + else if (f->mtype[i] & MT_LOAD) nloads++; + else if (f->mtype[i] & MT_CALL) ncalls++; + + /* Output internal registers for loads */ + if (nloads) { + int first = 1; + int num = 0; + GEN ("\n/* internal registers for loads */\n"); + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_LOAD) { + GEN ("%st%x_%x", first ? "reg [31:0] " : ", ", + REF_BB(f->msched[i]), REF_I(f->msched[i])); + + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + } + + /* Internal register for function return value */ + if (f->nfdeps) { + GEN ("\n/* Internal register for function return value */\n"); + GEN ("reg [31:0] fr11_r;\n"); + } + + GEN ("\n/* 'zero or one' hot state machines */\n"); + if (nloads) GEN ("reg [%i:0] l_stb; /* loads */\n", nloads - 1); + if (nstores) GEN ("reg [%i:0] s_stb; /* stores */\n", nstores - 1); + GEN ("reg [%i:0] bb_stb; /* basic blocks */\n", f->num_bb - 1); + + { + int first = 2; + int num = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_COND + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + if (first == 2) GEN ("\n/* basic block condition wires */\n"); + GEN ("%st%x_%x", first ? 
"wire " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + GEN ("\n/* forward declaration of normal wires */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (!(f->bb[b].insn[i].type & (IT_COND | IT_BRANCH)) + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + /* Exclude loads */ + if (f->bb[b].insn[i].type & IT_MEMORY && II_IS_LOAD (f->bb[b].insn[i].index)) continue; + GEN ("%st%x_%x", first ? "wire [31:0] " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + GEN ("\n/* forward declaration registers */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index == II_REG + && f->bb[b].insn[i].index != II_LRBB) { + GEN ("%st%x_%x", first ? "reg [31:0] " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index == II_LRBB) { + GEN ("%st%x_%x", first ? 
"reg " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + } + + if (nloads || nstores) GEN ("\n/* dependencies */\n"); + if (nloads) GEN ("wire [%i:0] l_end = l_stb & {%i{l_rdy_i}};\n", + nloads - 1, nloads); + if (nstores) GEN ("wire [%i:0] s_end = s_stb & {%i{s_rdy_i}};\n", + nstores - 1, nstores); + if (ncalls) GEN ("wire [%i:0] f_end = f_stb & {%i{fend_i}};\n", + ncalls - 1, ncalls); + + GEN ("\n/* last dependency */\n"); + GEN ("wire end_o = "); + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < 2; i++) if (f->bb[b].next[i] == BBID_END) { + GEN ("bb_stb[%i]", b); + if (f->bb[b].mdep) { + GEN (" && "); + print_deps (fo, f, b, f->bb[b].mdep, 0); + } + /* Is branch to BBID_END conditional? */ + if (f->bb[b].next[1 - i] >= 0) { + int bidx = branch_index (&f->bb[b]); + char t[30]; + print_op_v (f, t, REF (b, bidx), 1); + GEN (" && %s%s", i ? "" : "!", t); + } + } + } + GEN (";\n"); + GEN ("wire busy_o = |bb_stb;\n"); + + + GEN ("\n/* Basic block triggers */\n"); + GEN ("wire [%2i:0] bb_start = {\n", f->num_bb - 1); + for (b = f->num_bb - 1; b >= 0; b--) { + GEN (" /* bb_start[%2i] */ ", b); + for (i = 0; i < 2; i++) if (f->bb[b].prev[i] >= 0 && f->bb[b].prev[i] != BBID_START) { + cuc_bb *prev = &f->bb[f->bb[b].prev[i]]; + int t; + if (i) GEN ("\n || "); + if (prev->mdep) { + print_deps (fo, f, f->bb[b].prev[i], prev->mdep, 0); + GEN (" && "); + } + GEN ("bb_stb[%i]", f->bb[b].prev[i]); + if (prev->next[0] >= 0 && prev->next[0] != BBID_END + && prev->next[1] >= 0 && prev->next[1] != BBID_END) { + int bi = REF (f->bb[b].prev[i], branch_index (&f->bb[f->bb[b].prev[i]])); + int ci; + assert (bi >= 0); + ci = f->INSN(bi).op[1]; + t = prev->next[0] == b; + GEN (" && "); + if (f->INSN(bi).opt[1] & OPT_REF) { + GEN ("%st%x_%x", t ? 
"" : "!", REF_BB(ci), REF_I(ci)); + } else { + cucdebug (5, "%x!%x!%x\n", bi, ci, f->INSN(bi).opt[1]); + assert (f->INSN(bi).opt[1] & OPT_CONST); + GEN ("%s%i", t ? "" : "!", ci); + } + } + } else break; + if (!i) GEN ("start_i"); + if (b == 0) GEN ("};\n"); + else GEN (",\n"); + } + + GEN ("\n/* Register the bb_start */\n"); + GEN ("reg [%2i:0] bb_start_r;\n\n", f->num_bb - 1); + GEN ("always @(posedge rst or posedge clk)\n"); + GEN ("begin\n"); + GEN (" if (rst) bb_start_r <= #Tp %i'b0;\n", f->num_bb); + GEN (" else if (end_o) bb_start_r <= #Tp %i'b0;\n", f->num_bb); + GEN (" else bb_start_r <= #Tp bb_start;\n"); + GEN ("end\n"); + + GEN ("\n/* Logic */\n"); + /* output body */ + for (b = 0; b < f->num_bb; b++) { + GEN ("\t\t/* BB%i */\n", b); + for (i = 0; i < f->bb[b].ninsn; i++) + print_insn_v (fo, f, b, i); + GEN ("\n"); + } + + if (co) { + GEN ("\n/* Outputs */\n"); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) + GEN ("assign r%i_o = t%x_%x;\n", i, REF_BB(f->lur[i]), + REF_I(f->lur[i])); + } + + if (nstores) { + int cur_store; + GEN ("\n/* Memory stores */\n"); + GEN ("always @(s_stb"); + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_STORE) { + char t[30]; + unsigned long opt = f->INSN(f->msched[i]).opt[0]; + if ((opt & ~OPT_DEST) != OPT_CONST) { + GEN (" or %s", print_op_v (f, t, f->msched[i], 0)); + } + } + + cur_store = 0; + GEN (")\nbegin\n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_STORE) { + char t[30]; + GEN (" %sif (s_stb[%i]) s_dat_o = %s;\n", cur_store == 0 ? 
"" : "else ", cur_store, + print_op_v (f, t, f->msched[i], 0)); + cur_store++; + //PRINTF ("msched[%i] = %x (mtype %x) %x\n", i, f->msched[i], f->mtype[i], f->INSN(f->msched[i]).op[0]); + } + GEN (" else s_dat_o = 32'hx;\n"); + GEN ("end\n"); + } + + /* Generate load and store state machine */ +#if 0 + GEN ("\n/* Load&store state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN (" if (rst) begin\n"); + if (nloads) GEN (" l_stb <= #Tp %i'h0;\n", nloads); + if (nstores) GEN (" s_stb <= #Tp %i'h0;\n", nstores); + GEN (" end else begin\n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_LOAD || f->mtype[i] & MT_STORE) { + int cur = 0; + dep_list *dep = f->INSN(f->msched[i]).dep; + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN (" if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + print_turn_off_dep (fo, f, dep); + GEN (" %c_stb[%i] <= #Tp 1'b1;\n", f->mtype[i] & MT_LOAD ? 'l' : 's', cur++); + GEN (" end\n"); + } + GEN (" if (%c_end[%i]) %c_stb <= #Tp %i'h0;\n", c, cur - 1, c, cur); + GEN (" end\n"); +#endif + + /* Generate state generator machine */ + for (j = 0; j < 2; j++) { + char c; + char *s; + + switch (j) { + case 0: c = 'l'; s = "Load"; break; + case 1: c = 's'; s = "Store"; break; + case 2: c = 'c'; s = "Calls"; break; + } + if ((j == 0 && nloads) + || (j == 1 && nstores) + || (j == 2 && ncalls)) { + int cur = 0; + char t[30]; + + GEN ("\n/* %s state generator machine */\n", s); + GEN ("always @("); + for (i = 0; i < f->nmsched; i++) { + print_op_v (f, t, f->msched[i], 1); + GEN ("%s or ", t); + } + GEN ("bb_start_r"); + if (nloads) GEN (" or l_end"); + if (nstores) GEN (" or s_end"); + GEN (")\n"); + GEN ("begin\n "); + cucdebug (1, "%s\n", s); + for (i = 0; i < f->nmsched; i++) + if ((j == 0 && f->mtype[i] & MT_LOAD) + || (j == 1 && f->mtype[i] & MT_STORE) + || (j == 2 && f->mtype[i] & MT_CALL)) { + cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, 
f->msched[i], f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN ("if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + GEN (" %c_req_o = 1'b1;\n", c); + GEN (" %c_sel_o[3:0] = 4'b", c); + switch (f->mtype[i] & MT_WIDTH) { + case 1: GEN ("0001 << (%s & 32'h3);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 2: GEN ("0011 << ((%s & 32'h1) << 1);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 4: GEN ("1111;\n"); break; + default: assert (0); + } + GEN (" %c_linbrst_o = 1'b%i;\n", c, + (f->mtype[i] & MT_BURST) && !(f->mtype[i] & MT_BURSTE) ? 1 : 0); + GEN (" %c_adr_o = t%lx_%lx & ~32'h3;\n", c, + REF_BB(f->INSN(f->msched[i]).op[1]), REF_I(f->INSN(f->msched[i]).op[1])); + GEN (" end else "); + } + GEN ("if (%c_end[%i]) begin\n", c, cur - 1); + GEN (" %c_req_o = 1'b0;\n", c); + GEN (" %c_sel_o[3:0] = 4'bx;\n", c); + GEN (" %c_linbrst_o = 1'b0;\n", c); + GEN (" %c_adr_o = 32'hx;\n", c); + GEN (" end else begin\n"); + GEN (" %c_req_o = 1'b0;\n", c); + GEN (" %c_sel_o[3:0] = 4'bx;\n", c); + GEN (" %c_linbrst_o = 1'b0;\n", c); + GEN (" %c_adr_o = 32'hx;\n", c); + GEN (" end\n"); + GEN ("end\n"); + } + } + + if (ncalls) { + int cur_call = 0; + GEN ("\n/* Function calls state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) begin\n"); + GEN (" f_stb <= #Tp %i'h0;\n", nstores); + for (i = 0; i < 6; i++) GEN (" fr%i_o <= #Tp 32'h0;\n", i + 3); + if (log2_int(ncalls)) GEN (" fid_o <= #Tp %i'h0;\n", log2_int (f->nfdeps)); + GEN (" fstart_o <= #Tp 1'b0;\n"); + //GEN (" f11_r <= #Tp 32'h0;\n"); + GEN (" end else begin\n"); + cucdebug (1, "calls \n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_CALL) { + dep_list *dep = f->INSN(f->msched[i]).dep; + cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN (" if ("); + 
print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + print_turn_off_dep (fo, f, dep); + GEN (" f_stb[%i] <= #Tp 1'b1;\n", cur_call++); + GEN (" fstart_o <= #Tp 1'b1;\n"); + if (log2_int (f->nfdeps)) + GEN (" fid_o <= #Tp %i'h%x;\n", log2_int (f->nfdeps), func_index (f, f->msched[i])); + + for (j = 0; j < 6; j++) + GEN (" fr%i_o <= #Tp t%x_%x;\n", j + 3, + REF_BB (f->msched[i]), REF_I (f->msched[i]) - 6 + i); + GEN (" end\n"); + } + GEN (" if (f_end[%i]) begin\n", ncalls - 1); + GEN (" f_stb <= #Tp %i'h0;\n", ncalls); + GEN (" f_start_o <= #Tp 1'b0;\n"); + GEN (" end\n"); + GEN (" end\n"); + GEN ("end\n"); + } + + GEN ("\n/* Basic blocks state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + GEN (" else if (end_o) begin\n"); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + for (i = 0; i < f->num_bb; i++) { + GEN (" end else if (bb_start[%i]) begin\n", i); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 1 << i); + } + GEN (" end else if (end_o) begin\n"); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + GEN (" end\n"); + GEN ("end\n"); + + /* output footer */ + GEN ("\nendmodule\n"); + + fclose (fo); +} + +void generate_main (int nfuncs, cuc_func **f, char *filename) +{ + FILE *fo; + int i, j, nrf, first; + char tmp[256]; + int ncallees[MAX_FUNCS]; + int nl[MAX_FUNCS], ns[MAX_FUNCS]; + int maxncallees = 0; + sprintf (tmp, "%s_top.v", filename); + + for (i = 0, nrf = 0; i < nfuncs; i++) { + nl[i] = ns[i] = 0; + ncallees[i] = 0; + if (f[i]) { + f[i]->tmp = nrf++; + for (j = 0; j < f[i]->nmsched; j++) + if (f[i]->mtype[j] & MT_LOAD) nl[i]++; + else if (f[i]->mtype[j] & MT_STORE) ns[i]++; + for (j = 0; j < f[i]->nfdeps; j++) + ncallees[f[i]->fdeps[j]->tmp]++; + } + } + if (!nrf) return; + + for (i = 0; i < nrf; i++) + if (maxncallees < ncallees[i]) maxncallees = ncallees[i]; + + log ("Generating verilog file \"%s\"\n", tmp); + 
PRINTF ("Generating verilog file \"%s\"\n", tmp); + if ((fo = fopen (tmp, "wt+")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", tmp); + exit (1); + } + + /* output header */ + GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp); + GEN (" (C) 2002 Opencores */\n\n"); + GEN ("/* Includes %i functions:", nrf); + for (i = 0; i < nfuncs; i++) if (f[i]) + GEN ("\n%s", prof_func[i].name); + GEN (" */\n\n"); + + GEN ("`include \"timescale.v\"\n\n"); + GEN ("module %s (clk, rst,\n", filename); + GEN (" /* Load and store master Wishbone ports */\n"); + GEN (" l_adr_o, l_dat_i, l_cyc_o, l_stb_o,\n"); + GEN (" l_sel_o, l_linbrst_o, l_rdy_i, l_we_o,\n"); + GEN (" s_adr_o, s_dat_o, s_cyc_o, s_stb_o,\n"); + GEN (" s_sel_o, s_linbrst_o, s_rdy_i, s_we_o,\n\n"); + GEN (" /* cuc interface */\n"); + GEN (" cuc_stb_i, cuc_adr_i, cuc_dat_i, cuc_dat_o, cuc_we_i, cuc_rdy_o);\n\n"); + + GEN ("parameter Tp = 1;\n"); + GEN ("\n/* module ports */\n"); + GEN ("input clk, rst, cuc_stb_i, cuc_we_i;\n"); + GEN ("input l_rdy_i, s_rdy_i;\n"); + GEN ("output l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n"); + GEN ("reg l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n"); + GEN ("output s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n"); + GEN ("reg s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n"); + GEN ("output cuc_rdy_o; /* Not registered ! 
*/\n"); + GEN ("output [3:0] l_sel_o, s_sel_o;\n"); + GEN ("reg [3:0] l_sel_o, s_sel_o;\n"); + GEN ("output [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n"); + GEN ("reg [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n"); + GEN ("input [15:0] cuc_adr_i;\n"); + GEN ("input [31:0] l_dat_i, cuc_dat_i;\n\n"); + + GEN ("wire [%2i:0] i_we, i_re, i_finish, i_selected, i_first_reg;\n", nrf - 1); + GEN ("wire [%2i:0] i_bidok, i_start_bid, i_start_bidok, main_start, main_end;\n", nrf - 1); + GEN ("wire [%2i:0] i_start, i_end, i_start_block, i_busy;\n", nrf - 1); + GEN ("wire [%2i:0] i_l_req, i_s_req;\n", nrf - 1); + GEN ("reg [%2i:0] i_go_bsy, main_start_r;\n", nrf - 1); + + GEN ("assign i_selected = {\n"); + for (i = 0; i < nrf; i++) + GEN (" cuc_adr_i[15:6] == %i%s\n", i, i < nrf - 1 ? "," : "};"); + + GEN ("assign i_first_reg = {\n"); + for (i = 0; i < nfuncs; i++) if (f[i]) { + for (j = 0; j <= MAX_REGS; j++) if (f[i]->used_regs[j]) break; + GEN (" cuc_adr_i[5:0] == %i%s\n", j, f[i]->tmp < nrf - 1 ? "," : "};"); + } + + GEN ("assign i_we = {%i{cuc_stb_i && cuc_we_i}} & i_selected;\n", nrf); + GEN ("assign i_re = {%i{cuc_stb_i && !cuc_we_i}} & i_selected;\n", nrf); + + GEN ("assign i_start = i_go_bsy & {%i{cuc_rdy_o}};\n", nrf); + GEN ("assign i_start_bidok = {\n"); + for (i = 0; i < nrf; i++) + GEN (" i_start_bid[%i] < %i%s\n", i, i, i < nrf - 1 ? 
"," : "};"); + GEN ("assign main_start = i_start & i_selected & i_first_reg & i_we;\n"); + GEN ("assign main_end = {%i{i_end}} & i_selected;\n"); + + GEN ("\nalways @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) i_go_bsy <= #Tp %i'b0;\n", nrf); + GEN (" else i_go_bsy <= #Tp i_we | ~i_finish & i_go_bsy;\n"); + GEN ("end\n"); + + + /* Function specific data */ + for (i = 0; i < nfuncs; i++) if (f[i]) { + int ci = 0, co = 0; + int fn = f[i]->tmp; + GEN ("\n/* Registers for function %s */\n", prof_func[i].name); + for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j]) { + GEN ("%s i%i_r%ii", first ? "/* inputs */\nreg [31:0]" : ",", fn, j); + first = 0; + ci++; + } + if (ci) GEN (";\n"); + + for (j = 0, first = 1; j < MAX_REGS; j++) + if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) { + GEN ("%s i%i_r%io", first ? "/* outputs */\nwire [31:0]" : ",", fn, j); + first = 0; + co++; + } + if (co) GEN (";\n"); + GEN ("wire [31:0] i%i_l_adr, i%i_s_adr;\n", fn, fn); + + GEN ("always @(posedge clk or posedge rst)\n"); + GEN (" if (rst) main_start_r <= #Tp %i'b0;\n", nrf); + GEN (" else main_start_r <= #Tp main_start & i_start_bidok | i_busy | ~i_end & main_start_r;\n"); + + if (ci) { + GEN ("\n/* write register access */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) begin\n"); + for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j]) + GEN (" i%i_r%ii <= #Tp 32'h0;\n", fn, j); + GEN (" end else if (!i_go_bsy[%i] && i_we[%i])\n", fn, fn); + GEN (" case (cuc_adr_i[5:0])\n"); + for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j]) + GEN (" %-2i: i%i_r%ii <= #Tp cuc_dat_i;\n", j, fn, j); + GEN (" endcase\n"); + GEN ("end\n"); + } + + GEN ("\n"); + } + + /* Generate machine for reading all function registers. 
Register read can be + delayed till function completion */ + { + int co; + GEN ("/* read register access - data */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN (" if (rst) cuc_dat_o <= #Tp 32'h0;\n"); + GEN (" else if (cuc_stb_i && cuc_we_i) begin\n"); + GEN (" "); + + for (i = 0; i < nfuncs; i++) if (f[i]) { + co = 0; + for (j = 0; j < MAX_REGS; j++) + if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) co++; + + GEN ("if (cuc_adr_i[15:6] == %i)", f[i]->tmp); + if (co) { + first = 1; + GEN ("\n case (cuc_adr_i[5:0])\n"); + for (j = 0; j < MAX_REGS; j++) + if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) + GEN (" %-2i: cuc_dat_o <= #Tp i%i_r%io;\n", j, f[i]->tmp, j); + GEN (" endcase\n"); + } else { + GEN (" cuc_dat_o <= #Tp 32'hx;\n"); + } + GEN (" else "); + } + GEN ("cuc_dat_o <= #Tp 32'hx;\n"); + GEN (" end else cuc_dat_o <= #Tp 32'hx;\n"); + + GEN ("\n/* read register access - acknowledge */\n"); + GEN ("assign cuc_rdy_o = cuc_stb_i && cuc_we_i && |(i_selected & main_end);\n"); + } + + /* Store/load Wishbone bridge */ + for (j = 0; j < 2; j++) { + char t = j ? 's' : 'l'; + GEN ("\n/* %s Wishbone bridge */\n", j ? "store" : "load"); + GEN ("reg [%i:0] %cm_sel;\n", log2_int (nrf), t); + GEN ("reg [%i:0] %cm_bid;\n", log2_int (nrf), t); + GEN ("reg %ccyc_ip;\n\n", t); + GEN ("always @(posedge clk)\n"); + GEN ("begin\n"); + GEN (" %c_we_o <= #Tp 1'b%i;\n", t, j); + GEN (" %c_cyc_o <= #Tp |i_%c_req;\n", t, t); + GEN (" %c_stb_o <= #Tp |i_%c_req;\n", t, t); + GEN ("end\n"); + + GEN ("\n/* highest bid */\n"); + GEN ("always @("); + for (i = 0; i < nrf; i++) GEN ("%si_%c_req", i > 0 ? " or " : "", t); + GEN (")\n"); + for (i = 0; i < nrf; i++) GEN (" %sif (i_%c_req) %cm_bid = %i'h%x;\n", + i ? 
"else " : "", t, t, log2_int (nrf) + 1, i); + + GEN ("\n/* selected transfer */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN (" if (rst) %cm_sel <= #Tp %i'h0;\n", t, log2_int (nrf) + 1); + GEN (" else if (%c_rdy_i) %cm_sel <= #Tp %i'h0;\n", t, t, log2_int (nrf) + 1); + GEN (" else if (!%ccyc_ip) %cm_sel <= #Tp %cm_bid;\n", t, t, t); + + GEN ("\n/* Cycle */\n"); + GEN ("\nalways @(posedge clk or posedge rst)\n"); + GEN (" if (rst) %ccyc_ip <= #Tp 1'b0;\n", t); + GEN (" else if (%c_rdy_i) %ccyc_ip <= #Tp 1'b0;\n", t, t); + GEN (" else %ccyc_ip <= #Tp %c_cyc_o;\n", t, t); + } + + GEN ("\n/* Acknowledge */\n"); + for (i = 0; i < nrf; i++) { + GEN ("wire i%i_s_rdy = ((sm_bid == %i & !scyc_ip) | sm_sel == %i) & s_rdy_i;\n", i, i, i); + GEN ("wire i%i_l_rdy = ((lm_bid == %i & !lcyc_ip) | lm_sel == %i) & l_rdy_i;\n", i, i, i); + } + + GEN ("\n/* data, address selects and burst enables */\n"); + for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_s_dat;\n", i); + for (i = 0; i < nrf; i++) GEN ("wire i%i_s_linbrst, i%i_l_linbrst;\n", i, i); + for (i = 0; i < nrf; i++) GEN ("wire [3:0] i%i_s_sel, i%i_l_sel;\n", i, i); + for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_l_dat = l_dat_i;\n", i); + GEN ("\nalways @(posedge clk)\n"); + GEN ("begin\n"); + GEN (" s_dat_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n sm_bid == %i ? i%i_s_dat : ", i, i); + GEN ("i%i_s_dat;\n", nrf - 1); + GEN (" s_adr_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n sm_bid == %i ? i%i_s_adr : ", i, i); + GEN ("i%i_s_adr;\n", nrf - 1); + GEN (" s_sel_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n sm_bid == %i ? i%i_s_sel : ", i, i); + GEN ("i%i_s_sel;\n", nrf - 1); + GEN (" s_linbrst_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n sm_bid == %i ? 
i%i_s_linbrst : ", i, i); + GEN ("i%i_s_linbrst;\n", nrf - 1); + GEN ("end\n\n"); + + GEN ("always @(posedge clk)\n"); + GEN ("begin\n"); + GEN (" l_adr_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n lm_bid == %i ? i%i_l_adr : ", i, i); + GEN ("i%i_l_adr;\n", nrf - 1); + GEN (" l_sel_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n lm_bid == %i ? i%i_l_sel : ", i, i); + GEN ("i%i_l_sel;\n", nrf - 1); + GEN (" l_linbrst_o <= #Tp "); + for (i = 0; i < nrf - 1; i++) + GEN ("\n lm_bid == %i ? i%i_l_linbrst : ", i, i); + GEN ("i%i_l_linbrst;\n", nrf - 1); + GEN ("end\n\n"); + + /* start/end signals */ + GEN ("\n\n/* start/end signals */\n"); + for (i = 0; i < nrf; i++) { + if (log2_int (maxncallees + 1)) + GEN ("wire [%i:0] i%i_current = i%i_busy ? i%i_current_r : i%i_start_bid;\n", + log2_int (maxncallees + 1), i, i, i, i); + else GEN ("wire i%i_current = 0;\n", i); + } + GEN ("\n"); + + for (i = 0, j = 0; i < nfuncs; i++) if (f[i]) { + if (log2_int (ncallees[i])) { + GEN ("reg [%i:0] i%i_start_bid;\n", log2_int (ncallees[i]), j); + GEN ("always @(start%i", f[i]->tmp); + for (j = 0, first = 1; j < f[i]->nfdeps; j++) + if (f[i]->fdeps[j]) GEN (", "); + GEN (")\n"); + GEN ("begin !!!\n"); //TODO + GEN (" \n"); + GEN ("end\n"); + } + GEN ("wire i%i_start = main_start[%i];\n", j, j); + j++; + } + GEN ("\n"); + + for (i = 0; i < nfuncs; i++) if (f[i]) { + int nf = f[i]->tmp; + GEN ("\n%s%s i%i(.clk(clk), .rst(rst),\n", filename, prof_func[i].name, nf); + GEN (" .l_adr_o(i%i_l_adr), .l_dat_i(i%i_l_dat), .l_req_o(i_l_req[%i]),\n", + nf, nf, nf); + GEN (" .l_sel_o(i%i_l_sel), .l_linbrst_o(i%i_l_linbrst), .l_rdy_i(i%i_l_rdy),\n", + nf, nf, nf); + GEN (" .s_adr_o(i%i_s_adr), .s_dat_o(i%i_s_dat), .s_req_o(i_s_req[%i]),\n", + nf, nf, nf); + GEN (" .s_sel_o(i%i_s_sel), .s_linbrst_o(i%i_s_linbrst), .s_rdy_i(i%i_s_rdy),\n", + nf, nf, nf); + GEN (" "); + for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j]) + GEN (".r%i_i(i%i_r%ii), ", j, nf, j), first = 0; 
+ + if (first) GEN ("\n "); + for (j = 0, first = 1; j < MAX_REGS; j++) + if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) + GEN (".r%i_o(i%i_r%io), ", j, nf, j), first = 0; + if (first) GEN ("\n "); + if (f[i]->nfdeps) { + GEN (".fstart_o(i_fstart[%i]), .fend_i(i_fend[%i]), .fid_o(i%i_fid),\n", i, i, i), + GEN (" .fr3_o(i%i_fr3), .fr4_o(i%i_fr4), .fr5_o(i%i_fr5), .fr6_o(i%i_fr6),\n"); + GEN (" .fr7_o(i%i_fr7), .fr8_o(i%i_fr8), .fr11_i(i%i_fr11i),\n "); + } + GEN (".start_i(i_start[%i]), .end_o(i_end[%i]), .busy_o(i_busy[%i]));\n", nf, nf, nf); + } + + /* output footer */ + GEN ("\nendmodule\n"); + + fclose (fo); +} +
verilog.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: cuc.c =================================================================== --- cuc.c (nonexistent) +++ cuc.c (revision 1765) @@ -0,0 +1,876 @@ +/* cuc.c -- OpenRISC Custom Unit Compiler + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Main file, including code optimization and command prompt */ + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" +#include "profiler.h" +#include "opcode/or32.h" +#include "parse.h" +#include "debug.h" + +FILE *flog; +int cuc_debug = 0; + +/* Last used registers by software convention */ +/* Note that r11 is caller saved register, and we can destroy it. + Due to CUC architecture we must always return something, even garbage (so that + caller knows, we are finished, when we send acknowledge). 
+ In case r11 was not used (trivial register assignment) we will remove it later, + but if we assigned a value to it, it must not be removed, so caller_saved[11] = 0 */ +const int caller_saved[MAX_REGS] = { + 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 1, 1}; + +/* Does all known instruction optimizations */ +void cuc_optimize (cuc_func *func) +{ + int modified = 0; + int first = 1; + log ("Optimizing.\n"); + do { + modified = 0; + clean_deps (func); + if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_CLEAN_DEPS"); + if (optimize_cmovs (func)) { + if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_CMOVS"); + modified = 1; + } + if (cuc_debug) cuc_check (func); + if (optimize_tree (func)) { + if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_TREE1"); + modified = 1; + } + if (remove_nops (func)) { + if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); + modified = 1; + } + if (cuc_debug) cuc_check (func); + if (remove_dead (func)) { + if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD"); + modified = 1; + } + if (cuc_debug) cuc_check (func); + if (cse (func)) { + log ("Common subexpression elimination.\n"); + if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE"); + modified = 1; + } + if (first) { + insert_conditional_facts (func); + if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_COND_FACT"); + if (cuc_debug) cuc_check (func); + first = 0; + } + if (optimize_bb (func)) { + if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB"); + modified = 1; + } + if (cuc_debug) cuc_check (func); + if (remove_nops (func)) { + if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); + modified = 1; + } + if (remove_dead_bb (func)) { + if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB"); + modified = 1; + } + if (remove_trivial_regs (func)) { + if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL"); + modified = 1; + } + if (remove_nops (func)) { + if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); + modified = 1; + } + 
add_memory_dep (func, func->memory_order); + if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); + add_data_dep (func); + if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP"); + if (schedule_memory (func, func->memory_order)) { + if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); + modified = 1; + } + } while (modified); + set_io (func); +#if 0 + detect_max_values (func); + if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_MAX_VALUES"); +#endif +} + +/* Pre/unrolls basic block and optimizes it */ +cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j) +{ + cuc_func *func; + cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i); + log ("BB%i unroll %i times preroll %i times\n", b, j, i); + func = preunroll_loop (f, b, i, j, bb_filename); + if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL"); + cuc_optimize (func); + analyse_timings (func, timings); + + cucdebug (2, "new_time = %i, old_time = %i, size = %f\n", + timings->new_time, func->orig_time, timings->size); + log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n", + timings->new_time, func->orig_time, timings->size); + //output_verilog (func, argv[1]); + free_func (func); + timings->b = b; + timings->unroll = j; + timings->preroll = i; + timings->nshared = 0; + return timings; +} + +/* Simple comparison function */ +int tim_comp (cuc_timings *a, cuc_timings *b) +{ + if (a->new_time < b->new_time) return -1; + else if (a->new_time > b->new_time) return 1; + else return 0; +} + +/* Analyses function; done when cuc command is entered in (sim) prompt */ +cuc_func *analyse_function (char *module_name, long orig_time, + unsigned long start_addr, unsigned long end_addr, + int memory_order, int num_runs) +{ + cuc_timings timings; + cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); + cuc_func *saved; + int b, i, j; + char tmp1[256]; + char tmp2[256]; + + func->orig_time = orig_time; + func->start_addr = start_addr; + 
func->end_addr = end_addr; + func->memory_order = memory_order; + func->nfdeps = 0; + func->fdeps = NULL; + func->num_runs = num_runs; + + sprintf (tmp1, "%s.bin", module_name); + cucdebug (2, "Loading %s.bin\n", module_name); + if (cuc_load (tmp1)) { + free (func); + return NULL; + } + + log ("Detecting basic blocks\n"); + detect_bb (func); + if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0); + + //sprintf (tmp1, "%s.bin.mp", module_name); + sprintf (tmp2, "%s.bin.bb", module_name); + generate_bb_seq (func, config.sim.mprof_fn, tmp2); + log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]); + log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]); + + build_bb (func); + if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB"); + reg_dep (func); + + log ("Detecting dependencies\n"); + if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP"); + cuc_optimize (func); + +#if 0 + csm (func); +#endif + assert (saved = dup_func (func)); + + timings.preroll = timings.unroll = 1; + timings.nshared = 0; + + add_latches (func); + if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES"); + analyse_timings (func, &timings); + + free_func (func); + log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n", + timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size); + saved->timings = timings; + +#if 1 + /* detect and unroll simple loops */ + for (b = 0; b < saved->num_bb; b++) { + cuc_timings t[MAX_UNROLL * MAX_PREROLL]; + cuc_timings *ut; + cuc_timings *cut = &t[0]; + int nt = 1; + double csize; + saved->bb[b].selected_tim = -1; + + /* Is it a loop? */ + if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; + log ("Found loop at BB%x. 
Trying to unroll.\n", b); + t[0] = timings; + t[0].b = b; + t[0].preroll = 1; + t[0].unroll = 1; + t[0].nshared = 0; + + sprintf (tmp1, "%s.bin.bb", module_name); + i = 1; + do { + cuc_timings *pt; + cuc_timings *cpt = cut; + j = 1; + + do { + pt = cpt; + cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i); + } while (j <= MAX_PREROLL && pt->new_time > cpt->new_time); + i++; + ut = cut; + cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i); + } while (i <= MAX_UNROLL && ut->new_time > cut->new_time); + + /* Sort the timings */ +#if 0 + if (cuc_debug >= 3) + for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time); +#endif + + qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); + + /* Delete timings, that have worst time and bigger size than other */ + j = 1; + csize = t[0].size; + for (i = 1; i < nt; i++) + if (t[i].size < csize) t[j++] = t[i]; + nt = j; + + cucdebug (1, "Available options\n"); + for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); + /* Add results from CSM */ + j = nt; + for (i = 0; i < saved->bb[b].ntim; i++) { + int i1; + for (i1 = 0; i1 < nt; i1++) { + t[j] = t[i1]; + t[j].size += saved->bb[b].tim[i].size - timings.size; + t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time; + t[j].nshared = saved->bb[b].tim[i].nshared; + t[j].shared = saved->bb[b].tim[i].shared; + if (++j >= MAX_UNROLL * MAX_PREROLL) goto full; + } + } + +full: + nt = j; + + cucdebug (1, "Available options:\n"); + for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); + + /* Sort again with new timings added */ + qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); + + /* Delete timings, that have worst time and bigger size than other */ + j = 1; + csize = t[0].size; + for (i = 1; i < nt; i++) + if 
(t[i].size < csize) t[j++] = t[i]; + nt = j; + + cucdebug (1, "Available options:\n"); + for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", + t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); + + if (saved->bb[b].ntim) free (saved->bb[b].tim); + saved->bb[b].ntim = nt; + assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt)); + + /* Copy options in reverse order -- smallest first */ + for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i]; + + log ("Available options:\n"); + for (i = 0; i < saved->bb[b].ntim; i++) { + log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n", + saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll, + saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size); + } + } +#endif + return saved; +} + +/* Utility option formatting functions */ +static const char *option_char = "?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +/*static */char *gen_option (char *s, int bb_no, int f_opt) +{ + if (bb_no >= 0) sprintf (s, "%i", bb_no); + assert (f_opt <= strlen (option_char)); + sprintf (s, "%s%c", s, option_char[f_opt]); + return s; +} + +/*static */void print_option (int bb_no, int f_opt) +{ + char tmp1[10]; + char tmp2[10]; + sprintf (tmp2, "%s", gen_option (tmp1, bb_no, f_opt)); + PRINTF ("%3s", tmp2); +} + +static char *format_func_options (char *s, cuc_func *f) +{ + int b, first = 1; + *s = '\0'; + for (b = 0; b < f->num_bb; b++) + if (f->bb[b].selected_tim >= 0) { + char tmp[10]; + sprintf (s, "%s%s%s", s, first ? 
"" : ",", gen_option (tmp, b, f->bb[b].selected_tim)); + first = 0; + } + return s; +} + +static void options_cmd (int func_no, cuc_func *f) +{ + int b, i; + char tmp[30]; + char *name = prof_func[func_no].name; + PRINTF ("-----------------------------------------------------------------------------\n"); + PRINTF ("|%-28s|pre/unrolled|shared| time | gates |old_time|\n", + strstrip (tmp, name, 28)); + PRINTF ("| BASE |%4i / %4i | %4i |%8i|%8.f|%8i|\n", 1, 1, 0, + f->timings.new_time, f->timings.size, f->orig_time); + for (b = 0; b < f->num_bb; b++) { + /* Print out results */ + for (i = 1; i < f->bb[b].ntim; i++) { /* First one is base option */ + int time = f->bb[b].tim[i].new_time - f->timings.new_time; + double size = f->bb[b].tim[i].size - f->timings.size; + PRINTF ("| "); + print_option (b, i); + PRINTF (" |%4i / %4i | %4i |%+8i|%+8.f| |\n", + f->bb[b].tim[i].preroll, f->bb[b].tim[i].unroll, f->bb[b].tim[i].nshared, + time, size); + } + } +} + +/* Generates a function, based on specified parameters */ +cuc_func *generate_function (cuc_func *rf, char *name, char *cut_filename) +{ + int b; + char tmp[256]; + cuc_timings tt; + cuc_func *f; + assert (f = dup_func (rf)); + + if (cuc_debug >= 2) print_cuc_bb (f, "BEFORE_GENERATE"); + log ("Generating function %s.\n", name); + PRINTF ("Generating function %s.\n", name); + + format_func_options (tmp, rf); + if (strlen (tmp)) PRINTF ("Applying options: %s\n", tmp); + else PRINTF ("Using basic options.\n"); + + /* Generate function as specified by options */ + for (b = 0; b < f->num_bb; b++) { + cuc_timings *st; + if (rf->bb[b].selected_tim < 0) continue; + st = &rf->bb[b].tim[rf->bb[b].selected_tim]; + sprintf (tmp, "%s.bin.bb", name); + preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll); + if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL"); + } + for (b = 0; b < f->num_bb; b++) { + cuc_timings *st; + if (rf->bb[b].selected_tim < 0) continue; + st = &rf->bb[b].tim[rf->bb[b].selected_tim]; + if 
(!st->nshared) continue; + assert (0); + //csm_gen (f, rf, st->nshared, st->shared); + } + add_latches (f); + if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES"); + analyse_timings (f, &tt); + + sprintf (tmp, "%s%s", cut_filename, name); + output_verilog (f, tmp, name); + return f; +} + +/* Calculates required time, based on selected options */ +int calc_cycles (cuc_func *f) +{ + int b, ntime = f->timings.new_time; + for (b = 0; b < f->num_bb; b++) + if (f->bb[b].selected_tim >= 0) { + assert (f->bb[b].selected_tim < f->bb[b].ntim); + ntime += f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time; + } + return ntime; +} + +/* Calculates required size, based on selected options */ +double calc_size (cuc_func *f) +{ + int b; + double size = f->timings.size; + for (b = 0; b < f->num_bb; b++) + if (f->bb[b].selected_tim >= 0) { + assert (f->bb[b].selected_tim < f->bb[b].ntim); + size += f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size; + } + return size; +} + +/* Dumps specified function to file (hex) */ +unsigned long extract_function (char *out_fn, unsigned long start_addr) +{ + FILE *fo; + unsigned long a = start_addr; + int x = 0; + assert (fo = fopen (out_fn, "wt+")); + + do { + unsigned long d = eval_direct32 (a, 0, 0); + int index = insn_decode (d); + assert (index >= 0); + if (x) x++; + if (strcmp (insn_name (index), "l.jr") == 0) x = 1; + a += 4; + fprintf (fo, "%08lx\n", d); + } while (x < 2); + + fclose (fo); + return a - 4; +} + +static cuc_func *func[MAX_FUNCS]; +static int func_v[MAX_FUNCS]; + +/* Detects function dependencies and removes */ +static void set_func_deps () +{ + int f, b, i, j; +restart: + for (f = 0; f < prof_nfuncs - 1; f++) if (func[f]) { + int fused[MAX_FUNCS] = {0}; + int c = 0; + for (b = 0; b < func[f]->num_bb; b++) + for (i = 0; i < func[f]->bb[b].ninsn; i++) { + cuc_insn *ii = &func[f]->bb[b].insn[i]; + if (ii->index == II_CALL) { + assert (ii->opt[0] == OPT_CONST); + for (j = 0; j < prof_nfuncs - 1; j++) + 
if (func[j] && func[j]->start_addr == ii->op[0]) break; + if (j >= prof_nfuncs - 1) { + log ("%s is calling unknown function, address %08lx\n", + prof_func[f].name, ii->op[0]); + debug (1, "%s is calling unknown function, address %08lx\n", + prof_func[f].name, ii->op[0]); + free_func (func[f]); + func[f] = NULL; + goto restart; + } else if (f == j) { + log ("%s is recursive, ignoring\n", prof_func[f].name); + debug (1, "%s is recursive, ignoring\n", prof_func[f].name); + free_func (func[f]); + func[f] = NULL; + goto restart; + } else fused[j]++; + } + } + for (i = 0; i < MAX_FUNCS; i++) if (fused[i]) c++; + if (func[f]->nfdeps) free (func[f]->fdeps); + func[f]->nfdeps = c; + func[f]->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * c); + for (i = 0, j = 0; i < MAX_FUNCS; i++) + if (fused[i]) func[f]->fdeps[j++] = func[i]; + } + + /* Detect loops */ + { + int change; + for (f = 0; f < MAX_FUNCS; f++) if (func[f]) func[f]->tmp = 0; + do { + change = 0; + for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) { + int o = 1; + for (i = 0; i < func[f]->nfdeps; i++) + if (!func[f]->fdeps[i]->tmp) {o = 0; break;} + if (o) { + func[f]->tmp = 1; + change = 1; + } + } + } while (change); + + change = 0; + for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) { + free_func (func[f]); + func[f] = NULL; + change = 1; + } + if (change) goto restart; + } +} + +void main_cuc (char *filename) +{ + int i, j; + char tmp1[256]; + char filename_cut[256]; +#if 0 /* Select prefix, based on binary program name */ + for (i = 0; i < sizeof (filename_cut); i++) { + if (isalpha(filename[i])) filename_cut[i] = filename[i]; + else { + filename_cut[i] = '\0'; + break; + } + } +#else + strcpy (filename_cut, "cu"); +#endif + + PRINTF ("Entering OpenRISC Custom Unit Compiler command prompt\n"); + PRINTF ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn); + sprintf (tmp1, "%s.log", filename_cut); + PRINTF ("Analyzing. 
(log file \"%s\").\n", tmp1); + assert (flog = fopen (tmp1, "wt+")); + + /* Loads in the specified timings table */ + PRINTF ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn, + generate_time_pretty (tmp1, config.sim.clkcycle_ps)); + load_timing_table (config.cuc.timings_fn); + runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps; + PRINTF ("Multicycle logic %s, bursts %s, %s memory order.\n", + config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF", + config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" : + config.cuc.memory_order == MO_STRONG ? "strong" : "exact"); + + prof_set (1, 0); + assert (prof_acquire (config.sim.prof_fn) == 0); + + if (config.cuc.calling_convention) + PRINTF ("Assuming OpenRISC standard calling convention.\n"); + + /* Try all functions except "total" */ + for (i = 0; i < prof_nfuncs - 1; i++) { + long orig_time; + unsigned long start_addr, end_addr; + orig_time = prof_func[i].cum_cycles; + start_addr = prof_func[i].addr; + + /* Extract the function from the binary */ + sprintf (tmp1, "%s.bin", prof_func[i].name); + end_addr = extract_function (tmp1, start_addr); + + log ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, start_addr, + end_addr); + PRINTF ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, + start_addr, end_addr); + func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, + end_addr, config.cuc.memory_order, prof_func[i].calls); + func_v[i] = 0; + } + set_func_deps (); + + while (1) { + char *s; +wait_command: + PRINTF ("(cuc) "); + fflush (stdout); +wait_command_empty: + s = fgets(tmp1, sizeof tmp1, stdin); + usleep (100); + if (!s) goto wait_command_empty; + for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++); + *s = '\0'; + + /* quit command */ + if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) { + /* Delete temporary files */ + for (i = 0; i < prof_nfuncs - 1; i++) { + sprintf (tmp1, "%s.bin", 
prof_func[i].name); + log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK"); + sprintf (tmp1, "%s.bin.bb", prof_func[i].name); + log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK"); + } + break; + + /* profile command */ + } else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) { + int ntime = 0; + int size = 0; + PRINTF ("-----------------------------------------------------------------------------\n"); + PRINTF ("|function name |calls|avg cycles |old%%| max. f. | impr. f.| options |\n"); + PRINTF ("|--------------------+-----+------------+----+----------|---------+---------|\n"); + for (j = 0; j < prof_nfuncs; j++) { + int bestcyc = 0, besti = 0; + char tmp[100]; + for (i = 0; i < prof_nfuncs; i++) + if (prof_func[i].cum_cycles > bestcyc) { + bestcyc = prof_func[i].cum_cycles; + besti = i; + } + i = besti; + PRINTF ("|%-20s|%5li|%12.1f|%3.0f%%| ", + strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls, + ((double)prof_func[i].cum_cycles / prof_func[i].calls), + (100. * prof_func[i].cum_cycles / prof_cycles)); + if (func[i]) { + double f = 1.0; + if (func_v[i]) { + int nt = calc_cycles (func[i]); + int s = calc_size (func[i]); + f = 1. * func[i]->orig_time / nt; + ntime += nt; + size += s; + } else ntime += prof_func[i].cum_cycles; + PRINTF ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles + / func[i]->timings.new_time, f, format_func_options (tmp, func[i])); + } else { + PRINTF (" N/A | N/A | N/A |\n"); + ntime += prof_func[i].cum_cycles; + } + prof_func[i].cum_cycles = -prof_func[i].cum_cycles; + } + for (i = 0; i < prof_nfuncs; i++) + prof_func[i].cum_cycles = -prof_func[i].cum_cycles; + PRINTF ("-----------------------------------------------------------------------------\n"); + PRINTF ("Total %i cycles (was %i), total added gates = %i. Speed factor %.1f\n", + ntime, prof_cycles, size, 1. 
* prof_cycles / ntime); + + /* debug command */ + } else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) { + sscanf (tmp1, "%*s %i", &cuc_debug); + if (cuc_debug < 0) cuc_debug = 0; + if (cuc_debug > 9) cuc_debug = 9; + + /* generate command */ + } else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) { + /* check for function dependencies */ + for (i = 0; i < prof_nfuncs; i++) + if (func[i]) func[i]->tmp = func_v[i]; + for (i = 0; i < prof_nfuncs; i++) if (func[i]) + for (j = 0; j < func[i]->nfdeps; j++) + if (!func[i]->fdeps[j] || !func[i]->fdeps[j]->tmp) { + PRINTF ("Function %s must be selected for translation (required by %s)\n", + prof_func[j].name, prof_func[i].name); + goto wait_command; + } + for (i = 0; i < prof_nfuncs; i++) + if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name, filename_cut); + generate_main (prof_nfuncs, func, filename_cut); + + /* list command */ + } else if (strcmp (tmp1, "l") == 0 || strcmp (tmp1, "list") == 0) { + /* check for function dependencies */ + for (i = 0; i < prof_nfuncs; i++) + if (func_v[i]) { + PRINTF ("%s\n", prof_func[i].name); + } + + /* selectall command */ + } else if (strcmp (tmp1, "sa") == 0 || strcmp (tmp1, "selectall") == 0) { + int f; + for (f = 0; f < prof_nfuncs; f++) if (func[f]) { + func_v[f] = 1; + PRINTF ("Function %s selected for translation.\n", prof_func[f].name); + } + + /* select command */ + } else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) { + char tmp[50], ch; + int p, o, b, f; + p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); + if (p < 1) PRINTF ("Invalid parameters.\n"); + else { + /* Check if we have valid option */ + for (f = 0; f < prof_nfuncs; f++) + if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; + if (f < prof_nfuncs) { + if (p == 1) { + if (func[f]) { + func_v[f] = 1; + PRINTF ("Function %s selected for translation.\n", prof_func[f].name); + } else PRINTF ("Function %s not suitable for 
translation.\n", prof_func[f].name); + } else { + if (!func_v[f]) + PRINTF ("Function %s not yet selected for translation.\n", prof_func[f].name); + if (p < 3) goto invalid_option; + for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); + if (!option_char[o]) goto invalid_option; + if (b < 0 || b >= func[f]->num_bb) goto invalid_option; + if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; + + /* select an option */ + func[f]->bb[b].selected_tim = o; + if (func[f]->bb[b].tim[o].nshared) { + PRINTF ("Option has shared instructions: "); + print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared); + PRINTF ("\n"); + } + goto wait_command; +invalid_option: + PRINTF ("Invalid option.\n"); + } + } else PRINTF ("Invalid function.\n"); + } + + /* unselect command */ + } else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) { + char tmp[50], ch; + int p, o, b, f; + p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); + if (p < 1) PRINTF ("Invalid parameters.\n"); + else { + /* Check if we have valid option */ + for (f = 0; f < prof_nfuncs; f++) + if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; + if (f < prof_nfuncs) { + if (p == 1) { + if (func[f]) { + func_v[f] = 0; + PRINTF ("Function %s unselected for translation.\n", prof_func[f].name); + } else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name); + } else { + if (p < 3) goto invalid_option; + for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); + if (!option_char[o]) goto invalid_option; + if (b < 0 || b >= func[f]->num_bb) goto invalid_option; + if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; + + /* select an option */ + func[f]->bb[b].selected_tim = -1; + } + } else PRINTF ("Invalid function.\n"); + } + + /* options command */ + } else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) { + int any = 0; + PRINTF ("Available options:\n"); + for (i = 0; i < prof_nfuncs; i++) + if (func[i]) 
{ + options_cmd (i, func[i]); + any = 1; + } + if (any) PRINTF ("-----------------------------------------------------------------------------\n"); + else PRINTF ("Sorry. No available options.\n"); + + /* Ignore empty string */ + } else if (strcmp (tmp1, "") == 0) { + + /* help command */ + } else { + if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0) + PRINTF ("Unknown command.\n"); + PRINTF ("OpenRISC Custom Unit Compiler command prompt\n"); + PRINTF ("Available commands:\n"); + PRINTF (" h | help displays this help\n"); + PRINTF (" q | quit returns to or1ksim prompt\n"); + PRINTF (" p | profile displays function profiling\n"); + PRINTF (" d | debug # sets debug level (0-9)\n"); + PRINTF (" o | options displays available options\n"); + PRINTF (" s | select func [option] selects an option/function\n"); + PRINTF (" u | unselect func [option] unselects an option/function\n"); + PRINTF (" g | generate generates verilog file\n"); + PRINTF (" l | list displays selected functions\n"); + } + } + + /* Dispose memory */ + for (i = 0; i < prof_nfuncs -1; i++) + if (func[i]) free_func (func[i]); + + fclose (flog); +} + +/*----------------------------------------------------[ CUC Configuration ]---*/ +void cuc_calling_convention(union param_val val, void *dat) +{ + config.cuc.calling_convention = val.int_val; +} + +void cuc_enable_bursts(union param_val val, void *dat) +{ + config.cuc.enable_bursts = val.int_val; +} + +void cuc_no_multicycle(union param_val val, void *dat) +{ + config.cuc.no_multicycle = val.int_val; +} + +void cuc_memory_order(union param_val val, void *dat) +{ + if (strcmp (val.str_val, "none") == 0) + config.cuc.memory_order = MO_NONE; + else if (strcmp (val.str_val, "weak") == 0) + config.cuc.memory_order = MO_WEAK; + else if (strcmp (val.str_val, "strong") == 0) + config.cuc.memory_order = MO_STRONG; + else if (strcmp (val.str_val, "exact") == 0) { + config.cuc.memory_order = MO_EXACT; + } else { + char tmp[200]; + sprintf (tmp, "invalid memory 
order '%s'.\n", val.str_val); + CONFIG_ERROR(tmp); + } +} + +void cuc_timings_fn(union param_val val, void *dat) +{ + strcpy(config.cuc.timings_fn, val.str_val); +} + +void reg_cuc_sec(void) +{ + struct config_section *sec = reg_config_sec("cuc", NULL, NULL); + + reg_config_param(sec, "calling_convention", paramt_int, cuc_calling_convention); + reg_config_param(sec, "enable_bursts", paramt_int, cuc_enable_bursts); + reg_config_param(sec, "no_multicycle", paramt_int, cuc_no_multicycle); + reg_config_param(sec, "memory_order", paramt_word, cuc_memory_order); + reg_config_param(sec, "timings_fn", paramt_str, cuc_timings_fn); +}
cuc.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: adv.c =================================================================== --- adv.c (nonexistent) +++ adv.c (revision 1765) @@ -0,0 +1,298 @@ +/* adv.c -- OpenRISC Custom Unit Compiler, Advanced Optimizations + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "sim-config.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "support/profile.h" +#include "misc.h" + +/* Marks successor of b with mask m */ +static void mark_successors (cuc_func *f, int b, int m, int stopb) +{ + if (b < 0 || b == BBID_END) return; + if (f->bb[b].tmp & m) return; + f->bb[b].tmp |= m; + /* mark stopb also; and stop searching -- we will gen new result in stopb */ + if (b == stopb) return; + mark_successors (f, f->bb[b].next[0], m, stopb); + mark_successors (f, f->bb[b].next[1], m, stopb); +} + +static unsigned long mask (unsigned long c) +{ + if (c) return (1 << (log2_int (c) + 1)) - 1; + else return 0; +} + +/* Calculates facts, that are determined by conditionals */ +void insert_conditional_facts (cuc_func *f) +{ + int b, j; + int b1, i1, j1; + cuc_insn n[2]; + for (b = 0; b < f->num_bb; b++) if (f->bb[b].ninsn > 0) { + cuc_insn *ii = &f->bb[b].insn[f->bb[b].ninsn - 1]; + /* We have following situation + x <= ... + sfxx f, x, CONST + bf ..., f */ + if (ii->type & IT_BRANCH && ii->opt[1] & OPT_REF && REF_BB(ii->op[1]) == b + && f->INSN(ii->op[1]).opt[2] & OPT_CONST) { + int ok = 0; + unsigned long c = f->INSN(ii->op[1]).op[2]; + int rref = f->INSN(ii->op[1]).op[1]; + unsigned long r; + if (!(f->INSN(ii->op[1]).opt[1] & OPT_REF)) continue; + r = f->INSN(rref).op[0]; + + /* Assignment must be in same basic block */ + if (REF_BB(rref) != b) continue; + + for (j = 0; j < 2; j++) { + change_insn_type (&n[j], II_ADD); + n[j].type = 0; + n[j].dep = NULL; + n[j].op[0] = r; n[j].opt[0] = OPT_REGISTER | OPT_DEST; + n[j].op[1] = 0; n[j].opt[1] = OPT_CONST; + n[j].op[2] = rref; n[j].opt[2] = OPT_REF; + n[j].opt[3] = OPT_NONE; + sprintf (n[j].disasm, "conditional %s fact", j ? 
"false" : "true"); + } + + /* First get the conditional and two instruction to place after the current BB */ + switch (f->INSN(ii->op[1]).index) { + case II_SFEQ: + change_insn_type (&n[0], II_ADD); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = 0; n[0].opt[1] = OPT_CONST; + n[0].op[2] = c; n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFNE: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = 0; n[1].opt[1] = OPT_CONST; + n[1].op[2] = c; n[1].opt[2] = OPT_CONST; + ok = 2; + break; + case II_SFLT: + change_insn_type (&n[0], II_AND); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = rref; n[0].opt[1] = OPT_REF; + n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFGT: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = rref; n[1].opt[1] = OPT_REF; + n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST; + ok = 2; + break; + case II_SFLE: + change_insn_type (&n[0], II_AND); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = rref; n[0].opt[1] = OPT_REF; + n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFGE: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = rref; n[1].opt[1] = OPT_REF; + n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST; + ok = 2; + break; + default: + ok = 0; + break; + } + + /* Now add two BBs at the end and relink */ + if (ok) { + int cnt = 0; + cucdebug (1, "%x rref %x cnt %i\n", b, rref, cnt); + fflush (stdout); + for (j = 0; j < 2; j++) { + int nb = f->num_bb++; + int sb; + assert (nb < MAX_BB); + f->bb[nb].type = 0; + f->bb[nb].first = -1; f->bb[nb].last = -1; + f->bb[nb].prev[0] = b; f->bb[nb].prev[1] = -1; + sb = f->bb[nb].next[0] = f->bb[b].next[j]; f->bb[nb].next[1] = -1; + assert (cnt >= 0); + cucdebug (2, "%x %x %x rref %x cnt %i\n", b, sb, nb, 
rref, cnt); + fflush (stdout); + assert (sb >= 0); + f->bb[b].next[j] = nb; + if (sb != BBID_END) { + if (f->bb[sb].prev[0] == b) f->bb[sb].prev[0] = nb; + else if (f->bb[sb].prev[1] == b) f->bb[sb].prev[1] = nb; + else assert (0); + } + f->bb[nb].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (cnt + 1)); + assert (f->bb[nb].insn); + f->bb[nb].insn[0] = n[j]; + f->bb[nb].ninsn = cnt + 1; + f->bb[nb].mdep = NULL; + f->bb[nb].nmemory = 0; + f->bb[nb].cnt = 0; + f->bb[nb].unrolled = 0; + f->bb[nb].ntim = 0; + f->bb[nb].selected_tim = -1; + } + for (b1 = 0; b1 < f->num_bb; b1++) f->bb[b1].tmp = 0; + + /* Find successor blocks and change links accordingly */ + mark_successors (f, f->num_bb - 2, 2, b); + mark_successors (f, f->num_bb - 1, 1, b); + for (b1 = 0; b1 < f->num_bb - 2; b1++) if (f->bb[b1].tmp == 1 || f->bb[b1].tmp == 2) { + int end; + if (REF_BB (rref) == b1) end = REF_I (rref) + 1; + else end = f->bb[b1].ninsn; + for (i1 = 0; i1 < end; i1++) + for (j1 = 0; j1 < MAX_OPERANDS; j1++) + if (f->bb[b1].insn[i1].opt[j1] & OPT_REF && f->bb[b1].insn[i1].op[j1] == rref) + f->bb[b1].insn[i1].op[j1] = REF (f->num_bb - f->bb[b1].tmp, 0); + } + if (cuc_debug >= 3) print_cuc_bb (f, "FACT"); + } + } + } +} + +static unsigned long max_op (cuc_func *f, int ref, int o) +{ + if (f->INSN(ref).opt[o] & OPT_REF) return f->INSN(f->INSN(ref).op[o]).max; + else if (f->INSN(ref).opt[o] & OPT_CONST) return f->INSN(ref).op[o]; + else if (f->INSN(ref).opt[o] & OPT_REGISTER) return 0xffffffff; + else assert (0); +} + +/* Returns maximum value, based on inputs */ +static unsigned long calc_max (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + if (ii->type & IT_COND) return 1; + switch (ii->index) { + case II_ADD : return MIN ((unsigned long long) max_op (f, ref, 1) + + (unsigned long long)max_op (f, ref, 2), 0xffffffff); + case II_SUB : return 0xffffffff; + case II_AND : return MIN (max_op (f, ref, 1), max_op (f, ref, 2)); + case II_OR : return max_op (f, ref, 1) | max_op (f, 
ref, 2); + case II_XOR : return max_op (f, ref, 1) | max_op (f, ref, 2); + case II_MUL : return MIN ((unsigned long long) max_op (f, ref, 1) + * (unsigned long long)max_op (f, ref, 2), 0xffffffff); + case II_SLL : if (ii->opt[2] & OPT_CONST) return max_op (f, ref, 1) << ii->op[2]; + else return max_op (f, ref, 1); + case II_SRA : return max_op (f, ref, 1); + case II_SRL : if (ii->opt[2] & OPT_CONST) return max_op (f, ref, 1) >> ii->op[2]; + else return max_op (f, ref, 1); + case II_LB : return 0xff; + case II_LH : return 0xffff; + case II_LW : return 0xffffffff; + case II_SB : + case II_SH : + case II_SW : return 0; + case II_SFEQ: + case II_SFNE: + case II_SFLE: + case II_SFLT: + case II_SFGE: + case II_SFGT: return 1; + case II_BF : return 0; + case II_LRBB: return 1; + case II_CMOV: return MAX (max_op (f, ref, 1), max_op (f, ref, 2)); + case II_REG : return max_op (f, ref, 1); + case II_NOP : assert (0); + case II_CALL: assert (0); + default: assert (0); + } + return -1; +} + +/* Width optimization -- detect maximum values; + these values are actually estimates, since the problem + is to hard otherwise... + We calculate these maximums iteratively -- we are slowly + approaching final solution. 
This algorithm is surely finite, + but can be very slow; so we stop after some iterations; + normal loops should be in this range */ +void detect_max_values (cuc_func *f) +{ + int b, i; + int modified = 0; + int iteration = 0; + + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].max = 0; + f->bb[b].tmp = 1; + } + + /* Repeat until something is changing */ + do { + modified = 0; + for (b = 0; b < f->num_bb; b++) { + if (f->bb[b].tmp) { + for (i = 0; i < f->bb[b].ninsn; i++) { + unsigned long m = calc_max (f, REF (b, i)); + if (m > f->bb[b].insn[i].max) { + f->bb[b].insn[i].max = m; + modified = 1; + } + } + } + } + if (iteration++ > CUC_WIDTH_ITERATIONS) break; + } while (modified); + + /* Something bad has happened; now we will assign 0xffffffff to all unsatisfied + instructions; this one is stoppable in O(n ^ 2) */ + if (iteration > CUC_WIDTH_ITERATIONS) { + do { + modified = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + unsigned long m = calc_max (f, REF (b, i)); + if (m > f->bb[b].insn[i].max) { + f->bb[b].insn[i].max = 0xffffffff; + modified = 1; + } + } + } while (modified); + } + cucdebug (1, "detect_max_values %i iterations\n", iteration); +} + Index: load.c =================================================================== --- load.c (nonexistent) +++ load.c (revision 1765) @@ -0,0 +1,536 @@ +/* load.c -- OpenRISC Custom Unit Compiler, instruction loading and converting + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "opcode/or32.h" +#include "insn.h" + +static const cuc_conv conv[] = { +{"l.add", II_ADD}, {"l.addi", II_ADD}, +{"l.movhi", II_OR}, +{"l.sub", II_SUB}, {"l.subi", II_SUB}, +{"l.and", II_AND}, {"l.andi", II_AND}, +{"l.xor", II_XOR}, {"l.xori", II_XOR}, +{"l.or", II_OR }, {"l.ori", II_OR}, +{"l.mul", II_MUL}, {"l.muli", II_MUL}, + +{"l.sra", II_SRA}, {"l.srai", II_SRA}, +{"l.srl", II_SRL}, {"l.srli", II_SRL}, +{"l.sll", II_SLL}, {"l.slli", II_SLL}, + +{"l.lbz",II_LB | II_MEM}, {"l.lbs", II_LB | II_MEM | II_SIGNED}, +{"l.lhz",II_LH | II_MEM}, {"l.lhs", II_LH | II_MEM | II_SIGNED}, +{"l.lwz",II_LW | II_MEM}, {"l.lws", II_LW | II_MEM | II_SIGNED}, +{"l.sb", II_SB | II_MEM}, {"l.sh", II_SH | II_MEM}, {"l.sw", II_SW | II_MEM}, +{"l.sfeq", II_SFEQ }, {"l.sfeqi", II_SFEQ}, +{"l.sfne", II_SFNE }, {"l.sfnei", II_SFNE}, +{"l.sflts", II_SFLT | II_SIGNED}, {"l.sfltis", II_SFLT | II_SIGNED}, +{"l.sfltu", II_SFLT}, {"l.sfltiu", II_SFLT}, +{"l.sfgts", II_SFGT | II_SIGNED}, {"l.sfgtis", II_SFGT | II_SIGNED}, +{"l.sfgtu", II_SFGT}, {"l.sfgtiu", II_SFGT}, +{"l.sfges", II_SFGE | II_SIGNED}, {"l.sfgeis", II_SFGE | II_SIGNED}, +{"l.sfgeu", II_SFGE}, {"l.sfgeiu", II_SFGE}, +{"l.sfles", II_SFLE | II_SIGNED}, {"l.sfleis", II_SFLE | II_SIGNED}, +{"l.sfleu", II_SFLE}, {"l.sfleiu", II_SFLE}, +{"l.j", II_BF }, 
+{"l.bf", II_BF }, +{"l.jal", II_CALL }, +{"l.nop", II_NOP } +}; + +/* Instructions from function */ +cuc_insn insn[MAX_INSNS]; +int num_insn; +int reloc[MAX_INSNS]; + +/* Prints out instructions */ +void print_cuc_insns (char *s, int verbose) +{ + PRINTF ("****************** %s ******************\n", s); + print_insns (0, insn, num_insn,verbose); + PRINTF ("\n\n"); +} + +void xchg_insn (int i, int j) +{ + cuc_insn t; + t = insn[i]; + insn[i] = insn[j]; + insn[j] = t; +} + +/* Negates conditional instruction */ +void negate_conditional (cuc_insn *ii) +{ + assert (ii->type & IT_COND); + + if (ii->index == II_SFEQ) change_insn_type (ii, II_SFNE); + else if (ii->index == II_SFNE) change_insn_type (ii, II_SFEQ); + else if (ii->index == II_SFLT) change_insn_type (ii, II_SFGE); + else if (ii->index == II_SFGT) change_insn_type (ii, II_SFLE); + else if (ii->index == II_SFLE) change_insn_type (ii, II_SFGT); + else if (ii->index == II_SFGE) change_insn_type (ii, II_SFLT); + else assert (0); +} + +/* Remove delay slots */ +void remove_dslots () +{ + int i; + int in_delay = 0; + for (i = 0; i < num_insn; i++) { + if (in_delay) insn[i].type |= IT_INDELAY; + in_delay = 0; + if (insn[i].type & IT_BRANCH) in_delay = 1; + if (insn[i].type & IT_INDELAY) { + cuc_insn *ii; + cuc_insn *bi; + assert (i >= 2); + ii = &insn[i - 2]; + bi = &insn[i - 1]; + /* delay slot should not be a branch target! 
*/ + assert ((insn[i].type & IT_BBSTART) == 0); + assert ((bi->type & IT_INDELAY) == 0); + insn[i].type &= ~IT_INDELAY; /* no more in delay slot */ + + /* Get the value we need before the actual jump */ + if (bi->opt[1] & OPT_REGISTER && bi->op[1] >= 0) { + int r = bi->op[1]; + assert (ii->index == II_NOP); + change_insn_type (ii, II_ADD); + ii->type = IT_COND; + ii->dep = NULL; + ii->op[0] = r; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = r; ii->opt[1] = OPT_REGISTER; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + bi->op[1] = i - 2; bi->opt[1] = OPT_REF; + } + xchg_insn (i, i - 1); + } + } + assert (in_delay == 0); +} + +/* Convert local variables (uses stack frame -- r1) to internal values */ +void detect_locals () +{ + int stack[CUC_MAX_STACK]; + int i, can_remove_stack = 1; + int real_stack_size = 0; + + for (i = 0; i < CUC_MAX_STACK; i++) stack[i] = -1; + + for (i = 0; i < num_insn; i++) { + /* sw off (r1),rx */ + if (insn[i].index == II_SW + && (insn[i].opt[0] & OPT_CONST) + && insn[i].op[1] == 1 && (insn[i].opt[1] & OPT_REGISTER)) { + + if (insn[i].op[0] < CUC_MAX_STACK/* && insn[i].op[1] >= 4*/) { /* Convert to normal move */ + stack[insn[i].op[0]] = i; + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + insn[i].op[0] = -1; insn[i].opt[0] = OPT_REGISTER | OPT_DEST; + insn[i].op[1] = insn[i].op[2]; insn[i].opt[1] = insn[i].opt[2]; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* lw rx,off (r1) */ + } else if (insn[i].index == II_LW + && (insn[i].opt[1] & OPT_CONST) + && insn[i].op[2] == 1 && (insn[i].opt[2] & OPT_REGISTER)) { + + if (insn[i].op[1] < CUC_MAX_STACK && stack[insn[i].op[1]] >= 0) { /* Convert to normal move */ + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + insn[i].op[1] = stack[insn[i].op[1]]; insn[i].opt[1] = OPT_REF; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* Check for 
defined stack size */ + } else if (insn[i].index == II_ADD && !real_stack_size + && (insn[i].opt[0] & OPT_REGISTER) && insn[i].op[0] == 1 + && (insn[i].opt[1] & OPT_REGISTER) && insn[i].op[1] == 1 + && (insn[i].opt[2] & OPT_CONST)) { + real_stack_size = -insn[i].op[2]; + } + } + //assert (can_remove_stack); /* TODO */ +} + +/* Disassemble one instruction from insn index and generate parameters */ +const char *build_insn (unsigned long data, cuc_insn *insn) +{ + const char *name; + char *s; + extern char *disassembled; + int index = insn_decode (data); + struct or32_opcode const *opcode; + int i, argc = 0; + + insn->insn = data; + insn->index = -1; + insn->type = 0; + name = insn_name (index); + insn->index = index; + disassemble_index (data, index); + strcpy (insn->disasm, disassembled); + insn->dep = NULL; + for (i = 0; i < MAX_OPERANDS; i++) insn->opt[i] = OPT_NONE; + + if (index < 0) { + fprintf (stderr, "Invalid opcode 0x%08lx!\n", data); + exit (1); + } + opcode = &or32_opcodes[index]; + + for (s = opcode->args; *s != '\0'; ++s) { + switch (*s) { + case '\0': return name; + case 'r': + insn->opt[argc] = OPT_REGISTER | (argc ? 
0 : OPT_DEST); + insn->op[argc++] = or32_extract(*++s, opcode->encoding, data); + break; + + default: + if (strchr (opcode->encoding, *s)) { + unsigned long imm = or32_extract (*s, opcode->encoding, data); + imm = extend_imm(imm, *s); + insn->opt[argc] = OPT_CONST; + insn->op[argc++] = imm; + } + } + } + return name; +} + +/* inserts nop before branch */ +void expand_branch () +{ + int i, j, num_bra = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_BRANCH) num_bra++; + + d = num_insn + 2 * num_bra; + assert (d < MAX_INSNS); + + /* Add nop before branch */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_BRANCH) { + insn[--d] = insn[i]; // for delay slot (later) + if (insn[d].opt[1] & OPT_REGISTER) { + assert (insn[d].op[1] == FLAG_REG); + insn[d].op[1] = i; insn[d].opt[1] = OPT_REF; + } + insn[--d] = insn[i]; // for branch + change_insn_type (&insn[d], II_NOP); + insn[--d] = insn[i]; // save flag & negation of conditional, if required + change_insn_type (&insn[d], II_CMOV); + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].type & IT_FLAG1 ? 0 : 1; insn[d].opt[1] = OPT_CONST; + insn[d].op[2] = insn[d].type & IT_FLAG1 ? 
1 : 0; insn[d].opt[2] = OPT_CONST; + insn[d].op[3] = FLAG_REG; insn[d].opt[3] = OPT_REGISTER; + insn[d].type = IT_COND; + if (insn[d].type) + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += 2 * num_bra; + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* expands immediate memory instructions to two */ +void expand_memory () +{ + int i, j, num_mem = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY) num_mem++; + + d = num_insn + num_mem; + assert (d < MAX_INSNS); + + /* Split memory commands */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_MEMORY) { + insn[--d] = insn[i]; + insn[--d] = insn[i]; + reloc[i] = d; + switch (insn[d].index) { + case II_SW: + case II_SH: + case II_SB: + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* sw rx,(t($-1)) */ + insn[d + 1].op[0] = insn[i].op[2]; insn[d + 1].opt[0] = insn[d + 1].opt[2]; + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + insn[d].op[2] = insn[d].op[0]; insn[d].opt[2] = insn[d].opt[0]; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].opt[3] = OPT_NONE; + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + break; + case II_LW: + case II_LH: + case II_LB: + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* lw (t($-1)),rx */ + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].opt[3] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + break; + default: fprintf (stderr, "%4i, %4i: %s\n", i, d, cuc_insn_name (&insn[d])); + assert (0); + } + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_mem; 
+ for (i = 0; i < num_insn; i++) if (!(insn[i].type & IT_MEMORY)) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* expands signed comparisons to three instructions */ +void expand_signed () +{ + int i, j, num_sig = 0, d; + for (i = 0; i < num_insn; i++) + if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) num_sig++; + + d = num_insn + num_sig * 2; + assert (d < MAX_INSNS); + + /* Split signed instructions */ + for (i = num_insn - 1; i >= 0; i--) + /* We will expand signed memory later */ + if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) { + insn[--d] = insn[i]; + insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF; + insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2]; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_sig * 2; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY || !(insn[i].type & IT_SIGNED)) { + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; + } else insn[i].type &= ~IT_SIGNED; +} + +/* expands calls to 7 instructions */ +void expand_calls () +{ + int i, j, num_call = 0, d; + for (i = 0; i < num_insn; i++) + if (insn[i].index == II_CALL) num_call++; + + d = num_insn + num_call * 6; /* 6 parameters */ + assert 
(d < MAX_INSNS); + + /* Split call instructions */ + for (i = num_insn - 1; i >= 0; i--) + /* We will expand signed memory later */ + if (insn[i].index == II_CALL) { + insn[--d] = insn[i]; + insn[d].op[0] = insn[d].op[1]; insn[d].opt[0] = OPT_CONST; + insn[d].opt[1] = OPT_NONE; + insn[d].type |= IT_VOLATILE; + + for (j = 0; j < 6; j++) { + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = IT_VOLATILE; + insn[d].op[0] = 3 + j; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = 3 + j; insn[d].opt[1] = OPT_REGISTER; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + } + + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_call * 6; + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* Loads function from file into global array insn. + Function returns nonzero if function cannot be converted. 
*/
/* Reads IN_FN, which is scanned with "%08lx\n" (one hexadecimal instruction
   word per entry), decodes every word with build_insn () into the global
   insn[] array and then runs the conversion passes (expand_branch,
   remove_dslots, optionally detect_locals, expand_memory, expand_signed,
   expand_calls).

   Returns 0 on success and 1 when the function cannot be converted:
     - an unsupported instruction is found,
     - the input holds more than MAX_INSNS instructions,
     - the function does not end with exactly one "l.jr" followed by exactly
       one further instruction.
   If the file cannot be opened the process is terminated with exit (1).

   On the early failure paths num_insn is left at 0 and the input file is
   closed before returning.  */
int cuc_load (char *in_fn)
{
  int i, j;
  FILE *fi;
  /* 0 until the first "l.jr" is seen, then incremented for every
     instruction read afterwards; must be exactly 2 at end of input */
  int func_return = 0;
  num_insn = 0;

  log ("Loading filename %s\n", in_fn);
  if ((fi = fopen (in_fn, "rt")) == NULL) {
    fprintf (stderr, "Cannot open '%s'\n", in_fn);
    exit (1);
  }

  /* Read in the function and decode the instructions */
  for (i = 0;; i++) {
    unsigned long data;
    const char *name;

    if (fscanf (fi, "%08lx\n", &data) != 1) break;

    /* insn[] has MAX_INSNS entries; refuse to decode past its end */
    if (i >= MAX_INSNS) {
      cucdebug (1, "Too many instructions (limit is %i).\n", (int) MAX_INSNS);
      log ("Too many instructions (limit is %i).\n", (int) MAX_INSNS);
      fclose (fi);
      return 1;
    }

    /* build params */
    name = build_insn (data, &insn[i]);
    if (func_return) func_return++;

    if (or32_opcodes[insn[i].index].flags & OR32_IF_DELAY) {
      /* f: 1 = l.bnf, 0 = l.bf, -1 = l.j */
      int f;
      if (strcmp (name, "l.bnf") == 0) f = 1;
      else if (strcmp (name, "l.bf") == 0) f = 0;
      else if (strcmp (name, "l.j") == 0) {
        f = -1;
      } else if (strcmp (name, "l.jr") == 0 && func_return == 0) {
        /* first l.jr: treated as the function return and turned into a nop */
        func_return = 1;
        change_insn_type (&insn[i], II_NOP);
        continue;
      } else {
        cucdebug (1, "Instruction #%i: \"%s\" not supported.\n", i, name);
        log ("Instruction #%i: \"%s\" not supported.\n", i, name);
        fclose (fi);    /* do not leak the input file on failure */
        return 1;
      }

      if (f < 0) { /* l.j */
        /* repair params: jump target becomes an instruction index and the
           condition operand is the constant 1 */
        change_insn_type (&insn[i], II_BF);
        insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP;
        insn[i].op[1] = 1; insn[i].opt[1] = OPT_CONST;
        insn[i].type |= IT_BRANCH | IT_VOLATILE;
      } else {
        /* conditional branch: condition operand is the flag register;
           l.bnf is additionally marked with IT_FLAG1 */
        change_insn_type (&insn[i], II_BF);
        insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP;
        insn[i].op[1] = FLAG_REG; insn[i].opt[1] = OPT_REGISTER;
        insn[i].type |= IT_BRANCH | IT_VOLATILE;
        if (f) insn[i].type |= IT_FLAG1;
      }
    } else {
      /* Translate the mnemonic through the conv[] table; index stays -1
         when the mnemonic is unknown */
      insn[i].index = -1;
      for (j = 0; j < sizeof (conv) / sizeof (cuc_conv); j++)
        if (strcmp (conv[j].from, name) == 0) {
          if (conv[j].to & II_SIGNED) insn[i].type |= IT_SIGNED;
          if (conv[j].to & II_MEM) insn[i].type |= IT_MEMORY | IT_VOLATILE;
          change_insn_type (&insn[i], conv[j].to & II_MASK);
          break;
        }

      if (strcmp (name, "l.movhi") == 0) {
        /* immediate goes to the upper half; second source is constant 0 */
        insn[i].op[1] <<= 16;
        insn[i].op[2] = 0;
        insn[i].opt[2] = OPT_CONST;
      }

      if (insn[i].index == II_SFEQ || insn[i].index == II_SFNE
          || insn[i].index == II_SFLE || insn[i].index == II_SFGT
          || insn[i].index == II_SFGE || insn[i].index == II_SFLT) {
        /* repair params: shift both sources up by one operand slot and
           make the flag register the explicit destination */
        insn[i].op[2] = insn[i].op[1]; insn[i].opt[2] = insn[i].opt[1] & ~OPT_DEST;
        insn[i].op[1] = insn[i].op[0]; insn[i].opt[1] = insn[i].opt[0] & ~OPT_DEST;
        insn[i].op[0] = FLAG_REG; insn[i].opt[0] = OPT_DEST | OPT_REGISTER;
        insn[i].opt[3] = OPT_NONE;
        insn[i].type |= IT_COND;
      }

      if (insn[i].index < 0
          || (insn[i].index == II_NOP && insn[i].op[0] != 0)) {
        cucdebug (1, "Instruction #%i: \"%s\" not supported (2).\n", i, name);
        log ("Instruction #%i: \"%s\" not supported (2).\n", i, name);
        fclose (fi);    /* do not leak the input file on failure */
        return 1;
      }
    }
  }
  num_insn = i;
  fclose (fi);

  if (func_return != 2) {
    cucdebug (1, "Unsupported function structure.\n");
    log ("Unsupported function structure.\n");
    return 1;
  }

  log ("Number of instructions loaded = %i\n", num_insn);
  if (cuc_debug >= 3) print_cuc_insns ("INITIAL", 1);

  log ("Converting.\n");
  expand_branch ();
  if (cuc_debug >= 6) print_cuc_insns ("AFTER_EXP_BRANCH", 0);

  remove_dslots ();
  if (cuc_debug >= 6) print_cuc_insns ("NO_DELAY_SLOTS", 0);

  if (config.cuc.calling_convention) {
    detect_locals ();
    if (cuc_debug >= 7) print_cuc_insns ("AFTER_LOCALS", 0);
  }
  expand_memory ();
  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_MEM", 0);

  expand_signed ();
  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_SIG", 0);

  expand_calls ();
  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_CALLS", 0);

  return 0;
}
load.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: memory.c =================================================================== --- memory.c (nonexistent) +++ memory.c (revision 1765) @@ -0,0 +1,542 @@ +/* memory.c -- OpenRISC Custom Unit Compiler, memory optimization and scheduling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + + +/* Cleans memory & data dependencies */ +void clean_deps (cuc_func *f) +{ + int b, i; + dep_list *t; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) { + t = f->bb[b].insn[i].dep; + while (t) { + dep_list *tmp = t; + t = t->next; + free (tmp); + } + f->bb[b].insn[i].dep = NULL; + } + + t = f->bb[b].mdep; + while (t) { + dep_list *tmp = t; + t = t->next; + free (tmp); + } + f->bb[b].mdep = NULL; + } + + f->nmsched = 0; +} + +/* Checks for memory conflicts between two instructions; returns 1 if detected + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +static int check_memory_conflict (cuc_func *f, cuc_insn *a, cuc_insn *b, int otype) +{ + switch (otype) { + case MO_EXACT: /* exact */ + case MO_STRONG: /* strong */ + return 1; + case MO_WEAK: /* weak */ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + int aw, bw; + assert ((aw = II_MEM_WIDTH (a->index)) >= 0); + assert ((bw = II_MEM_WIDTH (b->index)) >= 0); + + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 1; + + /* Check if they overlap */ + if (a->op[2] >= b->op[2] && a->op[2] < b->op[2] + bw) return 1; + if (b->op[2] >= a->op[2] && b->op[2] < a->op[2] + aw) return 1; + return 0; + } else return 1; + case MO_NONE: /* none */ + return 0; + default: + assert (0); + } + return 1; +} + +/* Adds memory dependencies based on ordering type: + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +void add_memory_dep (cuc_func *f, int otype) +{ + int b, i; + dep_list *all_mem = NULL; + + for (b = 0; b < 
f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + dep_list *tmp = all_mem; + while (tmp) { + //PRINTF ("%x %x\n", REF (b,i), tmp->ref); + if (check_memory_conflict (f, &insn[i], &f->INSN(tmp->ref), otype)) + add_dep (&insn[i].dep, tmp->ref); + tmp = tmp->next; + } + add_dep (&all_mem, REF (b, i)); + } + } + dispose_list (&all_mem); +} + +/* Check if they address the same location, so we can join them */ +static int same_transfers (cuc_func *f, int otype) +{ + int i, j; + int modified = 0; + if (otype == MO_WEAK || otype == MO_NONE) { + for (i = 1, j = 1; i < f->nmsched; i++) + /* Exclude memory stores and different memory types */ + if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + /* Not in usual form? 
*/ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep; + + //PRINTF ("%i %i, ", a->op[2], b->op[2]); + + /* Check if they are the same => do not copy */ + if (a->op[2] == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => remove actual instruction */ + int t1 = MIN (f->msched[i - 1], f->msched[i]); + int t2 = MAX (f->msched[i - 1], f->msched[i]); + int b, i, j; + cucdebug (2, "Removing %x_%x and using %x_%x instead.\n", + REF_BB(t2), REF_I(t2), REF_BB(t1), REF_I(t1)); + change_insn_type (&f->INSN(t2), II_NOP); + modified = 1; + /* Update references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t2) + f->bb[b].insn[i].op[j] = t1; + + } else goto keep; + } else goto keep; + } else { +keep: + f->msched[j] = f->msched[i]; + f->mtype[j++] = f->mtype[i]; + } + f->nmsched = j; + } + return modified; +} + +/* Check if two consecutive lb[zs] can be joined into lhz and if + two consecutive lh[zs] can be joined into lwz */ +static int join_transfers (cuc_func *f, int otype) +{ + int i, j; + int modified = 0; + + /* We can change width even with strong memory ordering */ + if (otype == MO_WEAK || otype == MO_NONE || otype == MO_STRONG) { + for (i = 1, j = 1; i < f->nmsched; i++) + /* Exclude memory stores and different memory types */ + if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + int aw = f->mtype[i - 1] & MT_WIDTH; + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + + /* Not in usual form? 
*/ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep; + + /* Check if they touch together */ + if (a->op[2] + aw == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => remove second instruction */ + int t1 = MIN (f->msched[i - 1], f->msched[i]); + int t2 = MAX (f->msched[i - 1], f->msched[i]); + dep_list *t1dep = f->INSN(t1).dep; + int x, p; + cuc_insn *ii; + + cucdebug (2, "Joining %x and %x.\n", t1, t2); + if (cuc_debug >= 8) print_cuc_bb (f, "PREJT"); + change_insn_type (&f->INSN(t1), II_NOP); + change_insn_type (&f->INSN(t2), II_NOP); + /* We will reuse the memadd before the first load, and add some + custom code at the end */ + insert_insns (f, t1, 10); + if (cuc_debug > 8) print_cuc_bb (f, "PREJT2"); + + /* Remove all dependencies to second access */ + for (x = 0; x < f->num_bb; x++) { + int i; + for (i = 0; i < f->bb[x].ninsn; i++) { + dep_list *d = f->bb[x].insn[i].dep; + dep_list **old = &f->bb[x].insn[i].dep; + while (d) { + if (d->ref == t2) { + d = d->next; + *old = d; + } else { + d = d->next; + old = &((*old)->next); + } + } + } + } + + /* Build the folowing code: + l[hw]z p-1 + and p-1, 0xff + sfle p-1, 0x7f + or p-2, 0xffffff00 + cmov p-3, p-1, p-2 + shr p-5, 8 + and p-1, 0xff + sfle p-1 0x7f + or p-2 0xffffff00 + cmov p-3, p-1, p-2*/ + p = REF_I(t1); + cucdebug (8, "%x %x\n", f->mtype[i - 1], f->mtype[i]); + for (x = 0; x < 2; x++) { + int t = f->mtype[i - 1 + x]; + ii = &f->bb[REF_BB(t1)].insn[p]; + if (!x) { + change_insn_type (ii, aw == 1 ? II_LH : II_LW); + ii->type = IT_MEMORY | IT_VOLATILE; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = t1 - 1; ii->opt[1] = OPT_REF; + ii->opt[2] = ii->opt[3] = OPT_NONE; + ii->dep = t1dep; + f->mtype[i - 1] = MT_LOAD | (aw == 1 ? 
2 : 4); + f->msched[i - 1] = REF (REF_BB(t1), p); + } else { + change_insn_type (ii, II_SRL); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = t1; ii->opt[1] = OPT_REF; + ii->op[2] = 8; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + } + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_AND); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = 0xff; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_SFLE); + ii->type = IT_COND; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = 0x7f; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_OR); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 2); ii->opt[1] = OPT_REF; + if (t & MT_SIGNED) ii->op[2] = 0xffffff00; + else ii->op[2] = 0; + ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_CMOV); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = REF (REF_BB(t1), p - 3); ii->opt[2] = OPT_REF; + ii->op[3] = REF (REF_BB(t1), p - 2); ii->opt[3] = OPT_REF; + p++; + } + + modified = 1; + + { + int b, i, j; + /* Update references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (REF_I (f->bb[b].insn[i].op[j]) < REF_I (t1) + || REF_I(f->bb[b].insn[i].op[j]) >= REF_I (t1) + 10) { + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t1) + f->bb[b].insn[i].op[j] = t1 + 4; + else if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t2) + f->bb[b].insn[i].op[j] = t1 + 9; + } 
+ } + if (cuc_debug >= 8) print_cuc_bb (f, "POSTJT"); + } else goto keep; + } else goto keep; + } else { +keep: + f->msched[j] = f->msched[i]; + f->mtype[j++] = f->mtype[i]; + } + f->nmsched = j; + } + return modified; +} + +/* returns nonzero if a < b */ +int mem_ordering_cmp (cuc_func *f, cuc_insn *a, cuc_insn *b) +{ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 0; + + /* Order linearly, we can then join them to bursts */ + return a->op[2] < b->op[2]; + } else return 0; +} + +/* Schedule memory accesses + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +int schedule_memory (cuc_func *f, int otype) +{ + int b, i, j; + int modified = 0; + f->nmsched = 0; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + f->msched[f->nmsched++] = REF (b, i); + if (otype == MO_NONE || otype == MO_WEAK) insn[i].type |= IT_FLAG1; /* mark unscheduled */ + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 
'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* We can reorder just more loose types + We assume, that memory accesses are currently in valid (but not neccesserly) + optimal order */ + if (otype == MO_WEAK || otype == MO_NONE) { + for (i = 0; i < f->nmsched; i++) { + int best = i; + int tmp; + for (j = i + 1; j < f->nmsched; j++) if (REF_BB(f->msched[j]) == REF_BB(f->msched[best])) { + if (mem_ordering_cmp (f, &f->INSN (f->msched[j]), &f->INSN(f->msched[best]))) { + /* Check dependencies */ + dep_list *t = f->INSN(f->msched[j]).dep; + while (t) { + if (f->INSN(t->ref).type & IT_FLAG1) break; + t = t->next; + } + if (!t) best = j; /* no conflicts -> ok */ + } + } + + /* we have to shift instructions up, to maintain valid dependencies + and make space for best candidate */ + + /* make local copy */ + tmp = f->msched[best]; + for (j = best; j > i; j--) f->msched[j] = f->msched[j - 1]; + f->msched[i] = tmp; + f->INSN(f->msched[i]).type &= ~IT_FLAG1; /* mark scheduled */ + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* Assign memory types */ + for (i = 0; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i]); + f->mtype[i] = !II_IS_LOAD(a->index) ? MT_STORE : MT_LOAD; + f->mtype[i] |= II_MEM_WIDTH (a->index); + if (a->type & IT_SIGNED) f->mtype[i] |= MT_SIGNED; + } + + if (same_transfers (f, otype)) modified = 1; + if (join_transfers (f, otype)) modified = 1; + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 
'E' : 'B' : ' '); + cucdebug (2, "\n"); + if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL"); + + if (config.cuc.enable_bursts) { + //PRINTF ("\n"); + for (i = 1; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + int aw = f->mtype[i - 1] & MT_WIDTH; + + /* Burst can only be out of words */ + if (aw != 4) continue; + + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + /* Not in usual form? */ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) continue; + + //PRINTF ("%i %i, ", a->op[2], b->op[2]); + + /* Check if they touch together */ + if (a->op[2] + aw == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => do burst */ + f->mtype[i - 1] &= ~MT_BURSTE; + f->mtype[i - 1] |= MT_BURST; + f->mtype[i] |= MT_BURST | MT_BURSTE; + } + } + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* We don't need dependencies in non-memory instructions */ + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) if (!(insn[i].type & IT_MEMORY)) + dispose_list (&insn[i].dep); + } + + if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL2"); + /* Reduce number of dependecies, keeping just direct dependencies, based on memory schedule */ + { + int lastl[3] = {-1, -1, -1}; + int lasts[3] = {-1, -1, -1}; + int lastc[3] = {-1, -1, -1}; + int last_load = -1, last_store = -1, last_call = -1; + for (i = 0; i < f->nmsched; i++) { + int t = f->mtype[i] & MT_LOAD ? 0 : f->mtype[i] & MT_STORE ? 
1 : 2; + int maxl = lastl[t]; + int maxs = lasts[t]; + int maxc = lastc[t]; + dep_list *tmp = f->INSN(f->msched[i]).dep; + cucdebug (7, "!%i %x %p\n", i, f->msched[i], tmp); + while (tmp) { + if (f->INSN(tmp->ref).type & IT_MEMORY && REF_BB(tmp->ref) == REF_BB(f->msched[i])) { + cucdebug (7, "%i %x %lx\n", i, f->msched[i], tmp->ref); + /* Search for the reference */ + for (j = 0; j < f->nmsched; j++) if (f->msched[j] == tmp->ref) break; + assert (j < f->nmsched); + if (f->mtype[j] & MT_STORE) { + if (maxs < j) maxs = j; + } else if (f->mtype[j] & MT_LOAD) { + if (maxl < j) maxl = j; + } else if (f->mtype[j] & MT_CALL) { + if (maxc < j) maxc = j; + } + } + tmp = tmp->next; + } + dispose_list (&f->INSN(f->msched[i]).dep); + if (f->mtype[i] & MT_STORE) { + maxs = last_store; + last_store = i; + } else if (f->mtype[i] & MT_LOAD) { + maxl = last_load; + last_load = i; + } else if (f->mtype[i] & MT_CALL) { + maxc = last_call; + last_call = i; + } + + if (maxl > lastl[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxl]); + lastl[t] = maxl; + } + if (maxs > lasts[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxs]); + lasts[t] = maxs; + } + if (maxc > lastc[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxc]); + lastc[t] = maxc; + } + //PRINTF ("%i(%i)> ml %i(%i) ms %i(%i) lastl %i %i lasts %i %i last_load %i last_store %i\n", i, f->msched[i], maxl, f->msched[maxl], maxs, f->msched[maxs], lastl[0], lastl[1], lasts[0], lasts[1], last_load, last_store); + + /* What we have to wait to finish this BB? 
*/ + if (i + 1 >= f->nmsched || REF_BB(f->msched[i + 1]) != REF_BB(f->msched[i])) { + if (last_load > lastl[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_load]); + lastl[t] = last_load; + } + if (last_store > lasts[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_store]); + lasts[t] = last_store; + } + if (last_call > lastc[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_call]); + lastc[t] = last_call; + } + } + } + } + return modified; +}
memory.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: timings.c =================================================================== --- timings.c (nonexistent) +++ timings.c (revision 1765) @@ -0,0 +1,305 @@ +/* timings.c -- OpenRISC Custom Unit Compiler, timing and size estimation + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + +static cuc_timing_table *timing_table; +static double max_bb_delay; + +/* Returns instruction delay */ +double insn_time (cuc_insn *ii) +{ + if (ii->opt[2] & OPT_CONST) { + if (ii->opt[1] & OPT_CONST) return 0.; + else return timing_table[ii->index].delayi; + } else return timing_table[ii->index].delay; +} + +/* Returns instruction size */ +double insn_size (cuc_insn *ii) +{ + double s = (ii->opt[2] & OPT_CONST) ? 
timing_table[ii->index].sizei + : timing_table[ii->index].size; + if (ii->opt[1] & OPT_CONST) return 0.; + if (ii->type & IT_COND && (ii->index == II_CMOV || ii->index == II_ADD)) return s / 32.; + else return s; +} + +/* Returns normal instruction size */ +double ii_size (int index, int imm) +{ + if (imm) return timing_table[index].sizei; + else return timing_table[index].size; +} + +/* Returns dataflow tree height in cycles */ +static double max_delay (cuc_func *f, int b) +{ + double max_d = 0.; + double *d; + cuc_bb *bb = &f->bb[b]; + int i, j; + d = (double *) malloc (sizeof (double) * bb->ninsn); + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) { + double t = d[REF_I (op)]; + if (t > md) md = t; + } + } + d[i] = md + insn_time (&bb->insn[i]); + if (d[i] > max_d) max_d = d[i]; + } + free (d); + //PRINTF ("max_d%i=%f\n", b, max_d); + return max_d; +} + +/* Calculates memory delay of a single run of a basic block */ +static int memory_delay (cuc_func *f, int b) +{ + int i; + int d = 0; + for (i = 0; i < f->nmsched; i++) + if (REF_BB (f->msched[i]) == b) { + if (f->mtype[i] & MT_STORE) { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[2]; + else d += runtime.cuc.mdelay[3]; + } else if (f->mtype[i] & MT_LOAD) { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[0]; + else d += runtime.cuc.mdelay[1]; + } + } + //PRINTF ("md%i=%i\n", b, d); + return d; +} + +/* Cuts the tree and marks registers */ +void cut_tree (cuc_func *f, int b, double sd) +{ + int i, j; + double *depths; + cuc_bb *bb = &f->bb[b]; + depths = (double *) malloc (sizeof (double) * bb->ninsn); + + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + int mg = 0; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 
&& REF_BB (op) == b && REF_I (op) < i) { + double t = depths[REF_I (op)]; + if (f->INSN(op).type & IT_CUT) { + if (f->INSN(op).tmp + 1 >= mg) { + if (f->INSN(op).tmp + 1 > mg) md = 0.; + mg = f->INSN(op).tmp + 1; + if (t > md) md = t; + } + } else { + if (f->INSN(op).tmp >= mg) { + if (f->INSN(op).tmp > mg) md = 0.; + mg = f->INSN(op).tmp; + if (t > md) md = t; + } + } + } + } + //PRINTF ("%2x md%.1f ", i, md); + md += insn_time (&bb->insn[i]); + //PRINTF ("md%.1f mg%i %.1f\n", md, mg, sd); + bb->insn[i].tmp = mg; + if (md > sd) { + bb->insn[i].type |= IT_CUT; + if (md > runtime.cuc.cycle_duration) + log ("WARNING: operation t%x_%x may need to be registered inbetween\n", b, i); + depths[i] = 0.; + } else depths[i] = md; + } + free (depths); +} + +/* How many cycles we need now to get through the BB */ +static int new_bb_cycles (cuc_func *f, int b, int cut) +{ + long d; + double x = max_delay (f, b); + d = ceil (x / runtime.cuc.cycle_duration); + if (d < 1) d = 1; + if (cut && x > runtime.cuc.cycle_duration) cut_tree (f, b, x / d); + + if (x / d > max_bb_delay) max_bb_delay = x / d; + + return memory_delay (f, b) + d; +} + +/* Cuts the tree and marks registers */ +void mark_cut (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + f->bb[b].insn[i].tmp = 0; /* Set starting groups */ + if (config.cuc.no_multicycle) + for (b = 0; b < f->num_bb; b++) + new_bb_cycles (f, b, 1); +} + +/* Returns basic block circuit area */ +static double bb_size (cuc_bb *bb) +{ + int i; + double d = 0.; + for (i = 0; i < bb->ninsn; i++) { + if (bb->insn[i].opt[2] & OPT_CONST) + d = d + timing_table[bb->insn[i].index].sizei; + else d = d + timing_table[bb->insn[i].index].size; + } + return d; +} + +/* Recalculates bb[].cnt values, based on generated profile file */ +void recalc_cnts (cuc_func *f, char *bb_filename) +{ + int i, r, b, prevbb = -1, prevcnt = 0; + int buf[256]; + const int bufsize = 256; + FILE *fi = fopen (bb_filename, "rb"); 
+ + assert (fi); + + /* initialize counts */ + for (b = 0; b < f->num_bb; b++) f->bb[b].cnt = 0; + + /* read control flow from file and set counts */ + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + b = f->init_bb_reloc[buf[i]]; + if (b < 0) continue; + /* Were we in the loop? */ + if (b == prevbb) { + prevcnt++; + } else { + /* End the block */ + if (prevbb >= 0 && prevbb != BBID_START) + f->bb[prevbb].cnt += prevcnt / f->bb[prevbb].unrolled + 1; + prevcnt = 0; + prevbb = b; + } + } + } while (r == bufsize); + + fclose (fi); +} + +/* Analizes current version of design and places results into timings structure */ +void analyse_timings (cuc_func *f, cuc_timings *timings) +{ + long new_time = 0; + double size = 0.; + int b, i; + + /* Add time needed for mtspr/mfspr */ + for (i = 0; i < MAX_REGS; i++) if (f->used_regs[i]) new_time++; + new_time++; /* always one mfspr at the end */ + new_time *= f->num_runs; + + max_bb_delay = 0.; + for (b = 0; b < f->num_bb; b++) { + new_time += new_bb_cycles (f, b, 0) * f->bb[b].cnt; + size = size + bb_size (&f->bb[b]); + } + timings->new_time = new_time; + timings->size = size; + log ("Max circuit delay %.2fns; max circuit clock speed %.1fMHz\n", + max_bb_delay, 1000. / max_bb_delay); +} + +/* Loads in the specified timings table */ +void load_timing_table (char *filename) +{ + int i; + FILE *fi; + + log ("Loading timings from %s\n", filename); + log ("Using clock delay %.2fns (frequency %.0fMHz)\n", runtime.cuc.cycle_duration, + 1000. 
/ runtime.cuc.cycle_duration); + assert (fi = fopen (filename, "rt")); + + timing_table = (cuc_timing_table *)malloc ((II_LAST + 1) * sizeof (cuc_timing_table)); + assert (timing_table); + for (i = 0; i <= II_LAST; i++) { + timing_table[i].size = -1.; + timing_table[i].sizei = -1.; + timing_table[i].delay = -1.; + timing_table[i].delayi = -1.; + } + + while (!feof(fi)) { + char tmp[256]; + int index; + if (fscanf (fi, "%s", tmp) != 1) break; + if (tmp[0] == '#') { + while (!feof (fi) && fgetc (fi) != '\n'); + continue; + } + for (i = 0; i <= II_LAST; i++) + if (strcmp (known[i].name, tmp) == 0) { + index = i; + break; + } + assert (index <= II_LAST); + i = index; + if (fscanf (fi, "%lf%lf%lf%lf\n", &timing_table[i].size, + &timing_table[i].sizei, &timing_table[i].delay, &timing_table[i].delayi) != 4) break; + /*PRINTF ("!%s size %f,%f delay %f,%f\n", known[i].name, timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + /* Was everything initialized? */ + for (i = 0; i <= II_LAST; i++) { + assert (timing_table[i].size >= 0 && timing_table[i].sizei >= 0 + && timing_table[i].delay >= 0 && timing_table[i].delayi >= 0); + /*PRINTF ("%s size %f,%f delay %f,%f\n", known[i], timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + fclose (fi); +} +
timings.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: Makefile.am
===================================================================
--- Makefile.am (nonexistent)
+++ Makefile.am (revision 1765)
@@ -0,0 +1,25 @@
+# Makefile -- Makefile for cpu architecture independent simulation
+# Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+#
+# This file is part of OpenRISC 1000 Architectural Simulator.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+noinst_LIBRARIES = libcuc.a
+
+libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \
+ verilog.c timings.c insn.c insn.h adv.c
+
Index: insn.h
===================================================================
--- insn.h (nonexistent)
+++ insn.h (revision 1765)
@@ -0,0 +1,111 @@
+/* insn.h -- OpenRISC Custom Unit Compiler, internal instruction definitions
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _DF_INSN_
+#define _DF_INSN_
+
+#include "cuc.h"
+
+/* Instruction types.  These values index the known[] table declared below and
+ * the timing table, both of which hold II_LAST + 1 entries, so II_LAST must
+ * always equal the highest instruction index. */
+#define II_ADD 0
+#define II_SUB 1
+#define II_AND 2
+#define II_OR 3
+#define II_XOR 4
+#define II_MUL 5
+#define II_SRL 6
+#define II_SLL 7
+#define II_SRA 8
+#define II_LB 9
+#define II_LH 10
+#define II_LW 11
+#define II_SB 12
+#define II_SH 13
+#define II_SW 14
+#define II_SFEQ 15
+#define II_SFNE 16
+#define II_SFLE 17
+#define II_SFLT 18
+#define II_SFGE 19
+#define II_SFGT 20
+#define II_BF 21
+#define II_LRBB 22
+#define II_CMOV 23
+#define II_REG 24
+#define II_NOP 25
+#define II_CALL 26
+#define II_LAST 26
+
+/* misc flags
+ * NOTE(review): presumably OR-ed onto an instruction index, with II_MASK
+ * extracting the index again -- their use is not visible in this header;
+ * confirm.
+ *
+ * The helper macros that follow the flags classify an instruction index:
+ * II_IS_LOAD and II_IS_STORE are true for the three load (LB/LH/LW) and the
+ * three store (SB/SH/SW) instructions respectively, and II_MEM_WIDTH yields
+ * 1, 2 or 4 for the byte, half-word and word forms and -1 for any other
+ * index.  Each macro evaluates its argument several times, so the argument
+ * must not have side effects. */
+#define II_MASK 0x0fff
+#define II_MEM 0x1000
+#define II_SIGNED 0x2000
+
+#define II_IS_LOAD(x) ((x) == II_LB || (x) == II_LH || (x) == II_LW)
+#define II_IS_STORE(x) ((x) == II_SB || (x) == II_SH || (x) == II_SW)
+#define II_MEM_WIDTH(x) (((x) == II_LB || (x) == II_SB) ? 1 :\
+                         ((x) == II_LH || (x) == II_SH) ? 2 :\
+                         ((x) == II_LW || (x) == II_SW) ? 4 : -1)
+
+/* List of known instructions and their rtl representation.  The name field is
+ * the string that load_timing_table () compares against the instruction names
+ * in a timing table file.
+ * NOTE(review): "comutative" (sic) presumably flags that the operands may be
+ * swapped -- confirm against its users. */
+typedef struct {
+  char *name;
+  int comutative;
+  char *rtl;
+} cuc_known_insn;
+
+extern const cuc_known_insn known[II_LAST + 1];
+
+/* Timing table -- same indexes as known table.  delay and size describe the
+ * normal form of an instruction; delayi and sizei are used instead when
+ * operand 2 of the instruction is constant (see insn_time, insn_size and
+ * ii_size).  In a timing table file the four values of an entry appear in
+ * the order size, sizei, delay, delayi. */
+typedef struct {
+  double delay;
+  double size;
+  double delayi;
+  double sizei;
+} cuc_timing_table;
+
+/* Conversion links */
+typedef struct {
+  const char *from;
+  const int to;
+} cuc_conv;
+
+/* Returns the timing table size of instruction INDEX: the immediate-form
+ * size when IMM is nonzero, the normal size otherwise */
+double ii_size (int index, int imm);
+
+/* Returns instruction time (delay) */
+double insn_time (cuc_insn *ii);
+
+/* Returns instruction size */
+double insn_size (cuc_insn *ii);
+
+/* Find known instruction and attach them to insn */
+void change_insn_type (cuc_insn *i, int index);
+
+/* Returns instruction name */
+const char *cuc_insn_name (cuc_insn *ii);
+
+/* Loads in the specified timings table; must have been called before
+ * ii_size, insn_time or insn_size are used, since they read that table */
+void load_timing_table (char *filename);
+
+/* Displays shared instructions */
+void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared);
+
+#endif /* _DF_INSN_ */
+
Index: .
===================================================================
--- . (nonexistent)
+++ . (revision 1765)
. Property changes : Added: svn:ignore ## -0,0 +1,2 ## +Makefile +.deps

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.