URL
https://opencores.org/ocsvn/or1k/or1k/trunk
Subversion Repositories or1k
Compare Revisions
- This comparison shows the changes necessary to convert path
/or1k/tags/nog_patch_39/or1ksim/cuc
- from Rev 1403 to Rev 1765
- ↔ Reverse comparison
Rev 1403 → Rev 1765
/Makefile.in
0,0 → 1,346
# Makefile.in generated by automake 1.6.3 from Makefile.am. |
# @configure_input@ |
|
# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 |
# Free Software Foundation, Inc. |
# This Makefile.in is free software; the Free Software Foundation |
# gives unlimited permission to copy and/or distribute it, |
# with or without modifications, as long as this notice is preserved. |
|
# This program is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without |
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
# PARTICULAR PURPOSE. |
|
@SET_MAKE@ |
|
# Makefile -- Makefile for cpu architecture independent simulation |
# Copyright (C) 2002 Marko Mlinar, markom@opencores.org |
# |
# This file is part of OpenRISC 1000 Architectural Simulator. |
# |
# This program is free software; you can redistribute it and/or modify |
# it under the terms of the GNU General Public License as published by |
# the Free Software Foundation; either version 2 of the License, or |
# (at your option) any later version. |
# |
# This program is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with this program; if not, write to the Free Software |
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
# |
SHELL = @SHELL@ |
|
srcdir = @srcdir@ |
top_srcdir = @top_srcdir@ |
VPATH = @srcdir@ |
prefix = @prefix@ |
exec_prefix = @exec_prefix@ |
|
bindir = @bindir@ |
sbindir = @sbindir@ |
libexecdir = @libexecdir@ |
datadir = @datadir@ |
sysconfdir = @sysconfdir@ |
sharedstatedir = @sharedstatedir@ |
localstatedir = @localstatedir@ |
libdir = @libdir@ |
infodir = @infodir@ |
mandir = @mandir@ |
includedir = @includedir@ |
oldincludedir = /usr/include |
pkgdatadir = $(datadir)/@PACKAGE@ |
pkglibdir = $(libdir)/@PACKAGE@ |
pkgincludedir = $(includedir)/@PACKAGE@ |
top_builddir = .. |
|
ACLOCAL = @ACLOCAL@ |
AUTOCONF = @AUTOCONF@ |
AUTOMAKE = @AUTOMAKE@ |
AUTOHEADER = @AUTOHEADER@ |
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd |
INSTALL = @INSTALL@ |
INSTALL_PROGRAM = @INSTALL_PROGRAM@ |
INSTALL_DATA = @INSTALL_DATA@ |
install_sh_DATA = $(install_sh) -c -m 644 |
install_sh_PROGRAM = $(install_sh) -c |
install_sh_SCRIPT = $(install_sh) -c |
INSTALL_SCRIPT = @INSTALL_SCRIPT@ |
INSTALL_HEADER = $(INSTALL_DATA) |
transform = @program_transform_name@ |
NORMAL_INSTALL = : |
PRE_INSTALL = : |
POST_INSTALL = : |
NORMAL_UNINSTALL = : |
PRE_UNINSTALL = : |
POST_UNINSTALL = : |
build_alias = @build_alias@ |
build_triplet = @build@ |
host_alias = @host_alias@ |
host_triplet = @host@ |
target_alias = @target_alias@ |
target_triplet = @target@ |
|
EXEEXT = @EXEEXT@ |
OBJEXT = @OBJEXT@ |
PATH_SEPARATOR = @PATH_SEPARATOR@ |
AMTAR = @AMTAR@ |
AR = @AR@ |
ARFLAGS = @ARFLAGS@ |
AWK = @AWK@ |
BUILD_DIR = @BUILD_DIR@ |
CC = @CC@ |
CFLAGS = @CFLAGS@ |
CPU_ARCH = @CPU_ARCH@ |
DEPDIR = @DEPDIR@ |
INCLUDES = @INCLUDES@ |
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ |
LOCAL_CFLAGS = @LOCAL_CFLAGS@ |
LOCAL_DEFS = @LOCAL_DEFS@ |
MAKE_SHELL = @MAKE_SHELL@ |
PACKAGE = @PACKAGE@ |
RANLIB = @RANLIB@ |
STRIP = @STRIP@ |
SUMVERSION = @SUMVERSION@ |
TERMCAP_LIB = @TERMCAP_LIB@ |
VERSION = @VERSION@ |
am__include = @am__include@ |
am__quote = @am__quote@ |
host = @host@ |
host_cpu = @host_cpu@ |
host_os = @host_os@ |
install_sh = @install_sh@ |
|
noinst_LIBRARIES = libcuc.a |
|
libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \ |
verilog.c timings.c insn.c insn.h adv.c |
|
subdir = cuc |
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs |
CONFIG_HEADER = $(top_builddir)/config.h |
CONFIG_CLEAN_FILES = |
LIBRARIES = $(noinst_LIBRARIES) |
|
libcuc_a_AR = $(AR) cru |
libcuc_a_LIBADD = |
am_libcuc_a_OBJECTS = cuc.$(OBJEXT) load.$(OBJEXT) bb.$(OBJEXT) \ |
memory.$(OBJEXT) verilog.$(OBJEXT) timings.$(OBJEXT) \ |
insn.$(OBJEXT) adv.$(OBJEXT) |
libcuc_a_OBJECTS = $(am_libcuc_a_OBJECTS) |
|
DEFS = @DEFS@ |
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) |
CPPFLAGS = @CPPFLAGS@ |
LDFLAGS = @LDFLAGS@ |
LIBS = @LIBS@ |
depcomp = $(SHELL) $(top_srcdir)/depcomp |
am__depfiles_maybe = depfiles |
@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/adv.Po ./$(DEPDIR)/bb.Po \ |
@AMDEP_TRUE@ ./$(DEPDIR)/cuc.Po ./$(DEPDIR)/insn.Po \ |
@AMDEP_TRUE@ ./$(DEPDIR)/load.Po ./$(DEPDIR)/memory.Po \ |
@AMDEP_TRUE@ ./$(DEPDIR)/timings.Po ./$(DEPDIR)/verilog.Po |
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ |
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
CCLD = $(CC) |
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ |
DIST_SOURCES = $(libcuc_a_SOURCES) |
DIST_COMMON = Makefile.am Makefile.in |
SOURCES = $(libcuc_a_SOURCES) |
|
# Default goal: build everything this directory provides.
all: all-am

# Only the suffixes used by the compile rules below are active.
.SUFFIXES:
.SUFFIXES: .c .o .obj

# Regenerate Makefile.in with automake when Makefile.am or configure.in change.
$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
	cd $(top_srcdir) && \
	  $(AUTOMAKE) --gnu cuc/Makefile

# Regenerate Makefile from Makefile.in through config.status.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)

# Remove the (non-installed) static libraries built here.
clean-noinstLIBRARIES:
	-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)

# Rebuild the archive from scratch so stale members never survive.
libcuc.a: $(libcuc_a_OBJECTS) $(libcuc_a_DEPENDENCIES)
	-rm -f libcuc.a
	$(libcuc_a_AR) libcuc.a $(libcuc_a_OBJECTS) $(libcuc_a_LIBADD)
	$(RANLIB) libcuc.a
|
# Remove object files and core dumps.
mostlyclean-compile:
	-rm -f *.$(OBJEXT) core *.core

# Remove generated parser sources.
distclean-compile:
	-rm -f *.tab.c

# Per-object dependency fragments; active only when dependency tracking
# is enabled (the @AMDEP_TRUE@ prefix is substituted by configure).
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adv.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bb.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cuc.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insn.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timings.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verilog.Po@am__quote@

# Remove the dependency-tracking directory.
distclean-depend:
	-rm -rf ./$(DEPDIR)
|
# Compile a C source into an object file.  With dependency tracking enabled
# the @AMDEP_TRUE@ lines set up the environment consumed by depcomp and
# continue (via @AMDEPBACKSLASH@) into the compile command itself.
.c.o:
@AMDEP_TRUE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@	depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
	$(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<

# Same as above for toolchains that use the .obj suffix; the source path is
# converted with cygpath.
.c.obj:
@AMDEP_TRUE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@	depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
@AMDEP_TRUE@	$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
	$(COMPILE) -c `cygpath -w $<`
CCDEPMODE = @CCDEPMODE@
uninstall-info-am:
|
ETAGS = etags
ETAGSFLAGS =

tags: TAGS

# Build an mkid database from the de-duplicated list of source files,
# looking in $(srcdir) for files that are not in the build directory.
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) ' { files[$$0] = 1; } \
	    END { for (i in files) print i; }'`; \
	mkid -fID $$unique

# Build an etags TAGS file from the same de-duplicated file list; nothing is
# run when there is nothing to index.
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
		$(TAGS_FILES) $(LISP)
	tags=; \
	here=`pwd`; \
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) ' { files[$$0] = 1; } \
	    END { for (i in files) print i; }'`; \
	test -z "$(ETAGS_ARGS)$$tags$$unique" \
	  || $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
	    $$tags $$unique

# Run GNU global from the top source directory, indexing into the build tree.
GTAGS:
	here=`$(am__cd) $(top_builddir) && pwd` \
	  && cd $(top_srcdir) \
	  && gtags -i $(GTAGS_ARGS) $$here

distclean-tags:
	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)

top_distdir = ..
distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)

# Copy every distributed file into $(distdir), preferring the copy in the
# build directory over the one in $(srcdir) and creating sub-directories on
# demand.  Any failed copy aborts the recipe.
distdir: $(DISTFILES)
	@list='$(DISTFILES)'; for file in $$list; do \
	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
	    dir="/$$dir"; \
	    $(mkinstalldirs) "$(distdir)$$dir"; \
	  else \
	    dir=''; \
	  fi; \
	  if test -d $$d/$$file; then \
	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
	    fi; \
	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
	  else \
	    test -f $(distdir)/$$file \
	    || cp -p $$d/$$file $(distdir)/$$file \
	    || exit 1; \
	  fi; \
	done
# Standard automake entry points.  This directory only builds a non-installed
# library, so most install/info/dvi targets are intentionally empty.
check-am: all-am
check: check-am
all-am: Makefile $(LIBRARIES)

installdirs:

install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am

install-am: all-am
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am

installcheck: installcheck-am

# Re-run "install" with the stripping variant of the install program.
install-strip:
	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	  INSTALL_STRIP_FLAG=-s \
	  `test -z '$(STRIP)' || \
	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:

clean-generic:

distclean-generic:
	-rm -f Makefile $(CONFIG_CLEAN_FILES)

maintainer-clean-generic:
	@echo "This command is intended for maintainers to use"
	@echo "it deletes files that may require special tools to rebuild."
clean: clean-am

clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am

distclean: distclean-am

distclean-am: clean-am distclean-compile distclean-depend \
	distclean-generic distclean-tags

dvi: dvi-am

dvi-am:

info: info-am

info-am:

install-data-am:

install-exec-am:

install-info: install-info-am

install-man:

installcheck-am:

maintainer-clean: maintainer-clean-am

maintainer-clean-am: distclean-am maintainer-clean-generic

mostlyclean: mostlyclean-am

mostlyclean-am: mostlyclean-compile mostlyclean-generic

uninstall-am: uninstall-info-am

.PHONY: GTAGS all all-am check check-am clean clean-generic \
	clean-noinstLIBRARIES distclean distclean-compile \
	distclean-depend distclean-generic distclean-tags distdir dvi \
	dvi-am info info-am install install-am install-data \
	install-data-am install-exec install-exec-am install-info \
	install-info-am install-man install-strip installcheck \
	installcheck-am installdirs maintainer-clean \
	maintainer-clean-generic mostlyclean mostlyclean-compile \
	mostlyclean-generic tags uninstall uninstall-am \
	uninstall-info-am

# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/cuc.c
0,0 → 1,887
/* cuc.c -- OpenRISC Custom Unit Compiler |
* Copyright (C) 2002 Marko Mlinar, markom@opencores.org |
* |
* This file is part of OpenRISC 1000 Architectural Simulator. |
* |
* This program is free software; you can redistribute it and/or modify |
* it under the terms of the GNU General Public License as published by |
* the Free Software Foundation; either version 2 of the License, or |
* (at your option) any later version. |
* |
* This program is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
* GNU General Public License for more details. |
* |
* You should have received a copy of the GNU General Public License |
* along with this program; if not, write to the Free Software |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ |
|
/* Main file, including code optimization and command prompt */ |
|
#include <stdio.h> |
#include <stdlib.h> |
#include <stdarg.h> |
#include <assert.h> |
#include <ctype.h> |
#include <string.h> |
#include <unistd.h> |
|
#include "config.h" |
|
#ifdef HAVE_INTTYPES_H |
#include <inttypes.h> |
#endif |
|
#include "port.h" |
#include "arch.h" |
#include "abstract.h" |
#include "sim-config.h" |
#include "cuc.h" |
#include "insn.h" |
#include "profiler.h" |
#include "opcode/or32.h" |
#include "parse.h" |
#include "debug.h" |
|
FILE *flog; |
int cuc_debug = 0; |
|
/* Last used registers by software convention */ |
/* Note that r11 is a caller-saved register, so we are allowed to destroy it.
   Due to the CUC architecture we must always return something, even garbage
   (so that the caller knows we are finished when we send the acknowledge).
   If r11 was not used (trivial register assignment) we will remove it later,
   but if we assigned a value to it, it must not be removed, so caller_saved[11] = 0. */
const int caller_saved[MAX_REGS] = { |
0, 0, 0, 1, 1, 1, 1, 1, |
1, 1, 0, 0, 0, 1, 0, 1, |
0, 1, 0, 1, 0, 1, 0, 1, |
0, 1, 0, 1, 0, 1, 0, 1, |
1, 1}; |
|
/* Returns the integer (floor) base-2 logarithm of X, i.e. the index of its
   highest set bit.  For X == 0 it returns 0 -- not by the book, but practical.
   Called log2_int because there is a library function named log2. */
int log2_int (unsigned long x)
{
  int c = 0;
  /* X is unsigned, so the former `assert (x >= 0)' could never fire and only
     produced an "always true" compiler warning; it has been removed. */
  if (!x) return 0;
  while (x > 1) {
    x >>= 1;
    c++;
  }
  return c;
}
|
/* Does all known instruction optimizations */ |
void cuc_optimize (cuc_func *func) |
{ |
int modified = 0; |
int first = 1; |
log ("Optimizing.\n"); |
do { |
modified = 0; |
clean_deps (func); |
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_CLEAN_DEPS"); |
if (optimize_cmovs (func)) { |
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_CMOVS"); |
modified = 1; |
} |
if (cuc_debug) cuc_check (func); |
if (optimize_tree (func)) { |
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_TREE1"); |
modified = 1; |
} |
if (remove_nops (func)) { |
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); |
modified = 1; |
} |
if (cuc_debug) cuc_check (func); |
if (remove_dead (func)) { |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD"); |
modified = 1; |
} |
if (cuc_debug) cuc_check (func); |
if (cse (func)) { |
log ("Common subexpression elimination.\n"); |
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE"); |
modified = 1; |
} |
if (first) { |
insert_conditional_facts (func); |
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_COND_FACT"); |
if (cuc_debug) cuc_check (func); |
first = 0; |
} |
if (optimize_bb (func)) { |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB"); |
modified = 1; |
} |
if (cuc_debug) cuc_check (func); |
if (remove_nops (func)) { |
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); |
modified = 1; |
} |
if (remove_dead_bb (func)) { |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB"); |
modified = 1; |
} |
if (remove_trivial_regs (func)) { |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL"); |
modified = 1; |
} |
if (remove_nops (func)) { |
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS"); |
modified = 1; |
} |
add_memory_dep (func, func->memory_order); |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP"); |
add_data_dep (func); |
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP"); |
if (schedule_memory (func, func->memory_order)) { |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM"); |
modified = 1; |
} |
} while (modified); |
set_io (func); |
#if 0 |
detect_max_values (func); |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_MAX_VALUES"); |
#endif |
} |
|
/* Pre/unrolls basic block and optimizes it */ |
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j) |
{ |
cuc_func *func; |
cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i); |
log ("BB%i unroll %i times preroll %i times\n", b, j, i); |
func = preunroll_loop (f, b, i, j, bb_filename); |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL"); |
cuc_optimize (func); |
analyse_timings (func, timings); |
|
cucdebug (2, "new_time = %i, old_time = %i, size = %f\n", |
timings->new_time, func->orig_time, timings->size); |
log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n", |
timings->new_time, func->orig_time, timings->size); |
//output_verilog (func, argv[1]); |
free_func (func); |
timings->b = b; |
timings->unroll = j; |
timings->preroll = i; |
timings->nshared = 0; |
return timings; |
} |
|
/* Simple comparison function */ |
int tim_comp (cuc_timings *a, cuc_timings *b) |
{ |
if (a->new_time < b->new_time) return -1; |
else if (a->new_time > b->new_time) return 1; |
else return 0; |
} |
|
/* Analyses function; done when cuc command is entered in (sim) prompt */ |
cuc_func *analyse_function (char *module_name, long orig_time, |
unsigned long start_addr, unsigned long end_addr, |
int memory_order, int num_runs) |
{ |
cuc_timings timings; |
cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func)); |
cuc_func *saved; |
int b, i, j; |
char tmp1[256]; |
char tmp2[256]; |
|
func->orig_time = orig_time; |
func->start_addr = start_addr; |
func->end_addr = end_addr; |
func->memory_order = memory_order; |
func->nfdeps = 0; |
func->fdeps = NULL; |
func->num_runs = num_runs; |
|
sprintf (tmp1, "%s.bin", module_name); |
cucdebug (2, "Loading %s.bin\n", module_name); |
if (cuc_load (tmp1)) { |
free (func); |
return NULL; |
} |
|
log ("Detecting basic blocks\n"); |
detect_bb (func); |
if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0); |
|
//sprintf (tmp1, "%s.bin.mp", module_name); |
sprintf (tmp2, "%s.bin.bb", module_name); |
generate_bb_seq (func, config.sim.mprof_fn, tmp2); |
log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]); |
log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]); |
|
build_bb (func); |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB"); |
reg_dep (func); |
|
log ("Detecting dependencies\n"); |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP"); |
cuc_optimize (func); |
|
#if 0 |
csm (func); |
#endif |
assert (saved = dup_func (func)); |
|
timings.preroll = timings.unroll = 1; |
timings.nshared = 0; |
|
add_latches (func); |
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES"); |
analyse_timings (func, &timings); |
|
free_func (func); |
log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n", |
timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size); |
saved->timings = timings; |
|
#if 1 |
/* detect and unroll simple loops */ |
for (b = 0; b < saved->num_bb; b++) { |
cuc_timings t[MAX_UNROLL * MAX_PREROLL]; |
cuc_timings *ut; |
cuc_timings *cut = &t[0]; |
int nt = 1; |
double csize; |
saved->bb[b].selected_tim = -1; |
|
/* Is it a loop? */ |
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue; |
log ("Found loop at BB%x. Trying to unroll.\n", b); |
t[0] = timings; |
t[0].b = b; |
t[0].preroll = 1; |
t[0].unroll = 1; |
t[0].nshared = 0; |
|
sprintf (tmp1, "%s.bin.bb", module_name); |
i = 1; |
do { |
cuc_timings *pt; |
cuc_timings *cpt = cut; |
j = 1; |
|
do { |
pt = cpt; |
cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i); |
} while (j <= MAX_PREROLL && pt->new_time > cpt->new_time); |
i++; |
ut = cut; |
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i); |
} while (i <= MAX_UNROLL && ut->new_time > cut->new_time); |
|
/* Sort the timings */ |
#if 0 |
if (cuc_debug >= 3) |
for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n", |
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time); |
#endif |
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); |
|
/* Delete timings, that have worst time and bigger size than other */ |
j = 1; |
csize = t[0].size; |
for (i = 1; i < nt; i++) |
if (t[i].size < csize) t[j++] = t[i]; |
nt = j; |
|
cucdebug (1, "Available options\n"); |
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", |
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); |
/* Add results from CSM */ |
j = nt; |
for (i = 0; i < saved->bb[b].ntim; i++) { |
int i1; |
for (i1 = 0; i1 < nt; i1++) { |
t[j] = t[i1]; |
t[j].size += saved->bb[b].tim[i].size - timings.size; |
t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time; |
t[j].nshared = saved->bb[b].tim[i].nshared; |
t[j].shared = saved->bb[b].tim[i].shared; |
if (++j >= MAX_UNROLL * MAX_PREROLL) goto full; |
} |
} |
|
full: |
nt = j; |
|
cucdebug (1, "Available options:\n"); |
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", |
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); |
|
/* Sort again with new timings added */ |
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp); |
|
/* Delete timings, that have worst time and bigger size than other */ |
j = 1; |
csize = t[0].size; |
for (i = 1; i < nt; i++) |
if (t[i].size < csize) t[j++] = t[i]; |
nt = j; |
|
cucdebug (1, "Available options:\n"); |
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n", |
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size); |
|
if (saved->bb[b].ntim) free (saved->bb[b].tim); |
saved->bb[b].ntim = nt; |
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt)); |
|
/* Copy options in reverse order -- smallest first */ |
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i]; |
|
log ("Available options:\n"); |
for (i = 0; i < saved->bb[b].ntim; i++) { |
log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n", |
saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll, |
saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size); |
} |
} |
#endif |
return saved; |
} |
|
/* Utility option formatting functions */
static const char *option_char = "?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Formats an option label into S and returns S.
   When BB_NO >= 0, S is overwritten with the decimal basic block number
   followed by the letter selected by F_OPT (e.g. 3, 1 -> "3a").
   When BB_NO < 0, the letter is appended to whatever string S already holds,
   so S must be a valid NUL-terminated string in that case.
   F_OPT must index a real character of option_char; the old check also let
   through the terminating NUL.  S must have room for the result. */
/*static */char *gen_option (char *s, int bb_no, int f_opt)
{
  size_t len;
  assert (f_opt >= 0 && (size_t) f_opt < strlen (option_char));
  if (bb_no >= 0) sprintf (s, "%i", bb_no);
  /* Append by hand: sprintf (s, "%s%c", s, ...) reads and writes the same
     buffer, which is undefined behaviour. */
  len = strlen (s);
  s[len] = option_char[f_opt];
  s[len + 1] = '\0';
  return s;
}
|
/* Prints the option label for basic block BB_NO / option F_OPT, right
   aligned in a field of at least three characters. */
/*static */void print_option (int bb_no, int f_opt)
{
  /* Large enough for any int plus the option letter and the NUL.  Starts as
     an empty string because gen_option () appends to the buffer's current
     contents when BB_NO is negative. */
  char label[16] = "";
  PRINTF ("%3s", gen_option (label, bb_no, f_opt));
}
|
/* Writes into S a comma separated list of the option labels of all basic
   blocks of F that have an option selected (selected_tim >= 0); S becomes
   the empty string when none is selected.  Returns S.
   S must be large enough for the result; its size is not known here. */
static char *format_func_options (char *s, cuc_func *f)
{
  int b, first = 1;
  char *end = s;   /* current end of the string being built */
  *s = '\0';
  for (b = 0; b < f->num_bb; b++)
    if (f->bb[b].selected_tim >= 0) {
      char tmp[16] = "";
      /* Append at the tracked end instead of passing S as both source and
         destination of sprintf, which is undefined behaviour. */
      end += sprintf (end, "%s%s", first ? "" : ",",
                      gen_option (tmp, b, f->bb[b].selected_tim));
      first = 0;
    }
  return s;
}
|
/* Prints the option table of function F (profile entry FUNC_NO): a header
   with the function name, the base timings, and one row per non-base option
   of every basic block showing its time and size difference to the base. */
static void options_cmd (int func_no, cuc_func *f)
{
  char stripped[30];
  int bb, opt;

  PRINTF ("-----------------------------------------------------------------------------\n");
  PRINTF ("|%-28s|pre/unrolled|shared| time | gates |old_time|\n",
          strstrip (stripped, prof_func[func_no].name, 28));
  PRINTF ("| BASE |%4i / %4i | %4i |%8i|%8.f|%8i|\n", 1, 1, 0,
          f->timings.new_time, f->timings.size, f->orig_time);

  for (bb = 0; bb < f->num_bb; bb++) {
    /* Entry 0 of each block is the base option, so rows start at 1.  */
    for (opt = 1; opt < f->bb[bb].ntim; opt++) {
      cuc_timings *cand = &f->bb[bb].tim[opt];
      int dtime = cand->new_time - f->timings.new_time;
      double dsize = cand->size - f->timings.size;

      PRINTF ("| ");
      print_option (bb, opt);
      PRINTF (" |%4i / %4i | %4i |%+8i|%+8.f| |\n",
              cand->preroll, cand->unroll, cand->nshared,
              dtime, dsize);
    }
  }
}
|
/* Generates a function, based on specified parameters */ |
cuc_func *generate_function (cuc_func *rf, char *name, char *cut_filename) |
{ |
int b; |
char tmp[256]; |
cuc_timings tt; |
cuc_func *f; |
assert (f = dup_func (rf)); |
|
if (cuc_debug >= 2) print_cuc_bb (f, "BEFORE_GENERATE"); |
log ("Generating function %s.\n", name); |
PRINTF ("Generating function %s.\n", name); |
|
format_func_options (tmp, rf); |
if (strlen (tmp)) PRINTF ("Applying options: %s\n", tmp); |
else PRINTF ("Using basic options.\n"); |
|
/* Generate function as specified by options */ |
for (b = 0; b < f->num_bb; b++) { |
cuc_timings *st; |
if (rf->bb[b].selected_tim < 0) continue; |
st = &rf->bb[b].tim[rf->bb[b].selected_tim]; |
sprintf (tmp, "%s.bin.bb", name); |
preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll); |
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL"); |
} |
for (b = 0; b < f->num_bb; b++) { |
cuc_timings *st; |
if (rf->bb[b].selected_tim < 0) continue; |
st = &rf->bb[b].tim[rf->bb[b].selected_tim]; |
if (!st->nshared) continue; |
assert (0); |
//csm_gen (f, rf, st->nshared, st->shared); |
} |
add_latches (f); |
if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES"); |
analyse_timings (f, &tt); |
|
sprintf (tmp, "%s%s", cut_filename, name); |
output_verilog (f, tmp, name); |
return f; |
} |
|
/* Calculates required time, based on selected options */ |
int calc_cycles (cuc_func *f) |
{ |
int b, ntime = f->timings.new_time; |
for (b = 0; b < f->num_bb; b++) |
if (f->bb[b].selected_tim >= 0) { |
assert (f->bb[b].selected_tim < f->bb[b].ntim); |
ntime += f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time; |
} |
return ntime; |
} |
|
/* Calculates required size, based on selected options */ |
double calc_size (cuc_func *f) |
{ |
int b; |
double size = f->timings.size; |
for (b = 0; b < f->num_bb; b++) |
if (f->bb[b].selected_tim >= 0) { |
assert (f->bb[b].selected_tim < f->bb[b].ntim); |
size += f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size; |
} |
return size; |
} |
|
/* Dumps specified function to file (hex).
   Starting at START_ADDR, writes one 32-bit word per line to OUT_FN until
   the word following the first "l.jr" instruction has been written
   (presumably its delay slot -- confirm).  Returns the address of the last
   word written.  Aborts if the file cannot be opened or a word does not
   decode to a known instruction. */
unsigned long extract_function (char *out_fn, unsigned long start_addr)
{
  FILE *fo;
  unsigned long a = start_addr;
  int x = 0;   /* 0: no l.jr seen yet, 1: l.jr just written, 2: done */

  /* Open outside assert (): with NDEBUG the assert expression vanishes and
     the file would never be opened. */
  fo = fopen (out_fn, "wt+");
  assert (fo);

  do {
    unsigned long d = evalsim_mem32 (a);
    int index = insn_decode (d);
    assert (index >= 0);
    if (x) x++;
    if (strcmp (insn_name (index), "l.jr") == 0) x = 1;
    a += 4;
    fprintf (fo, "%08lx\n", d);
  } while (x < 2);

  fclose (fo);
  return a - 4;
}
|
static cuc_func *func[MAX_FUNCS]; |
static int func_v[MAX_FUNCS]; |
|
/* Detects function dependencies and removes */ |
static void set_func_deps () |
{ |
int f, b, i, j; |
restart: |
for (f = 0; f < prof_nfuncs - 1; f++) if (func[f]) { |
int fused[MAX_FUNCS] = {0}; |
int c; |
for (b = 0; b < func[f]->num_bb; b++) |
for (i = 0; i < func[f]->bb[b].ninsn; i++) { |
cuc_insn *ii = &func[f]->bb[b].insn[i]; |
if (ii->index == II_CALL) { |
assert (ii->opt[0] == OPT_CONST); |
for (j = 0; j < prof_nfuncs - 1; j++) |
if (func[j] && func[j]->start_addr == ii->op[0]) break; |
if (j >= prof_nfuncs - 1) { |
log ("%s is calling unknown function, address %08lx\n", |
prof_func[f].name, ii->op[0]); |
debug (1, "%s is calling unknown function, address %08lx\n", |
prof_func[f].name, ii->op[0]); |
free_func (func[f]); |
func[f] = NULL; |
goto restart; |
} else if (f == j) { |
log ("%s is recursive, ignoring\n", prof_func[f].name); |
debug (1, "%s is recursive, ignoring\n", prof_func[f].name); |
free_func (func[f]); |
func[f] = NULL; |
goto restart; |
} else fused[j]++; |
} |
} |
for (i = 0; i < MAX_FUNCS; i++) if (fused[i]) c++; |
if (func[f]->nfdeps) free (func[f]->fdeps); |
func[f]->nfdeps = c; |
func[f]->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * c); |
for (i = 0, j = 0; i < MAX_FUNCS; i++) |
if (fused[i]) func[f]->fdeps[j++] = func[i]; |
} |
|
/* Detect loops */ |
{ |
int change; |
for (f = 0; f < MAX_FUNCS; f++) if (func[f]) func[f]->tmp = 0; |
do { |
change = 0; |
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) { |
int o = 1; |
for (i = 0; i < func[f]->nfdeps; i++) |
if (!func[f]->fdeps[i]->tmp) {o = 0; break;} |
if (o) { |
func[f]->tmp = 1; |
change = 1; |
} |
} |
} while (change); |
|
change = 0; |
for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) { |
free_func (func[f]); |
func[f] = NULL; |
change = 1; |
} |
if (change) goto restart; |
} |
} |
|
void main_cuc (char *filename) |
{ |
int i, j; |
char tmp1[256]; |
char filename_cut[256]; |
#if 0 /* Select prefix, based on binary program name */ |
for (i = 0; i < sizeof (filename_cut); i++) { |
if (isalpha(filename[i])) filename_cut[i] = filename[i]; |
else { |
filename_cut[i] = '\0'; |
break; |
} |
} |
#else |
strcpy (filename_cut, "cu"); |
#endif |
|
PRINTF ("Entering OpenRISC Custom Unit Compiler command prompt\n"); |
PRINTF ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn); |
sprintf (tmp1, "%s.log", filename_cut); |
PRINTF ("Analyzing. (log file \"%s\").\n", tmp1); |
assert (flog = fopen (tmp1, "wt+")); |
|
/* Loads in the specified timings table */ |
PRINTF ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn, |
generate_time_pretty (tmp1, config.sim.clkcycle_ps)); |
load_timing_table (config.cuc.timings_fn); |
runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps; |
PRINTF ("Multicycle logic %s, bursts %s, %s memory order.\n", |
config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF", |
config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" : |
config.cuc.memory_order == MO_STRONG ? "strong" : "exact"); |
|
prof_set (1, 0); |
assert (prof_acquire (config.sim.prof_fn) == 0); |
|
if (config.cuc.calling_convention) |
PRINTF ("Assuming OpenRISC standard calling convention.\n"); |
|
/* Try all functions except "total" */ |
for (i = 0; i < prof_nfuncs - 1; i++) { |
long orig_time; |
unsigned long start_addr, end_addr; |
orig_time = prof_func[i].cum_cycles; |
start_addr = prof_func[i].addr; |
|
/* Extract the function from the binary */ |
sprintf (tmp1, "%s.bin", prof_func[i].name); |
end_addr = extract_function (tmp1, start_addr); |
|
log ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, start_addr, |
end_addr); |
PRINTF ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, |
start_addr, end_addr); |
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, |
end_addr, config.cuc.memory_order, prof_func[i].calls); |
func_v[i] = 0; |
} |
set_func_deps (); |
|
while (1) { |
char *s; |
wait_command: |
PRINTF ("(cuc) "); |
fflush (stdout); |
wait_command_empty: |
s = fgets(tmp1, sizeof tmp1, stdin); |
usleep (100); |
if (!s) goto wait_command_empty; |
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++); |
*s = '\0'; |
|
/* quit command */ |
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) { |
/* Delete temporary files */ |
for (i = 0; i < prof_nfuncs - 1; i++) { |
sprintf (tmp1, "%s.bin", prof_func[i].name); |
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK"); |
sprintf (tmp1, "%s.bin.bb", prof_func[i].name); |
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK"); |
} |
break; |
|
/* profile command */ |
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) { |
int ntime = 0; |
int size = 0; |
PRINTF ("-----------------------------------------------------------------------------\n"); |
PRINTF ("|function name |calls|avg cycles |old%%| max. f. | impr. f.| options |\n"); |
PRINTF ("|--------------------+-----+------------+----+----------|---------+---------|\n"); |
for (j = 0; j < prof_nfuncs; j++) { |
int bestcyc = 0, besti = 0; |
char tmp[100]; |
for (i = 0; i < prof_nfuncs; i++) |
if (prof_func[i].cum_cycles > bestcyc) { |
bestcyc = prof_func[i].cum_cycles; |
besti = i; |
} |
i = besti; |
PRINTF ("|%-20s|%5li|%12.1f|%3.0f%%| ", |
strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls, |
((double)prof_func[i].cum_cycles / prof_func[i].calls), |
(100. * prof_func[i].cum_cycles / prof_cycles)); |
if (func[i]) { |
double f = 1.0; |
if (func_v[i]) { |
int nt = calc_cycles (func[i]); |
int s = calc_size (func[i]); |
f = 1. * func[i]->orig_time / nt; |
ntime += nt; |
size += s; |
} else ntime += prof_func[i].cum_cycles; |
PRINTF ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles |
/ func[i]->timings.new_time, f, format_func_options (tmp, func[i])); |
} else { |
PRINTF (" N/A | N/A | N/A |\n"); |
ntime += prof_func[i].cum_cycles; |
} |
prof_func[i].cum_cycles = -prof_func[i].cum_cycles; |
} |
for (i = 0; i < prof_nfuncs; i++) |
prof_func[i].cum_cycles = -prof_func[i].cum_cycles; |
PRINTF ("-----------------------------------------------------------------------------\n"); |
PRINTF ("Total %i cycles (was %i), total added gates = %i. Speed factor %.1f\n", |
ntime, prof_cycles, size, 1. * prof_cycles / ntime); |
|
/* debug command */ |
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) { |
sscanf (tmp1, "%*s %i", &cuc_debug); |
if (cuc_debug < 0) cuc_debug = 0; |
if (cuc_debug > 9) cuc_debug = 9; |
|
/* generate command */ |
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) { |
/* check for function dependencies */ |
for (i = 0; i < prof_nfuncs; i++) |
if (func[i]) func[i]->tmp = func_v[i]; |
for (i = 0; i < prof_nfuncs; i++) if (func[i]) |
for (j = 0; j < func[i]->nfdeps; j++) |
if (!func[i]->fdeps[j] || !func[i]->fdeps[j]->tmp) { |
PRINTF ("Function %s must be selected for translation (required by %s)\n", |
prof_func[j].name, prof_func[i].name); |
goto wait_command; |
} |
for (i = 0; i < prof_nfuncs; i++) |
if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name, filename_cut); |
generate_main (prof_nfuncs, func, filename_cut); |
|
/* list command */ |
} else if (strcmp (tmp1, "l") == 0 || strcmp (tmp1, "list") == 0) { |
/* check for function dependencies */ |
for (i = 0; i < prof_nfuncs; i++) |
if (func_v[i]) { |
PRINTF ("%s\n", prof_func[j].name); |
} |
|
/* selectall command */ |
} else if (strcmp (tmp1, "sa") == 0 || strcmp (tmp1, "selectall") == 0) { |
int f; |
for (f = 0; f < prof_nfuncs; f++) if (func[f]) { |
func_v[f] = 1; |
PRINTF ("Function %s selected for translation.\n", prof_func[f].name); |
} |
|
/* select command */ |
} else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) { |
char tmp[50], ch; |
int p, o, b, f; |
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); |
if (p < 1) PRINTF ("Invalid parameters.\n"); |
else { |
/* Check if we have valid option */ |
for (f = 0; f < prof_nfuncs; f++) |
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; |
if (f < prof_nfuncs) { |
if (p == 1) { |
if (func[f]) { |
func_v[f] = 1; |
PRINTF ("Function %s selected for translation.\n", prof_func[f].name); |
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name); |
} else { |
if (!func_v[f]) |
PRINTF ("Function %s not yet selected for translation.\n", prof_func[f].name); |
if (p < 3) goto invalid_option; |
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); |
if (!option_char[o]) goto invalid_option; |
if (b < 0 || b >= func[f]->num_bb) goto invalid_option; |
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; |
|
/* select an option */ |
func[f]->bb[b].selected_tim = o; |
if (func[f]->bb[b].tim[o].nshared) { |
PRINTF ("Option has shared instructions: "); |
print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared); |
PRINTF ("\n"); |
} |
goto wait_command; |
invalid_option: |
PRINTF ("Invalid option.\n"); |
} |
} else PRINTF ("Invalid function.\n"); |
} |
|
/* unselect command */ |
} else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) { |
char tmp[50], ch; |
int p, o, b, f; |
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch); |
if (p < 1) PRINTF ("Invalid parameters.\n"); |
else { |
/* Check if we have valid option */ |
for (f = 0; f < prof_nfuncs; f++) |
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break; |
if (f < prof_nfuncs) { |
if (p == 1) { |
if (func[f]) { |
func_v[f] = 0; |
PRINTF ("Function %s unselected for translation.\n", prof_func[f].name); |
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name); |
} else { |
if (p < 3) goto invalid_option; |
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++); |
if (!option_char[o]) goto invalid_option; |
if (b < 0 || b >= func[f]->num_bb) goto invalid_option; |
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option; |
|
/* select an option */ |
func[f]->bb[b].selected_tim = -1; |
} |
} else PRINTF ("Invalid function.\n"); |
} |
|
/* options command */ |
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) { |
int any = 0; |
PRINTF ("Available options:\n"); |
for (i = 0; i < prof_nfuncs; i++) |
if (func[i]) { |
options_cmd (i, func[i]); |
any = 1; |
} |
if (any) PRINTF ("-----------------------------------------------------------------------------\n"); |
else PRINTF ("Sorry. No available options.\n"); |
|
/* Ignore empty string */ |
} else if (strcmp (tmp1, "") == 0) { |
|
/* help command */ |
} else { |
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0) |
PRINTF ("Unknown command.\n"); |
PRINTF ("OpenRISC Custom Unit Compiler command prompt\n"); |
PRINTF ("Available commands:\n"); |
PRINTF (" h | help displays this help\n"); |
PRINTF (" q | quit returns to or1ksim prompt\n"); |
PRINTF (" p | profile displays function profiling\n"); |
PRINTF (" d | debug # sets debug level (0-9)\n"); |
PRINTF (" o | options displays available options\n"); |
PRINTF (" s | select func [option] selects an option/function\n"); |
PRINTF (" u | unselect func [option] unselects an option/function\n"); |
PRINTF (" g | generate generates verilog file\n"); |
PRINTF (" l | list displays selected functions\n"); |
} |
} |
|
/* Dispose memory */ |
for (i = 0; i < prof_nfuncs -1; i++) |
if (func[i]) free_func (func[i]); |
|
fclose (flog); |
} |
|
/*----------------------------------------------------[ CUC Configuration ]---*/ |
void cuc_calling_convention(union param_val val, void *dat) |
{ |
config.cuc.calling_convention = val.int_val; |
} |
|
void cuc_enable_bursts(union param_val val, void *dat) |
{ |
config.cuc.enable_bursts = val.int_val; |
} |
|
void cuc_no_multicycle(union param_val val, void *dat) |
{ |
config.cuc.no_multicycle = val.int_val; |
} |
|
void cuc_memory_order(union param_val val, void *dat) |
{ |
if (strcmp (val.str_val, "none") == 0) |
config.cuc.memory_order = MO_NONE; |
else if (strcmp (val.str_val, "weak") == 0) |
config.cuc.memory_order = MO_WEAK; |
else if (strcmp (val.str_val, "strong") == 0) |
config.cuc.memory_order = MO_STRONG; |
else if (strcmp (val.str_val, "exact") == 0) { |
config.cuc.memory_order = MO_EXACT; |
} else { |
char tmp[200]; |
sprintf (tmp, "invalid memory order '%s'.\n", val.str_val); |
CONFIG_ERROR(tmp); |
} |
} |
|
void cuc_timings_fn(union param_val val, void *dat) |
{ |
strcpy(config.cuc.timings_fn, val.str_val); |
} |
|
void reg_cuc_sec(void) |
{ |
struct config_section *sec = reg_config_sec("cuc", NULL, NULL); |
|
reg_config_param(sec, "calling_convention", paramt_int, cuc_calling_convention); |
reg_config_param(sec, "enable_bursts", paramt_int, cuc_enable_bursts); |
reg_config_param(sec, "no_multicycle", paramt_int, cuc_no_multicycle); |
reg_config_param(sec, "memory_order", paramt_word, cuc_memory_order); |
reg_config_param(sec, "timings_fn", paramt_str, cuc_timings_fn); |
} |
cuc.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: load.c
===================================================================
--- load.c (nonexistent)
+++ load.c (revision 1765)
@@ -0,0 +1,536 @@
+/* load.c -- OpenRISC Custom Unit Compiler, instruction loading and converting
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "abstract.h"
+#include "sim-config.h"
+#include "cuc.h"
+#include "opcode/or32.h"
+#include "insn.h"
+
+static const cuc_conv conv[] = { /* Mnemonic translation table: each entry
+ * pairs an OR32 mnemonic (field `from') with an internal II_* instruction
+ * code (field `to'), optionally OR'ed with II_MEM and/or II_SIGNED.
+ * cuc_load () searches it linearly with strcmp (), turns II_SIGNED into
+ * IT_SIGNED and II_MEM into IT_MEMORY | IT_VOLATILE on the instruction
+ * type, and passes `to & II_MASK' to change_insn_type ().  Immediate and
+ * register forms of an operation share one II_* code. */
+{"l.add", II_ADD}, {"l.addi", II_ADD},
+{"l.movhi", II_OR},
+{"l.sub", II_SUB}, {"l.subi", II_SUB},
+{"l.and", II_AND}, {"l.andi", II_AND},
+{"l.xor", II_XOR}, {"l.xori", II_XOR},
+{"l.or", II_OR }, {"l.ori", II_OR},
+{"l.mul", II_MUL}, {"l.muli", II_MUL},
+
+{"l.sra", II_SRA}, {"l.srai", II_SRA},
+{"l.srl", II_SRL}, {"l.srli", II_SRL},
+{"l.sll", II_SLL}, {"l.slli", II_SLL},
+
+{"l.lbz",II_LB | II_MEM}, {"l.lbs", II_LB | II_MEM | II_SIGNED},
+{"l.lhz",II_LH | II_MEM}, {"l.lhs", II_LH | II_MEM | II_SIGNED},
+{"l.lwz",II_LW | II_MEM}, {"l.lws", II_LW | II_MEM | II_SIGNED},
+{"l.sb", II_SB | II_MEM}, {"l.sh", II_SH | II_MEM}, {"l.sw", II_SW | II_MEM},
+{"l.sfeq", II_SFEQ }, {"l.sfeqi", II_SFEQ},
+{"l.sfne", II_SFNE }, {"l.sfnei", II_SFNE},
+{"l.sflts", II_SFLT | II_SIGNED}, {"l.sfltis", II_SFLT | II_SIGNED},
+{"l.sfltu", II_SFLT}, {"l.sfltiu", II_SFLT},
+{"l.sfgts", II_SFGT | II_SIGNED}, {"l.sfgtis", II_SFGT | II_SIGNED},
+{"l.sfgtu", II_SFGT}, {"l.sfgtiu", II_SFGT},
+{"l.sfges", II_SFGE | II_SIGNED}, {"l.sfgeis", II_SFGE | II_SIGNED},
+{"l.sfgeu", II_SFGE}, {"l.sfgeiu", II_SFGE},
+{"l.sfles", II_SFLE | II_SIGNED}, {"l.sfleis", II_SFLE | II_SIGNED},
+{"l.sfleu", II_SFLE}, {"l.sfleiu", II_SFLE},
+{"l.j", II_BF },
+{"l.bf", II_BF },
+{"l.jal", II_CALL },
+{"l.nop", II_NOP }
+};
+
+/* Instructions from function.
+ * insn[0 .. num_insn-1] is the working instruction list: cuc_load () fills
+ * it from the input file and the expand_* passes grow it in place.
+ * reloc[] is scratch space in which those passes record, for every old
+ * instruction index, the index it was moved to, so that OPT_REF / OPT_JUMP
+ * operands can be renumbered afterwards. */
+cuc_insn insn[MAX_INSNS];
+int num_insn;
+int reloc[MAX_INSNS];
+
+/* Dumps the whole global instruction list (insn[0 .. num_insn-1]) below a
+ * banner line.
+ *   s       - title printed between the rows of asterisks
+ *   verbose - forwarded unchanged to print_insns () */
+void print_cuc_insns (char *s, int verbose)
+{
+  const char *title = s;
+
+  PRINTF ("****************** %s ******************\n", title);
+  print_insns (0, insn, num_insn, verbose);
+  PRINTF ("\n\n");
+}
+
+/* Swaps the entries at positions i and j of the global insn[] array.
+ * Whole cuc_insn structures are exchanged by value. */
+void xchg_insn (int i, int j)
+{
+  cuc_insn *first = &insn[i];
+  cuc_insn *second = &insn[j];
+  cuc_insn saved = *first;
+
+  *first = *second;
+  *second = saved;
+}
+
+/* Replaces a set-flag comparison by its logical complement
+ * (EQ <-> NE, LT <-> GE, GT <-> LE) through change_insn_type ().
+ * The instruction must carry IT_COND and one of the six II_SF* codes;
+ * anything else trips an assertion. */
+void negate_conditional (cuc_insn *ii)
+{
+  assert (ii->type & IT_COND);
+
+  switch (ii->index) {
+  case II_SFEQ: change_insn_type (ii, II_SFNE); break;
+  case II_SFNE: change_insn_type (ii, II_SFEQ); break;
+  case II_SFLT: change_insn_type (ii, II_SFGE); break;
+  case II_SFGT: change_insn_type (ii, II_SFLE); break;
+  case II_SFLE: change_insn_type (ii, II_SFGT); break;
+  case II_SFGE: change_insn_type (ii, II_SFLT); break;
+  default:
+    assert (0);
+  }
+}
+
+/* Remove delay slots.
+ * Walks insn[] once; every instruction that directly follows an IT_BRANCH
+ * instruction is treated as its delay slot and is swapped with the branch,
+ * so that it ends up in front of it.  If the branch still reads a register
+ * operand, the entry two places before the delay slot (asserted to be an
+ * II_NOP) is turned into "add r, r, 0" and the branch is redirected to
+ * reference that instruction instead of the register.
+ * Asserts that the list does not end with a branch. */
+void remove_dslots ()
+{
+ int i;
+ int in_delay = 0; /* set when the previous instruction was a branch */
+ for (i = 0; i < num_insn; i++) {
+ if (in_delay) insn[i].type |= IT_INDELAY;
+ in_delay = 0;
+ if (insn[i].type & IT_BRANCH) in_delay = 1;
+ if (insn[i].type & IT_INDELAY) {
+ cuc_insn *ii; /* instruction two places back */
+ cuc_insn *bi; /* the branch owning this delay slot */
+ assert (i >= 2);
+ ii = &insn[i - 2];
+ bi = &insn[i - 1];
+ /* delay slot should not be a branch target! */
+ assert ((insn[i].type & IT_BBSTART) == 0);
+ assert ((bi->type & IT_INDELAY) == 0);
+ insn[i].type &= ~IT_INDELAY; /* no more in delay slot */
+
+ /* Get the value we need before the actual jump */
+ if (bi->opt[1] & OPT_REGISTER && bi->op[1] >= 0) {
+ int r = bi->op[1];
+ assert (ii->index == II_NOP);
+ change_insn_type (ii, II_ADD);
+ ii->type = IT_COND;
+ ii->dep = NULL;
+ ii->op[0] = r; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = r; ii->opt[1] = OPT_REGISTER;
+ ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+ ii->opt[3] = OPT_NONE;
+ bi->op[1] = i - 2; bi->opt[1] = OPT_REF; /* branch now reads the copy */
+ }
+ xchg_insn (i, i - 1); /* delay-slot instruction now precedes the branch */
+ }
+ }
+ assert (in_delay == 0);
+}
+
+/* Convert local variables (uses stack frame -- r1) to internal values.
+ * Stores of the form "sw off(r1),rx" with off < CUC_MAX_STACK are rewritten
+ * into "add <no register>, rx, 0" and remembered per offset; later loads
+ * "lw rx,off(r1)" from a remembered offset become "add rx, <ref to that
+ * store>, 0".  Accesses that cannot be converted only clear
+ * can_remove_stack.
+ * NOTE(review): can_remove_stack and real_stack_size are computed but never
+ * read in this function (the assert using the former is commented out). */
+void detect_locals ()
+{
+ int stack[CUC_MAX_STACK]; /* offset -> index of last converted store, -1 if none */
+ int i, can_remove_stack = 1;
+ int real_stack_size = 0;
+
+ for (i = 0; i < CUC_MAX_STACK; i++) stack[i] = -1;
+
+ for (i = 0; i < num_insn; i++) {
+ /* sw off (r1),rx */
+ if (insn[i].index == II_SW
+ && (insn[i].opt[0] & OPT_CONST)
+ && insn[i].op[1] == 1 && (insn[i].opt[1] & OPT_REGISTER)) {
+
+ if (insn[i].op[0] < CUC_MAX_STACK/* && insn[i].op[1] >= 4*/) { /* Convert to normal move */
+ stack[insn[i].op[0]] = i;
+ insn[i].type &= IT_INDELAY | IT_BBSTART; /* keep only these two flags */
+ change_insn_type (&insn[i], II_ADD);
+ insn[i].op[0] = -1; insn[i].opt[0] = OPT_REGISTER | OPT_DEST;
+ insn[i].op[1] = insn[i].op[2]; insn[i].opt[1] = insn[i].opt[2];
+ insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST;
+ } else can_remove_stack = 0;
+ /* lw rx,off (r1) */
+ } else if (insn[i].index == II_LW
+ && (insn[i].opt[1] & OPT_CONST)
+ && insn[i].op[2] == 1 && (insn[i].opt[2] & OPT_REGISTER)) {
+
+ if (insn[i].op[1] < CUC_MAX_STACK && stack[insn[i].op[1]] >= 0) { /* Convert to normal move */
+ insn[i].type &= IT_INDELAY | IT_BBSTART;
+ change_insn_type (&insn[i], II_ADD);
+ insn[i].op[1] = stack[insn[i].op[1]]; insn[i].opt[1] = OPT_REF;
+ insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST;
+ } else can_remove_stack = 0;
+ /* Check for defined stack size */
+ } else if (insn[i].index == II_ADD && !real_stack_size
+ && (insn[i].opt[0] & OPT_REGISTER) && insn[i].op[0] == 1
+ && (insn[i].opt[1] & OPT_REGISTER) && insn[i].op[1] == 1
+ && (insn[i].opt[2] & OPT_CONST)) {
+ real_stack_size = -insn[i].op[2]; /* first "add r1,r1,const" seen */
+ }
+ }
+ //assert (can_remove_stack); /* TODO */
+}
+
+/* Decodes one instruction word and fills in *insn.
+ *   data - raw instruction word
+ *   insn - record to initialise: raw word, decoded index, disassembly text,
+ *          cleared type and dependency list, and one op/opt pair per operand
+ * Operands are collected by walking the opcode's argument string: an 'r'
+ * followed by a field letter yields a register operand (the first operand
+ * is also marked OPT_DEST); any other character that occurs in the
+ * encoding string yields a sign/zero-extended constant.
+ * Returns the mnemonic as reported by insn_name ().
+ * Prints a message and terminates the program if the word cannot be
+ * decoded. */
+const char *build_insn (unsigned long data, cuc_insn *insn)
+{
+  const char *name;
+  char *s;
+  extern char *disassembled;
+  int index = insn_decode (data);
+  struct or32_opcode const *opcode;
+  int i, argc = 0;
+
+  /* Reject undecodable words before index is used for any table lookup */
+  if (index < 0) {
+    fprintf (stderr, "Invalid opcode 0x%08lx!\n", data);
+    exit (1);
+  }
+
+  insn->insn = data;
+  insn->type = 0;
+  name = insn_name (index);
+  insn->index = index;
+  disassemble_index (data, index);
+  strcpy (insn->disasm, disassembled);
+  insn->dep = NULL;
+  for (i = 0; i < MAX_OPERANDS; i++) insn->opt[i] = OPT_NONE;
+
+  opcode = &or32_opcodes[index];
+
+  for (s = opcode->args; *s != '\0'; ++s) {
+    switch (*s) {
+    case 'r':
+      assert (argc < MAX_OPERANDS);
+      insn->opt[argc] = OPT_REGISTER | (argc ? 0 : OPT_DEST);
+      insn->op[argc++] = or32_extract (*++s, opcode->encoding, data);
+      /* An 'r' at the very end of the string has no field letter; stop
+       * here so the loop increment cannot step past the terminator. */
+      if (*s == '\0') return name;
+      break;
+
+    default:
+      if (strchr (opcode->encoding, *s)) {
+        unsigned long imm = or32_extract (*s, opcode->encoding, data);
+        imm = extend_imm (imm, *s);
+        assert (argc < MAX_OPERANDS);
+        insn->opt[argc] = OPT_CONST;
+        insn->op[argc++] = imm;
+      }
+    }
+  }
+  return name;
+}
+
+/* inserts nop before branch.
+ * Every IT_BRANCH instruction is replaced by three consecutive entries
+ * (lowest index first): an II_CMOV of type IT_COND that selects between the
+ * constants 0 and 1 on FLAG_REG (order chosen by IT_FLAG1), an II_NOP, and
+ * the branch itself.  A branch that read a register (asserted to be
+ * FLAG_REG) is changed to reference its own old index, which the final
+ * relocation pass maps to the II_CMOV.  The list is rebuilt in place from
+ * the top down, reloc[] records old index -> new index, and all OPT_REF /
+ * OPT_JUMP operands are renumbered at the end. */
+void expand_branch ()
+{
+ int i, j, num_bra = 0, d;
+ for (i = 0; i < num_insn; i++) if (insn[i].type & IT_BRANCH) num_bra++;
+
+ d = num_insn + 2 * num_bra; /* size of the expanded list; also the write cursor */
+ assert (d < MAX_INSNS);
+
+ /* Add nop before branch */
+ for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_BRANCH) {
+ insn[--d] = insn[i]; // for delay slot (later)
+ if (insn[d].opt[1] & OPT_REGISTER) {
+ assert (insn[d].op[1] == FLAG_REG);
+ insn[d].op[1] = i; insn[d].opt[1] = OPT_REF; /* old index, relocated below */
+ }
+ insn[--d] = insn[i]; // for branch
+ change_insn_type (&insn[d], II_NOP);
+ insn[--d] = insn[i]; // save flag & negation of conditional, if required
+ change_insn_type (&insn[d], II_CMOV);
+ insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST;
+ insn[d].op[1] = insn[d].type & IT_FLAG1 ? 0 : 1; insn[d].opt[1] = OPT_CONST;
+ insn[d].op[2] = insn[d].type & IT_FLAG1 ? 1 : 0; insn[d].opt[2] = OPT_CONST;
+ insn[d].op[3] = FLAG_REG; insn[d].opt[3] = OPT_REGISTER;
+ insn[d].type = IT_COND; /* NOTE(review): the test below re-reads the value
+ * just stored, so it presumably always succeeds -- confirm IT_COND != 0 */
+ if (insn[d].type)
+ reloc[i] = d;
+ } else {
+ insn[--d] = insn[i];
+ reloc[i] = d;
+ }
+ num_insn += 2 * num_bra;
+ for (i = 0; i < num_insn; i++)
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP)
+ insn[i].op[j] = reloc[insn[i].op[j]];
+}
+
+/* expands immediate memory instructions to two.
+ * Every IT_MEMORY instruction becomes a pair: an II_ADD flagged IT_MEMADD
+ * that keeps the original address operands, followed by the access itself,
+ * whose address operand is replaced by an OPT_REF to that II_ADD.  For
+ * stores the value operand is moved from slot 2 to slot 0.  The list is
+ * rebuilt in place from the top down; reloc[] records old index -> new
+ * index (of the II_ADD for expanded instructions).  Afterwards OPT_REF /
+ * OPT_JUMP operands of all instructions that are not IT_MEMORY are
+ * renumbered.  Any IT_MEMORY instruction other than SW/SH/SB/LW/LH/LB is
+ * reported on stderr and trips an assertion. */
+void expand_memory ()
+{
+ int i, j, num_mem = 0, d;
+ for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY) num_mem++;
+
+ d = num_insn + num_mem; /* size of the expanded list; also the write cursor */
+ assert (d < MAX_INSNS);
+
+ /* Split memory commands */
+ for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_MEMORY) {
+ insn[--d] = insn[i]; /* becomes the memory access (ends up at d + 1) */
+ insn[--d] = insn[i]; /* becomes the address add (at d) */
+ reloc[i] = d;
+ switch (insn[d].index) {
+ case II_SW:
+ case II_SH:
+ case II_SB:
+ insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* sw rx,(t($-1)) */
+ insn[d + 1].op[0] = insn[i].op[2]; insn[d + 1].opt[0] = insn[d + 1].opt[2];
+ insn[d + 1].opt[2] = OPT_NONE;
+ insn[d + 1].type &= ~IT_BBSTART;
+ insn[d].op[2] = insn[d].op[0]; insn[d].opt[2] = insn[d].opt[0];
+ insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */
+ insn[d].opt[3] = OPT_NONE;
+ insn[d].type &= IT_INDELAY | IT_BBSTART;
+ insn[d].type |= IT_MEMADD;
+ change_insn_type (&insn[d], II_ADD);
+ break;
+ case II_LW:
+ case II_LH:
+ case II_LB:
+ insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */
+ insn[d].type &= IT_INDELAY | IT_BBSTART;
+ insn[d].type |= IT_MEMADD;
+ change_insn_type (&insn[d], II_ADD);
+ insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* lw (t($-1)),rx */
+ insn[d + 1].opt[2] = OPT_NONE;
+ insn[d + 1].opt[3] = OPT_NONE;
+ insn[d + 1].type &= ~IT_BBSTART;
+ break;
+ default: fprintf (stderr, "%4i, %4i: %s\n", i, d, cuc_insn_name (&insn[d]));
+ assert (0);
+ }
+ } else {
+ insn[--d] = insn[i];
+ reloc[i] = d;
+ }
+ num_insn += num_mem;
+ for (i = 0; i < num_insn; i++) if (!(insn[i].type & IT_MEMORY)) /* accesses already hold new indices */
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP)
+ insn[i].op[j] = reloc[insn[i].op[j]];
+}
+
+/* expands signed comparisons to three instructions.
+ * Every instruction that has IT_SIGNED but not IT_MEMORY becomes, lowest
+ * index first: an II_ADD of the original operand 1 and 0x80000000, an
+ * II_ADD of the original operand 2 and 0x80000000, and the original
+ * instruction with operands 1 and 2 replaced by OPT_REFs to those two adds
+ * (presumably so that the comparison can then be treated as unsigned --
+ * TODO confirm).  The list is rebuilt in place from the top down and
+ * reloc[] records old index -> new index of the first add.  In the final
+ * pass the expanded instructions only lose IT_SIGNED (their references are
+ * already new indices); all others get OPT_REF / OPT_JUMP renumbered. */
+void expand_signed ()
+{
+ int i, j, num_sig = 0, d;
+ for (i = 0; i < num_insn; i++)
+ if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) num_sig++;
+
+ d = num_insn + num_sig * 2; /* size of the expanded list; also the write cursor */
+ assert (d < MAX_INSNS);
+
+ /* Split signed instructions */
+ for (i = num_insn - 1; i >= 0; i--)
+ /* We will expand signed memory later */
+ if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) {
+ insn[--d] = insn[i];
+ insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF;
+ insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF;
+
+ insn[--d] = insn[i];
+ change_insn_type (&insn[d], II_ADD);
+ insn[d].type = 0;
+ insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST;
+ insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2];
+ insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST;
+ insn[d].opt[3] = OPT_NONE;
+
+ insn[--d] = insn[i];
+ change_insn_type (&insn[d], II_ADD);
+ insn[d].type = 0;
+ insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST;
+ insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; /* self-assignments: operand 1 is kept as is */
+ insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST;
+ insn[d].opt[3] = OPT_NONE;
+
+ reloc[i] = d;
+ } else {
+ insn[--d] = insn[i];
+ reloc[i] = d;
+ }
+ num_insn += num_sig * 2;
+ for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY || !(insn[i].type & IT_SIGNED)) {
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP)
+ insn[i].op[j] = reloc[insn[i].op[j]];
+ } else insn[i].type &= ~IT_SIGNED;
+}
+
+/* expands calls to 7 instructions.
+ * Every II_CALL instruction is preceded by six IT_VOLATILE II_ADD
+ * instructions, one for each of the registers 3..8 (register 8 first,
+ * register 3 immediately before the call).  The call itself gets
+ * IT_VOLATILE, its operand 1 is moved to slot 0 as an OPT_CONST and slot 1
+ * is cleared.  The list is rebuilt in place from the top down; reloc[]
+ * records old index -> new index (of the first add for calls), and all
+ * OPT_REF / OPT_JUMP operands are renumbered at the end.
+ * NOTE(review): each add uses the constant 0x80000000 as its second source
+ * operand, exactly like expand_signed () -- confirm this is intended for
+ * parameter registers. */
+void expand_calls ()
+{
+ int i, j, num_call = 0, d;
+ for (i = 0; i < num_insn; i++)
+ if (insn[i].index == II_CALL) num_call++;
+
+ d = num_insn + num_call * 6; /* 6 parameters */
+ assert (d < MAX_INSNS);
+
+ /* Split call instructions */
+ for (i = num_insn - 1; i >= 0; i--)
+ /* Only II_CALL instructions are expanded; everything else is moved as is */
+ if (insn[i].index == II_CALL) {
+ insn[--d] = insn[i];
+ insn[d].op[0] = insn[d].op[1]; insn[d].opt[0] = OPT_CONST;
+ insn[d].opt[1] = OPT_NONE;
+ insn[d].type |= IT_VOLATILE;
+
+ for (j = 0; j < 6; j++) {
+ insn[--d] = insn[i];
+ change_insn_type (&insn[d], II_ADD);
+ insn[d].type = IT_VOLATILE;
+ insn[d].op[0] = 3 + j; insn[d].opt[0] = OPT_REGISTER | OPT_DEST;
+ insn[d].op[1] = 3 + j; insn[d].opt[1] = OPT_REGISTER;
+ insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST;
+ insn[d].opt[3] = OPT_NONE;
+ }
+
+ reloc[i] = d;
+ } else {
+ insn[--d] = insn[i];
+ reloc[i] = d;
+ }
+ num_insn += num_call * 6;
+ for (i = 0; i < num_insn; i++)
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP)
+ insn[i].op[j] = reloc[insn[i].op[j]];
+}
+
+/* Loads function from file into global array insn and runs the conversion
+ * passes on it.
+ *   in_fn - name of a text file holding one hexadecimal instruction word
+ *           per line
+ * Each word is decoded with build_insn ().  Delay-slot instructions are
+ * accepted only if they are l.bf, l.bnf, l.j (all become II_BF with a jump
+ * target relative to the current index) or the first l.jr, which marks the
+ * function return and is turned into a nop.  All other instructions are
+ * translated through the conv[] table.  The l.jr must be followed by
+ * exactly one more instruction.
+ * Function returns nonzero if function cannot be converted (unsupported
+ * instruction, unsupported structure, or more than MAX_INSNS instructions)
+ * and 0 on success.  The program is terminated if the file cannot be
+ * opened.  The input file is closed on every return path. */
+int cuc_load (char *in_fn)
+{
+  int i, j;
+  FILE *fi;
+  int func_return = 0;   /* 0 until l.jr is seen, then counts from 1 upwards */
+  num_insn = 0;
+
+  log ("Loading filename %s\n", in_fn);
+  if ((fi = fopen (in_fn, "rt")) == NULL) {
+    fprintf (stderr, "Cannot open '%s'\n", in_fn);
+    exit (1);
+  }
+  /* Read in the function and decode the instructions */
+  for (i = 0;; i++) {
+    unsigned long data;
+    const char *name;
+
+    if (fscanf (fi, "%08lx\n", &data) != 1) break;
+
+    /* insn[] has a fixed size: refuse the function instead of writing past
+     * the end of the array */
+    if (i >= MAX_INSNS) {
+      cucdebug (1, "Function too large (more than %i instructions).\n", (int) MAX_INSNS);
+      log ("Function too large (more than %i instructions).\n", (int) MAX_INSNS);
+      fclose (fi);
+      return 1;
+    }
+
+    /* build params */
+    name = build_insn (data, &insn[i]);
+    if (func_return) func_return++;
+    //PRINTF ("%s\n", name);
+
+    if (or32_opcodes[insn[i].index].flags & OR32_IF_DELAY) {
+      int f;   /* 1: l.bnf, 0: l.bf, -1: l.j */
+      if (strcmp (name, "l.bnf") == 0) f = 1;
+      else if (strcmp (name, "l.bf") == 0) f = 0;
+      else if (strcmp (name, "l.j") == 0) {
+        f = -1;
+      } else if (strcmp (name, "l.jr") == 0 && func_return == 0) {
+        func_return = 1;
+        change_insn_type (&insn[i], II_NOP);
+        continue;
+      } else {
+        cucdebug (1, "Instruction #%i: \"%s\" not supported.\n", i, name);
+        log ("Instruction #%i: \"%s\" not supported.\n", i, name);
+        fclose (fi);
+        return 1;
+      }
+      if (f < 0) { /* l.j */
+        /* repair params */
+        change_insn_type (&insn[i], II_BF);
+        insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP;
+        insn[i].op[1] = 1; insn[i].opt[1] = OPT_CONST;
+        insn[i].type |= IT_BRANCH | IT_VOLATILE;
+      } else {
+        change_insn_type (&insn[i], II_BF);
+        insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP;
+        insn[i].op[1] = FLAG_REG; insn[i].opt[1] = OPT_REGISTER;
+        insn[i].type |= IT_BRANCH | IT_VOLATILE;
+        if (f) insn[i].type |= IT_FLAG1;
+      }
+    } else {
+      /* stays negative when the mnemonic is not in conv[] */
+      insn[i].index = -1;
+      for (j = 0; j < (int) (sizeof (conv) / sizeof (cuc_conv)); j++)
+        if (strcmp (conv[j].from, name) == 0) {
+          if (conv[j].to & II_SIGNED) insn[i].type |= IT_SIGNED;
+          if (conv[j].to & II_MEM) insn[i].type |= IT_MEMORY | IT_VOLATILE;
+          change_insn_type (&insn[i], conv[j].to & II_MASK);
+          break;
+        }
+      if (strcmp (name, "l.movhi") == 0) {
+        /* l.movhi is represented as "or rd, imm << 16, 0" */
+        insn[i].op[1] <<= 16;
+        insn[i].op[2] = 0;
+        insn[i].opt[2] = OPT_CONST;
+      }
+      if (insn[i].index == II_SFEQ || insn[i].index == II_SFNE
+          || insn[i].index == II_SFLE || insn[i].index == II_SFGT
+          || insn[i].index == II_SFGE || insn[i].index == II_SFLT) {
+        /* repair params: shift both sources up and make FLAG_REG the
+         * destination */
+        insn[i].op[2] = insn[i].op[1]; insn[i].opt[2] = insn[i].opt[1] & ~OPT_DEST;
+        insn[i].op[1] = insn[i].op[0]; insn[i].opt[1] = insn[i].opt[0] & ~OPT_DEST;
+        insn[i].op[0] = FLAG_REG; insn[i].opt[0] = OPT_DEST | OPT_REGISTER;
+        insn[i].opt[3] = OPT_NONE;
+        insn[i].type |= IT_COND;
+      }
+      if (insn[i].index < 0 || (insn[i].index == II_NOP && insn[i].op[0] != 0)) {
+        cucdebug (1, "Instruction #%i: \"%s\" not supported (2).\n", i, name);
+        log ("Instruction #%i: \"%s\" not supported (2).\n", i, name);
+        fclose (fi);
+        return 1;
+      }
+    }
+  }
+  num_insn = i;
+  fclose (fi);
+  /* exactly one instruction (the delay slot) must follow the l.jr */
+  if (func_return != 2) {
+    cucdebug (1, "Unsupported function structure.\n");
+    log ("Unsupported function structure.\n");
+    return 1;
+  }
+
+  log ("Number of instructions loaded = %i\n", num_insn);
+  if (cuc_debug >= 3) print_cuc_insns ("INITIAL", 1);
+
+  log ("Converting.\n");
+  expand_branch ();
+  if (cuc_debug >= 6) print_cuc_insns ("AFTER_EXP_BRANCH", 0);
+
+  remove_dslots ();
+  if (cuc_debug >= 6) print_cuc_insns ("NO_DELAY_SLOTS", 0);
+
+  if (config.cuc.calling_convention) {
+    detect_locals ();
+    if (cuc_debug >= 7) print_cuc_insns ("AFTER_LOCALS", 0);
+  }
+  expand_memory ();
+  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_MEM", 0);
+
+  expand_signed ();
+  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_SIG", 0);
+
+  expand_calls ();
+  if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_CALLS", 0);
+
+  return 0;
+}
load.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: bb.c
===================================================================
--- bb.c (nonexistent)
+++ bb.c (revision 1765)
@@ -0,0 +1,1508 @@
+/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "sim-config.h"
+#include "abstract.h"
+#include "cuc.h"
+#include "insn.h"
+#include "support/profile.h"
+
+/* Prints the label of a basic-block link: "*" for any negative number,
+ * "END" / "START" for the two special ids, otherwise the number as
+ * two-column hexadecimal.  The negative test is made first. */
+void print_bb_num (int num)
+{
+  if (num < 0) {
+    PRINTF ("*");
+    return;
+  }
+  if (num == BBID_END) {
+    PRINTF ("END");
+    return;
+  }
+  if (num == BBID_START) {
+    PRINTF ("START");
+    return;
+  }
+  PRINTF ("%2x", num);
+}
+
+/* Dumps every basic block of a function under the heading s.
+ * For each block: a header (instruction-array form if the block owns an
+ * insn array, first/last range form otherwise), type and tmp, both
+ * successor and both predecessor links, and the instructions when present.
+ * The memory schedule list is appended if it is not empty.  stdout is
+ * flushed at the end. */
+void print_cuc_bb (cuc_func *f, char *s)
+{
+  int b, m;
+
+  PRINTF ("------- %s -------\n", s);
+  for (b = 0; b < f->num_bb; b++) {
+    cuc_bb *bb = &f->bb[b];
+
+    if (bb->insn) {
+      PRINTF ("\n---- BB%-2x * %x ---- ", b, bb->cnt);
+    } else {
+      PRINTF ("BB%-2x: %4x-%-4x", b, bb->first, bb->last);
+    }
+    PRINTF (" type %02lx tmp %i ", bb->type, bb->tmp);
+
+    PRINTF ("next ");
+    print_bb_num (bb->next[0]);
+    PRINTF (" ");
+    print_bb_num (bb->next[1]);
+
+    PRINTF (" prev ");
+    print_bb_num (bb->prev[0]);
+    PRINTF (" ");
+    print_bb_num (bb->prev[1]);
+    PRINTF ("\n");
+
+    if (bb->insn) print_insns (b, bb->insn, bb->ninsn, 0);
+  }
+
+  if (f->nmsched) {
+    PRINTF ("\nmsched: ");
+    for (m = 0; m < f->nmsched; m++) PRINTF ("%x ", f->msched[m]);
+    PRINTF ("\n\n\n");
+  } else {
+    PRINTF ("\n");
+  }
+  fflush (stdout);
+}
+
+/* Copies src basic block into destination.
+ *   dest - block to overwrite; must be a different object than src
+ *   src  - block to copy
+ * All scalar fields are copied by assignment.  The instruction array, each
+ * instruction's dependency list, the block's mdep list, the timings array
+ * (when src->ntim is nonzero) and each non-empty shared array are
+ * duplicated so that dest owns its own storage.
+ * Returns dest.
+ * Allocation failures trip an assertion; the allocations themselves are
+ * made outside assert () so they are still performed when assertions are
+ * compiled out with NDEBUG. */
+cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src)
+{
+  int i, j;
+  dep_list *d;
+  assert (dest != src);
+  *dest = *src;
+  dest->insn = malloc (sizeof (cuc_insn) * src->ninsn);
+  assert (dest->insn);
+  for (i = 0; i < src->ninsn; i++) {
+    d = src->insn[i].dep;
+    dest->insn[i] = src->insn[i];
+    /* rebuild the dependency list instead of sharing the source's nodes */
+    dest->insn[i].dep = NULL;
+    while (d) {
+      add_dep (&dest->insn[i].dep, d->ref);
+      d = d->next;
+    }
+  }
+
+  d = src->mdep;
+  dest->mdep = NULL;
+  while (d) {
+    add_dep (&dest->mdep, d->ref);
+    d = d->next;
+  }
+  if (src->ntim) {
+    dest->tim = malloc (sizeof (cuc_timings) * src->ntim);
+    assert (dest->tim);
+    for (i = 0; i < src->ntim; i++) {
+      dest->tim[i] = src->tim[i];
+      if (src->tim[i].nshared) {
+        dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared);
+        assert (dest->tim[i].shared);
+        for (j = 0; j < src->tim[i].nshared; j++)
+          dest->tim[i].shared[j] = src->tim[i].shared[j];
+      }
+    }
+  }
+  return dest;
+}
+
+/* Duplicates function.
+ *   f - function to copy; it is only read
+ * Returns a newly allocated cuc_func whose basic blocks are copied with
+ * cpy_bb () and whose init_bb_reloc and fdeps arrays are private copies.
+ * The fdeps entries themselves still point to the same functions as in f.
+ * Register tables, address range, orig_time, num_runs and the memory
+ * schedule are copied as well.
+ * NOTE(review): the result comes from malloc () and only the fields
+ * assigned below are initialised -- confirm that callers set any other
+ * field before reading it.
+ * Allocation failures trip an assertion. */
+cuc_func *dup_func (cuc_func *f)
+{
+  cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func));
+  int b, i;
+
+  assert (n);
+  for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]);
+  n->num_bb = f->num_bb;
+  /* allocate outside assert () so the call survives NDEBUG builds */
+  n->init_bb_reloc = (int *) malloc (sizeof (int) * f->num_init_bb);
+  assert (n->init_bb_reloc);
+  for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b];
+  n->num_init_bb = f->num_init_bb;
+  for (i = 0; i < MAX_REGS; i++) {
+    n->saved_regs[i] = f->saved_regs[i];
+    n->lur[i] = f->lur[i];
+    n->used_regs[i] = f->used_regs[i];
+  }
+  n->start_addr = f->start_addr;
+  n->end_addr = f->end_addr;
+  n->orig_time = f->orig_time;
+  n->nmsched = f->nmsched;
+  n->num_runs = f->num_runs;
+  for (i = 0; i < f->nmsched; i++) {
+    n->msched[i] = f->msched[i];
+    n->mtype[i] = f->mtype[i];
+  }
+  n->nfdeps = f->nfdeps;
+  if (f->nfdeps) {
+    /* the new array belongs to the copy; the source's array stays as is */
+    n->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps);
+    assert (n->fdeps);
+    for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i];
+  } else n->fdeps = NULL;
+  return n;
+}
+
+/* Releases a function: for every basic block the per-instruction
+ * dependency lists, the instruction array, every non-empty shared array
+ * and the timings array, and finally the cuc_func itself.
+ * NOTE(review): the per-block mdep list built by cpy_bb () and the
+ * init_bb_reloc array allocated by dup_func () are not released here --
+ * confirm whether they are freed elsewhere. */
+void free_func (cuc_func *f)
+{
+  int b;
+
+  for (b = 0; b < f->num_bb; b++) {
+    cuc_bb *bb = &f->bb[b];
+    int k;
+
+    for (k = 0; k < bb->ninsn; k++) dispose_list (&bb->insn[k].dep);
+    if (bb->insn) free (bb->insn);
+
+    for (k = 0; k < bb->ntim; k++) {
+      cuc_timings *t = &bb->tim[k];
+      if (t->nshared && t->shared) free (t->shared);
+    }
+    if (bb->tim && bb->ntim) free (bb->tim);
+  }
+  free (f);
+}
+
+/* Rebuilds the last_used_reg table of basic block b of function f.
+ * The LRBB_REG entry is set to 0 when the block starts with an II_LRBB
+ * instruction and to -1 otherwise; entries 1 .. MAX_REGS-2 are reset to -1.
+ * Then, for every instruction in order, each destination operand that is a
+ * plain register with a non-negative number records REF (b, instruction
+ * index) for that register, so the latest writer wins. */
+void recalc_last_used_reg (cuc_func *f, int b)
+{
+  cuc_bb *bb = &f->bb[b];
+  int r, n, k;
+
+  /* reset the table */
+  bb->last_used_reg[LRBB_REG] = (bb->insn[0].index == II_LRBB) ? 0 : -1;
+  for (r = 1; r < MAX_REGS - 1; r++) bb->last_used_reg[r] = -1;
+
+  /* record the last writer of every register */
+  for (n = 0; n < bb->ninsn; n++) {
+    cuc_insn *ii = &bb->insn[n];
+
+    for (k = 0; k < MAX_OPERANDS; k++) {
+      if (!(ii->opt[k] & OPT_DEST)) continue;
+      if ((ii->opt[k] & ~OPT_DEST) != OPT_REGISTER) continue;
+      if ((int)ii->op[k] < 0) continue;
+      bb->last_used_reg[ii->op[k]] = REF (b, n);
+    }
+  }
+}
+
+/* Set the BB limits */
+void detect_bb (cuc_func *f)
+{
+ int i, j, end_bb = 0, eb = 0;
+
+ /* Mark block starts/ends */
+ for (i = 0; i < num_insn; i++) {
+ if (end_bb) insn[i].type |= IT_BBSTART;
+ end_bb = 0;
+ if (insn[i].type & IT_BRANCH) {
+ int jt = insn[i].op[0];
+ insn[i].type |= IT_BBEND;
+ end_bb = 1;
+ if (jt < 0 || jt >= num_insn) {
+ fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm);
+ exit (1);
+ }
+ if (jt > 0) insn[jt - 1].type |= IT_BBEND;
+ insn[jt].type |= IT_BBSTART;
+ }
+ }
+
+ /* Initialize bb array */
+ insn[0].type |= IT_BBSTART;
+ insn[num_insn - 1].type |= IT_BBEND;
+ f->num_bb = 0;
+ for (i = 0; i < num_insn; i++) {
+ if (insn[i].type & IT_BBSTART) {
+ f->bb[f->num_bb].first = i;
+ f->bb[f->num_bb].cnt = 0;
+ }
+ /* Determine repetitions of a loop */
+ if (insn[i].type & IT_BBEND) {
+ f->bb[f->num_bb].type = 0;
+ f->bb[f->num_bb].last = i;
+ f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1;
+ f->bb[f->num_bb].tmp = 0;
+ f->bb[f->num_bb].ntim = 0;
+ f->num_bb++;
+ assert (f->num_bb < MAX_BB);
+ }
+ }
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT");
+
+ /* Build forward connections between BBs */
+ for (i = 0; i < f->num_bb; i++)
+ if (insn[f->bb[i].last].type & IT_BRANCH) {
+ int j;
+ assert (insn[f->bb[i].last].index == II_BF);
+ /* Find block this instruction jumps to */
+ for (j = 0; j < f->num_bb; j++)
+ if (f->bb[j].first == insn[f->bb[i].last].op[0]) break;
+ assert (j < f->num_bb);
+
+ /* Convert the jump address to BB link */
+ insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB;
+
+ /* Make a link */
+ f->bb[i].next[0] = j;
+ if (++f->bb[j].tmp > 2) eb++;
+ f->bb[i].next[1] = i + 1;
+ if (++f->bb[i + 1].tmp > 2) eb++;
+ } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */
+ } else {
+ f->bb[i].next[0] = i + 1;
+ if (++f->bb[i + 1].tmp > 2) eb++;
+ }
+
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT");
+
+ /* Build backward connections, but first insert artificial blocks
+ * to handle more than 2 connections */
+ cucdebug (6, "artificial %i %i\n", f->num_bb, eb);
+ end_bb = f->num_bb + eb;
+ for (i = f->num_bb - 1; i >= 0; i--) {
+ j = f->bb[i].tmp;
+ if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp;
+ f->bb[--end_bb] = f->bb[i];
+ reloc[i] = end_bb;
+ while (j-- > 2) {
+ f->bb[--end_bb].first = f->bb[i].first;
+ f->bb[end_bb].last = -1;
+ f->bb[end_bb].next[0] = -1;
+ f->bb[end_bb].next[1] = -1;
+ f->bb[end_bb].tmp = 0;
+ f->bb[end_bb].cnt = f->bb[i].cnt;
+ f->bb[end_bb].ntim = 0;
+ }
+ }
+ f->num_bb += eb;
+
+ /* relocate jump instructions */
+ for (i = 0; i < num_insn; i++)
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (insn[i].opt[j] & OPT_BB)
+ insn[i].op[j] = reloc[insn[i].op[j]];
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc");
+ for (i = 0; i < f->num_bb; i++) {
+ if (f->bb[i].next[0] >= 0) {
+ int t = reloc[f->bb[i].next[0]];
+ if (f->bb[t].tmp < 0) {
+ f->bb[t].tmp = -f->bb[t].tmp;
+ t -= f->bb[t].tmp - 2;
+ } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
+ f->bb[i].next[0] = t;
+ }
+ if (f->bb[i].next[1] >= 0) {
+ int t = reloc[f->bb[i].next[1]];
+ if (f->bb[t].tmp < 0) {
+ f->bb[t].tmp = -f->bb[t].tmp;
+ t -= f->bb[t].tmp - 2;
+ } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
+ f->bb[i].next[1] = t;
+ }
+ /* artificial blocks do not have relocations, hardcode them */
+ if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1;
+ }
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT");
+
+ /* Uncoditional branched do not continue to next block */
+ for (i = 0; i < f->num_bb; i++) {
+ cuc_insn *ii;
+ if (f->bb[i].last < 0) continue;
+ ii = &insn[f->bb[i].last];
+ /* Unconditional branch? */
+ if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) {
+ change_insn_type (ii, II_NOP);
+#if 0
+ if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1];
+#endif
+ f->bb[i].next[1] = -1;
+ }
+ }
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP");
+
+ /* Add backward connections */
+ for (i = 0; i < f->num_bb; i++)
+ f->bb[i].prev[0] = f->bb[i].prev[1] = -1;
+
+ for (i = 0; i < f->num_bb; i++) {
+ if (f->bb[i].next[0] >= 0) {
+ int t = f->bb[i].next[0];
+ if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
+ else {
+ assert (f->bb[t].prev[1] < 0);
+ f->bb[t].prev[1] = i;
+ }
+ }
+ if (f->bb[i].next[1] >= 0) {
+ int t = f->bb[i].next[1];
+ if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
+ else {
+ assert (f->bb[t].prev[1] < 0);
+ f->bb[t].prev[1] = i;
+ }
+ }
+ }
+ /* Add START marker */
+ assert (f->bb[0].prev[0] < 0);
+ f->bb[0].prev[0] = BBID_START;
+
+ /* Add END marker */
+ assert (f->bb[f->num_bb - 1].next[0] < 0);
+ assert (f->bb[f->num_bb - 1].next[1] < 0);
+ f->bb[f->num_bb - 1].next[0] = BBID_END;
+ if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV");
+}
+
+/* Quick consistency check of the instruction operands in every basic block
+   of F.  The following are reported as anomalies:
+     - a block with a nonzero instruction count but no instruction array;
+     - a conditional (IT_COND) CMOV/ADD with a destination whose register
+       number is non-negative and is neither FLAG_REG nor LRBB_REG;
+     - an OPT_REF operand naming a block or instruction index out of range,
+       or (for CMOV/ADD) mixing conditional and non-conditional values;
+     - an OPT_DEST flag on any operand other than operand 0.
+   On the first anomaly its location is logged, the blocks are dumped with
+   print_cuc_bb () and the process exits with status 1.  */
+void cuc_check (cuc_func *f)
+{
+  /* j and k start at zero so the report at err: never reads an
+     indeterminate value when the anomaly is found at block level.  */
+  int i, j = 0, k = 0;
+  cucdebug (1, "cuc_check\n");
+  for (i = 0; i < f->num_bb; i++) {
+    if (!f->bb[i].insn && f->bb[i].ninsn) {
+      j = k = 0; /* block-level anomaly: no instruction/operand involved */
+      goto err;
+    }
+    for (j = 0; j < f->bb[i].ninsn; j++) {
+      cuc_insn *ii = &f->bb[i].insn[j];
+      /* Conditional cmov/add may only write the flag or the lrbb register */
+      if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND && ii->opt[0] & OPT_DEST) {
+        k = 0;
+        assert (ii->opt[k] & OPT_REGISTER);
+        if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) {
+          cucdebug (1, "Invalid dest conditional type opt%x op%lx\n", ii->opt[0], ii->op[0]);
+          goto err;
+        }
+      }
+      for (k = 0; k < MAX_OPERANDS; k++) {
+        if (ii->opt[k] & OPT_REF) {
+          int t = ii->op[k];
+          /* The range checks come first, so f->INSN(t) is only evaluated
+             for references that point at an existing instruction.  */
+          if (REF_BB(t) >= f->num_bb || REF_I (t) >= f->bb[REF_BB(t)].ninsn
+              || (ii->index == II_CMOV || ii->index == II_ADD) && (
+                (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3
+                || !(f->INSN(t).type & IT_COND) && k == 3)) {
+            cucdebug (1, "Conditional misused\n");
+            goto err;
+          }
+        }
+        if (k && ii->opt[k] & OPT_DEST) {
+          cucdebug (1, "Destination only allowed for op0!\n");
+          goto err;
+        }
+      }
+    }
+  }
+  return;
+err:
+  /* The location is printed both before and after the (long) block dump */
+  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
+  print_cuc_bb (f, "ANOMALY");
+  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
+  exit (1);
+}
+
+/* Build basic blocks: give every block of F its own instruction array.
+   Each array starts with MAX_REGS - 1 placeholder slots (an lrbb in slot 0
+   and one cmov per register in slots 1 .. MAX_REGS - 2), followed by the
+   block's own instructions copied from the global insn[] array.  The result
+   is validated with cuc_check ().  */
+void build_bb (cuc_func *f)
+{
+  int i, j, k;
+
+  for (i = 0; i < f->num_bb; i++) {
+    cuc_insn *head;
+
+    /* Placeholders always; blocks with last < 0 carry nothing else */
+    f->bb[i].ninsn = MAX_REGS - 1;
+    if (f->bb[i].last >= 0)
+      f->bb[i].ninsn += f->bb[i].last - f->bb[i].first + 1;
+    assert (f->bb[i].ninsn >= MAX_REGS - 1);
+
+    f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn);
+    assert (f->bb[i].insn);
+    f->bb[i].unrolled = 1;
+    f->bb[i].nmemory = 0;
+
+    /* Slot 0 would belong to r0, which needs no move; lrbb lives there */
+    head = &f->bb[i].insn[0];
+    change_insn_type (head, II_LRBB);
+    strcpy (head->disasm, "lrbb");
+    head->type = IT_UNUSED | IT_COND;
+    head->dep = NULL;
+    head->op[0] = LRBB_REG;
+    head->opt[0] = OPT_REGISTER | OPT_DEST;
+    head->opt[1] = OPT_LRBB;
+    head->opt[2] = OPT_NONE;
+    head->opt[3] = OPT_NONE;
+
+    /* One "cmov rJ, rJ, rJ, lrbb" placeholder per remaining register */
+    for (j = 1; j < MAX_REGS - 1; j++) {
+      cuc_insn *cm = &f->bb[i].insn[j];
+
+      change_insn_type (cm, II_CMOV);
+      strcpy (cm->disasm, "cmov");
+      if (j == FLAG_REG || j == LRBB_REG) cm->type = IT_COND;
+      else cm->type = 0;
+      cm->dep = NULL;
+      for (k = 0; k < 3; k++) {
+        cm->op[k] = j;
+        cm->opt[k] = OPT_REGISTER;
+      }
+      cm->opt[0] |= OPT_DEST;
+      cm->op[3] = LRBB_REG;
+      cm->opt[3] = OPT_REGISTER;
+    }
+
+    /* Copy the block's own instructions behind the placeholders and turn
+       instruction-index references into (block, index) references */
+    for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) {
+      cuc_insn *ci = &f->bb[i].insn[j];
+
+      *ci = insn[f->bb[i].first + j - (MAX_REGS - 1)];
+      for (k = 0; k < MAX_OPERANDS; k++) {
+        int b1 = 0;
+
+        if (!(ci->opt[k] & OPT_REF)) continue;
+
+        /* Look through the earlier blocks; when none of them contains the
+           referenced index, b1 ends up equal to i (the current block) */
+        while (b1 < i
+               && !(f->bb[b1].first <= (signed) ci->op[k]
+                    && (signed) ci->op[k] <= f->bb[b1].last))
+          b1++;
+        assert (b1 < f->num_bb);
+        ci->op[k] = REF (b1, ci->op[k] - f->bb[b1].first + MAX_REGS - 1);
+      }
+      if (ci->type & IT_MEMORY) f->bb[i].nmemory++;
+    }
+  }
+  cuc_check (f);
+}
+
+/* Simplifies the block triangle A->B->C, A->C into the chain A->B->C.
+   PRED is A, S1 is B and S2 is C.  A's branch is removed, so control always
+   passes through S1; every lrbb in S2 becomes a conditional move that picks
+   a constant according to the condition of the removed branch.  NEG swaps
+   the two constants.  */
+static void simplify_bb (cuc_func *f, int pred, int s1, int s2, int neg)
+{
+  cuc_insn *branch;
+  int idx = 0;
+  int when_set = neg ? 0 : 1;
+  int when_clear = neg ? 1 : 0;
+
+  if (cuc_debug >= 3) print_cuc_bb (f, "BEFORE_SIMPLIFY");
+  cucdebug (3, "simplify %x->%x->%x (%i)\n", pred, s1, s2, neg);
+  assert (s2 != pred); /* Shouldn't occur => stupid */
+
+  /* pred keeps s1 as its only successor */
+  f->bb[pred].next[0] = s1;
+  f->bb[pred].next[1] = -1;
+
+  /* s2 loses pred as predecessor; the remaining one moves to slot 0 */
+  if (f->bb[s2].prev[0] == pred) {
+    f->bb[s2].prev[0] = f->bb[s2].prev[1];
+    f->bb[s2].prev[1] = -1;
+  } else {
+    if (f->bb[s2].prev[1] == pred) f->bb[s2].prev[1] = -1;
+    else assert (0);
+  }
+
+  /* The branch is the last instruction of pred */
+  branch = &f->bb[pred].insn[f->bb[pred].ninsn - 1];
+  assert (branch->type & IT_BRANCH);
+
+  while (idx < f->bb[s2].ninsn) {
+    cuc_insn *cur = &f->bb[s2].insn[idx++];
+
+    if (cur->index != II_LRBB) continue;
+
+    /* lrbb => cmov <when_set>, <when_clear>, <branch condition> */
+    change_insn_type (cur, II_CMOV);
+    cur->type = IT_COND;
+    cur->opt[1] = OPT_CONST;
+    cur->op[1] = when_set;
+    cur->opt[2] = OPT_CONST;
+    cur->op[2] = when_clear;
+    cur->opt[3] = branch->opt[1];
+    cur->op[3] = branch->op[1];
+  }
+
+  change_insn_type (branch, II_NOP);
+  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_SIMPLIFY");
+}
+
+/* Joins basic block SUCC into PRED: the instructions of both blocks are
+ * copied into one newly allocated array that becomes PRED's, the links and
+ * references of all live blocks are redirected, and SUCC is marked BB_DEAD.
+ * type == 0; keep predecessor condition
+ * type == 1; keep successor condition
+ * type == 2; join loop unrolled blocks
+ * The requested type is forced to 1 when PRED is empty or does not end with
+ * a branch instruction. */
+static void join_bb (cuc_func *f, int pred, int succ, int type)
+{
+ int i, j, k, n1, n2, ninsn, add_cond = 0;
+ /* Only type 0/2 joins capture a branch condition; give both a defined
+ value so that no later use can read an indeterminate one */
+ unsigned long cond_op = 0, cond_opt = OPT_NONE;
+ cuc_insn *insn;
+
+ if (cuc_debug) cuc_check (f);
+ cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type);
+ cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn);
+ if (cuc_debug >= 3) fflush (stdout);
+
+ n1 = f->bb[pred].ninsn;
+ n2 = f->bb[succ].ninsn;
+ if (n1 <= 0
+ || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1;
+ if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1;
+ if (type == 2) add_cond = 1;
+
+ //assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */
+
+ /* Layout of the joined block: pred's instructions, succ's instructions,
+ optionally MAX_REGS merge slots, and (type != 1) one slot for the branch */
+ ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? MAX_REGS : 0);
+
+ insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn));
+ assert (insn); /* same allocation check as build_bb () */
+ for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i];
+ /* when type == 0, we move the last (jump) instruction to the end */
+ if (type == 0 || type == 2) {
+ /* Move first branch instruction to the end */
+ assert (insn[n1 - 1].type & IT_BRANCH);
+ insn[ninsn - 1] = insn[n1 - 1];
+ cond_op = insn[n1 - 1].op[1];
+ cond_opt = insn[n1 - 1].opt[1];
+
+ /* Remove old branch */
+ change_insn_type (&insn[n1 - 1], II_NOP);
+ }
+ /* Copy second block */
+ for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i];
+
+ /* and when type == 2, we may need to add sfor instruction, to quit when either is true */
+ if (type == 2) {
+ /* Move second branch instruction to the end */
+ if (insn[n1 + n2 - 1].type & IT_BRANCH) {
+ insn[ninsn - 1] = insn[n1 + n2 - 1];
+
+ /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */
+ insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF;
+
+ /* Remove old one */
+ change_insn_type (&insn[n1 + n2 - 1], II_NOP);
+ } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */
+ }
+
+#if 1
+ /* LRBB at start of succ BB is not valid anymore */
+ if (n1 > 0 && insn[n1].index == II_LRBB) {
+ if (type == 1) {
+ /* We have two possibilities, how this could have happened:
+ 1. we just moved second predecessor of succ to pred,
+ pred now having two predecessors => everything is ok
+ 2. we just moved second predecessor of succ to pred,
+ now, having just one predecessor => LRBB is not needed anymore */
+ if (f->bb[pred].prev[1] < 0) { /* handle second option */
+ change_insn_type (&insn[n1], II_ADD);
+ insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST;
+ insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST;
+ insn[n1].opt[3] = OPT_NONE;
+ }
+ } else {
+ assert (0); /* not tested yet */
+ change_insn_type (&insn[n1], II_NOP);
+ for (i = n1; i < ninsn; i++)
+ if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) {
+ assert (insn[i].opt[3] == OPT_REF);
+ insn[i].op[3] = cond_op;
+ insn[i].opt[3] = cond_opt;
+ if (f->bb[pred].next[0] != succ) {
+ unsigned long t; /* negate conditional -- exchange */
+ assert (f->bb[pred].next[1] == succ);
+ t = insn[i].op[1];
+ insn[i].op[1] = insn[i].op[2];
+ insn[i].op[2] = t;
+ t = insn[i].opt[1];
+ insn[i].opt[1] = insn[i].opt[2];
+ insn[i].opt[2] = t;
+ }
+ }
+ }
+ }
+#endif
+
+ /* reloc[] maps an instruction index of pred to the merge slot that
+ replaces it for references coming from other blocks; -1 == unchanged */
+ for (i = 0; i < ninsn; i++) reloc[i] = -1;
+
+ /* Add conditional instructions if required */
+ if (add_cond) {
+ recalc_last_used_reg (f, pred);
+ recalc_last_used_reg (f, succ);
+
+ /* r0 -- add nop for it */
+ change_insn_type (&insn[n1 + n2], II_NOP);
+ for (i = 1; i < MAX_REGS; i++) {
+ cuc_insn *ii = &insn[n1 + n2 + i];
+ int a = f->bb[pred].last_used_reg[i];
+ int b = f->bb[succ].last_used_reg[i];
+
+ /* We have deleted first branch instruction, now we must setup FLAG_REG,
+ to point to conditional */
+ if (i == FLAG_REG) {
+ change_insn_type (ii, II_CMOV);
+ ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
+ ii->dep = NULL;
+ ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = cond_op; ii->opt[1] = cond_opt;
+ if (b >= 0) {
+ ii->op[2] = b; ii->opt[2] = OPT_REF;
+ } else {
+ ii->op[2] = cond_op; ii->opt[2] = cond_opt;
+ }
+ ii->op[3] = cond_op; ii->opt[3] = cond_opt;
+ /* a < 0 means pred never wrote the flag: there is no instruction whose
+ references could be relocated, and REF_I (a) must not index reloc[] */
+ if (a >= 0) reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
+ } else if (b < 0) change_insn_type (ii, II_NOP);
+ else if (a < 0) {
+ change_insn_type (ii, II_ADD);
+ ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
+ ii->dep = NULL;
+ ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = b; ii->opt[1] = OPT_REF;
+ ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+ ii->opt[3] = OPT_NONE;
+ } else if (b >= 0) {
+ change_insn_type (ii, II_CMOV);
+ ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
+ ii->dep = NULL;
+ ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = a; ii->opt[1] = OPT_REF;
+ ii->op[2] = b; ii->opt[2] = OPT_REF;
+ ii->op[3] = cond_op; ii->opt[3] = cond_opt;
+ reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
+ }
+ sprintf (ii->disasm, "cmov (join BB)");
+ }
+ }
+
+ if (cuc_debug) cuc_check (f);
+ /* Redirect the successor links of pred past succ */
+ i = 0;
+ switch (type) {
+ case 0:
+ assert (f->bb[pred].next[0] >= 0);
+ if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0];
+ if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0];
+ break;
+ case 1:
+ assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
+ f->bb[pred].next[0] = f->bb[succ].next[0];
+ f->bb[pred].next[1] = f->bb[succ].next[1];
+ break;
+ case 2:
+ assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
+ f->bb[pred].next[0] = f->bb[succ].next[0];
+ f->bb[pred].next[1] = f->bb[succ].next[1];
+ break;
+ }
+ /* Normalize: a single successor lives in next[0], no duplicates */
+ if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1];
+ if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1;
+
+ if (type == 0) assert (f->bb[succ].next[1] < 0);
+
+ /* We just did something stupid -- we joined two predecessors into one;
+ succ may need the information from which block we came. We will repair
+ this by converting LRBB to CMOV */
+ for (j = 0; j < 2; j++) {
+ int nb = f->bb[pred].next[j];
+ int t;
+
+ /* check just valid connections */
+ if (nb < 0 || nb == BBID_END) continue;
+
+ /* check type */
+ if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1;
+ else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0;
+ else continue;
+
+ /* check all LRBB instructions. */
+ for (i = 0; i < f->bb[nb].ninsn; i++)
+ if (f->bb[nb].insn[i].index == II_LRBB) {
+ cuc_insn *lrbb =&f->bb[nb].insn[i];
+ change_insn_type (lrbb, II_CMOV);
+ lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST;
+ lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST;
+ lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt;
+ lrbb->type |= IT_COND;
+ }
+ }
+
+ f->bb[succ].type = BB_DEAD;
+ //PRINTF (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type);
+ /* remove branch instruction, if there is only one successor */
+ if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) {
+ assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */
+ change_insn_type (&insn[ninsn - 1], II_NOP);
+ }
+
+ /* Set max count */
+ if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt;
+ f->bb[pred].ninsn = ninsn;
+ f->bb[succ].ninsn = 0;
+ /* The old arrays are no longer needed; pred takes over the joined one */
+ free (f->bb[pred].insn); f->bb[pred].insn = NULL;
+ free (f->bb[succ].insn); f->bb[succ].insn = NULL;
+ f->bb[pred].insn = insn;
+ /* Fix predecessor links and instruction references in all live blocks */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
+ if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred;
+ if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred;
+ if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
+ for (j = 0; j < f->bb[i].ninsn; j++)
+ for (k = 0; k < MAX_OPERANDS; k++)
+ if (f->bb[i].insn[j].opt[k] & OPT_REF) {
+ /* Check if we are referencing successor BB -> relocate to second part of
+ the new block */
+ if (REF_BB (f->bb[i].insn[j].op[k]) == succ) {
+ int t = f->bb[i].insn[j].op[k];
+ int ndest = REF (pred, REF_I (t) + n1);
+ //PRINTF ("%x: %x %x\n", REF(i, j), t, ndest);
+
+ /* We've found a reference to succ. block, being removed, relocate */
+ f->bb[i].insn[j].op[k] = ndest;
+ } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) {
+ /* References from other blocks to a merged value use the merge slot */
+ if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) {
+ f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])];
+ }
+ }
+ }
+ }
+
+ if (cuc_debug) cuc_check (f);
+ if (cuc_debug >= 3) print_cuc_bb (f, "join");
+}
+
+/* Optimize basic blocks: repeatedly applies, in this order, lrbb removal,
+ type 1 joining, type 0 joining, type 2 joining and triangle simplification.
+ After every successful join or simplification the whole sequence restarts
+ at the remove_lrbb label. Returns nonzero if anything was changed. */
+int optimize_bb (cuc_func *f)
+{
+ int modified = 0;
+ int i, j;
+remove_lrbb:
+ /* we can remove lrbb instructions from blocks with just one predecessor */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
+ if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */
+ for (j = 0; j < f->bb[i].ninsn; j++)
+ if (f->bb[i].insn[j].index == II_LRBB) {
+ cuc_insn *t;
+ cucdebug (4, "-lrbb %x.%x\n", i, j);
+
+ /* Change to add LRBB, 0, 0 */
+ change_insn_type (&f->bb[i].insn[j], II_ADD);
+ f->bb[i].insn[j].type &= ~IT_VOLATILE;
+ f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST;
+ f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */
+ f->bb[i].insn[j].opt[3] = OPT_NONE;
+ modified = 1;
+ if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) {
+ /* t is the last instruction of the only predecessor */
+ t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1];
+
+ /* If the predecessor still has a conditional jump instruction, we must be careful.
+ If next[0] == next[1] join them. Now we will link lrbb and correct the situation */
+ if (t->type & IT_BRANCH) { /* We must set a reference to branch result */
+ f->bb[i].insn[j].opt[1] = t->opt[1];
+ f->bb[i].insn[j].op[1] = t->op[1];
+ /* sometimes branch is not needed anymore */
+ if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP);
+ }
+ }
+ }
+ }
+ }
+
+ /* Ordering of joining types is crucial -- we should concat all directly connected BBs
+ together first, so when we do a type != 1 joining, we can remove LRBB, directly by
+ looking at number of its predecessors */
+
+ /* Type 1 joining
+ 1. link between pred & succ
+ 2. no other pred's successors
+ 3. no other succ's predecessors, except if pred has max one */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
+ int p = f->bb[i].prev[0];
+ if (p < 0 || p == BBID_START) continue;
+ /* one successor and max sum of 3 predecessors */
+ if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0
+ && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) {
+ /* First we will move all predecessors from succ to pred, and then we will do
+ real type 1 joining */
+ if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
+ int p1 = f->bb[i].prev[1];
+ /* joining is surely not worth another extra memory access */
+ if (f->bb[p].nmemory) continue;
+ /* p1 becomes a predecessor of p, in the first free prev slot */
+ if (f->bb[p].prev[0] >= 0) {
+ assert (f->bb[p].prev[1] < 0);
+ f->bb[p].prev[1] = p1;
+ } else f->bb[p].prev[0] = p1;
+ /* ... and p1's link to i is redirected to p */
+ if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p;
+ else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p;
+ else assert (0);
+ f->bb[i].prev[1] = -1;
+ }
+ assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */
+ join_bb (f, p, i, 1);
+ modified = 1;
+ goto remove_lrbb;
+ }
+ }
+
+ /* Type 0 joining
+ 1. link between pred & succ
+ 2. no memory accesses in succ
+ 3. optional pred's second successors
+ 4. max. one succ's successors */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
+ if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START
+ && f->bb[i].prev[1] < 0 /* one predecessor */
+ && f->bb[i].next[1] < 0 /* max. one successor */
+ && f->bb[i].nmemory == 0) { /* and no memory accesses */
+ join_bb (f, f->bb[i].prev[0], i, 0);
+ modified = 1;
+ goto remove_lrbb;
+ }
+
+ /* Type 2 joining
+ 1. link between pred & succ
+ 2. succ has exactly one predecessor
+ 3. pred & succ share common successor
+ 4. optional succ's second successor */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
+ if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */
+ int p = f->bb[i].prev[0];
+ if (p == BBID_START) continue;
+#if 0 /* not yet supported */
+ if (f->bb[p].next[0] == i
+ && (f->bb[i].next[1] == f->bb[p].next[1]
+ || f->bb[i].next[1] == f->bb[p].next[0])) {
+ join_bb (f, p, i, 2);
+ goto remove_lrbb;
+ }
+#endif
+ /* Only the case where succ is pred's second successor is handled */
+ if (f->bb[p].next[1] == i
+ && (f->bb[p].next[0] == f->bb[i].next[1]
+ || f->bb[p].next[0] == f->bb[i].next[0])) {
+ join_bb (f, p, i, 2);
+ modified = 1;
+ goto remove_lrbb;
+ }
+ }
+
+ /* BB simplify:
+ 1. a block has exactly 2 successors A and B
+ 2. A has exactly one successor -- B
+ 3. A has no memory accesses
+ to:
+ flow always goes through A, LRBB is replaced by current block conditional
+ */
+ for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
+ if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END
+ && f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) {
+ int a = f->bb[i].next[0];
+ int b = f->bb[i].next[1];
+ int neg = 0;
+ /* Exchange? */
+ if (f->bb[b].next[0] == a && f->bb[b].next[1] < 0) {
+ int t = a;
+ a = b;
+ b = t;
+ neg = 1;
+ }
+ /* Do the simplification if possible */
+ if (f->bb[a].next[0] == b && f->bb[a].next[1] < 0
+ && f->bb[a].nmemory == 0) {
+ simplify_bb (f, i, a, b, neg);
+ modified = 1;
+ goto remove_lrbb;
+ }
+ }
+
+ /* A complete pass found nothing more to do */
+ return modified;
+}
+
+/* Removes BBs marked as dead: frees their instruction arrays, compacts
+   f->bb[] and renumbers every block link, OPT_BB operand, OPT_REF operand
+   and init_bb_reloc[] entry accordingly.
+   Returns 0 when no block was dead (nothing is changed), 1 otherwise.  */
+int remove_dead_bb (cuc_func *f)
+{
+  int i, j, k, d = 0;
+
+  /* Compact the array; reloc[old index] = new index, or -1 for dead blocks */
+  for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) {
+    if (f->bb[i].insn) free (f->bb[i].insn);
+    f->bb[i].insn = NULL;
+    reloc[i] = -1;
+  } else {
+    reloc[i] = d;
+    f->bb[d++] = f->bb[i];
+  }
+  if (f->num_bb == d) return 0;
+  f->num_bb = d;
+
+  /* relocate initial blocks; an entry that is already negative (block
+     removed by an earlier call) must not be used as an index into reloc[] */
+  for (i = 0; i < f->num_init_bb; i++)
+    if (f->init_bb_reloc[i] >= 0)
+      f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]];
+
+  /* repair references.  The assignments are kept outside assert (), so they
+     are still performed when the program is compiled with NDEBUG.  */
+  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
+    cucdebug (5, "%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]);
+    fflush (stdout);
+    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START) {
+      f->bb[i].prev[0] = reloc[f->bb[i].prev[0]];
+      assert (f->bb[i].prev[0] >= 0);
+    }
+    if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
+      f->bb[i].prev[1] = reloc[f->bb[i].prev[1]];
+      assert (f->bb[i].prev[1] >= 0);
+    }
+    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END) {
+      f->bb[i].next[0] = reloc[f->bb[i].next[0]];
+      assert (f->bb[i].next[0] >= 0);
+    }
+    if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) {
+      f->bb[i].next[1] = reloc[f->bb[i].next[1]];
+      assert (f->bb[i].next[1] >= 0);
+    }
+    /* Two links that now name the same block collapse into one */
+    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
+    if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1;
+
+    for (j = 0; j < f->bb[i].ninsn; j++)
+      for (k = 0; k < MAX_OPERANDS; k++)
+        if ((f->bb[i].insn[j].opt[k] & OPT_BB) &&
+            ((signed)f->bb[i].insn[j].op[k] >= 0)) {
+          /* Block-number operand (jump target) */
+          if (f->bb[i].insn[j].op[k] != BBID_END) {
+            f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]];
+            assert (f->bb[i].insn[j].op[k] >= 0);
+          }
+        } else if (f->bb[i].insn[j].opt[k] & OPT_REF) {
+          /* Instruction reference: only its block part is renumbered */
+          int t = f->bb[i].insn[j].op[k];
+          assert (reloc[REF_BB(t)] >= 0);
+          f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t));
+        }
+  }
+  return 1;
+}
+
+/* Recursive calculation of dependencies: visits block CUR and, through the
+   next[] links, every block reachable from it.  Each visited block gets
+   tmp = 1, and last_used_reg[r] is set to the reference of the last
+   instruction in the block that has register r as destination.
+   Always returns 0; the callers in this file ignore the value.  */
+static int reg_dep_rec (cuc_func *f, int cur)
+{
+  int i, j;
+  cuc_insn *insn = f->bb[cur].insn;
+
+  /* Spread only, do not loop */
+  if (f->bb[cur].tmp) return 0;
+  f->bb[cur].tmp = 1;
+
+  for (i = 0; i < f->bb[cur].ninsn; i++) {
+    /* Check for destination operand(s) */
+    for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST)
+      if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) {
+        assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */
+        /* Later instructions overwrite earlier ones, so the last writer wins */
+        f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i);
+      }
+  }
+
+  if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END)
+    reg_dep_rec (f, f->bb[cur].next[0]);
+  if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END)
+    reg_dep_rec (f, f->bb[cur].next[1]);
+  return 0;
+}
+
+/* Detect register dependencies. In order:
+ 1. blocks not reachable from block 0 are marked BB_DEAD;
+ 2. blocks where a loop has to be broken are marked BB_INLOOP;
+ 3. the per-register cmov placeholders at the start of each block are
+ connected to the last writers in the predecessor blocks;
+ 4. register source operands are converted to instruction references;
+ 5. instructions still flagged IT_UNUSED are turned into nops;
+ 6. the last writers of non caller-saved registers in the last block are
+ marked IT_OUTPUT. */
+void reg_dep (cuc_func *f)
+{
+ int i, b, c;
+
+ /* Set dead blocks */
+ for (b = 0; b < f->num_bb; b++) {
+ f->bb[b].tmp = 0;
+ for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1;
+ }
+
+ /* Start with first block and set dependecies of all reachable blocks */
+ /* At the same time set last_used_regs */
+ reg_dep_rec (f, 0);
+
+ /* Blocks the recursion never reached are dead; tmp is cleared for reuse */
+ for (i = 0; i < f->num_bb; i++)
+ if (f->bb[i].tmp) f->bb[i].tmp = 0;
+ else f->bb[i].type |= BB_DEAD;
+
+ /* Detect loops; mark BBs where loops must be broken */
+ for (c = 0; c < f->num_bb; c++) {
+ int min = 3, minb;
+
+ /* search through all non-visited for minimum number of unvisited predecessors */
+ for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) {
+ int tmp = 0;
+ if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START
+ && !f->bb[f->bb[b].prev[0]].tmp) tmp++;
+ if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != BBID_START
+ && !f->bb[f->bb[b].prev[1]].tmp) tmp++;
+ if (tmp < min) {
+ minb = b;
+ min = tmp;
+ if (tmp == 0) break; /* We already have the best one */
+ }
+ }
+ /* Every pass marks exactly one block, so an unvisited one always exists */
+ b = minb;
+ f->bb[b].tmp = 1; /* Mark visited */
+ cucdebug (3, "minb %i min %i\n", minb, min);
+ if (min) { /* We just broke the loop */
+ f->bb[b].type |= BB_INLOOP;
+ }
+ }
+
+ /* Set real predecessors in cmov instructions to previous blocks */
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 1; i < MAX_REGS - 1; i++) {
+ int pa, pb;
+ assert (f->bb[b].insn[i].index == II_CMOV);
+ /* Parenthesized: == binds tighter than |, without them nothing is checked */
+ assert (f->bb[b].insn[i].opt[0] == (OPT_REGISTER | OPT_DEST));
+ assert (f->bb[b].insn[i].op[0] == i);
+ /* pa/pb: last writer of register i in each predecessor, -1 if none */
+ if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1;
+ else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i];
+ if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1;
+ else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i];
+
+ /* We do some very simple optimizations right away to make things more readable */
+ if (pa < 0 && pb < 0) {
+ /* Was not used at all */
+ change_insn_type (&f->bb[b].insn[i], II_ADD);
+ f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
+ f->bb[b].insn[i].opt[3] = OPT_NONE;
+ } else if (pa < 0) {
+ change_insn_type (&f->bb[b].insn[i], II_ADD);
+ assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
+ f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF;
+ f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
+ f->bb[b].insn[i].opt[3] = OPT_NONE;
+ } else if (pb < 0) {
+ change_insn_type (&f->bb[b].insn[i], II_ADD);
+ assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
+ f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
+ f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
+ f->bb[b].insn[i].opt[3] = OPT_NONE;
+ } else {
+ /* Written on both paths: keep the cmov, selected by the block's lrbb */
+ int t = REF (b, 0); /* lrbb should be first instruction */
+ assert (f->INSN(t).index == II_LRBB);
+
+ f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
+ assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
+
+ f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF;
+ assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
+
+ /* Update op[3] -- flag register */
+ assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER);
+ assert (f->bb[b].insn[i].op[3] == LRBB_REG);
+ assert (t >= 0);
+ f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */
+ f->bb[b].insn[i].op[3] = t;
+ assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
+ }
+ }
+
+ /* assign register references */
+ for (b = 0; b < f->num_bb; b++) {
+ /* rebuild last used reg array */
+ f->bb[b].last_used_reg[0] = -1;
+ if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0;
+ else f->bb[b].last_used_reg[LRBB_REG] = -1;
+
+ for (i = 1; i < MAX_REGS - 1; i++)
+ f->bb[b].last_used_reg[i] = -1;
+
+ /* Create references */
+ for (i = 0; i < f->bb[b].ninsn; i++) {
+ int k;
+ /* Check for source operands first */
+ for (k = 0; k < MAX_OPERANDS; k++) {
+ if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) {
+ if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) {
+ int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
+
+ if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */
+ f->bb[b].insn[i].opt[k] = OPT_CONST;
+ f->bb[b].insn[i].op[k] = 0;
+ } else if (t >= 0) {
+ f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */
+ f->bb[b].insn[i].op[k] = t;
+ assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
+ //f->INSN(t).op[0] = -1;
+ }
+ } else if (f->bb[b].insn[i].opt[k] & OPT_REF) {
+ //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */
+ f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED;
+ }
+ }
+ }
+
+ /* Now check for destination operand(s) */
+ for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST)
+ if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
+ && (int)f->bb[b].insn[i].op[k] >= 0) {
+ assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */
+ f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i);
+ }
+ }
+ }
+
+ /* Remove all unused lrbb */
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP);
+
+ /* SSAs with final register value are marked as outputs */
+ assert (f->bb[f->num_bb - 1].next[0] == BBID_END);
+ /* Only indices 0 .. MAX_REGS - 1 are examined: these are the entries of
+ last_used_reg[] that this function initializes */
+ for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) {
+ int t = f->bb[f->num_bb - 1].last_used_reg[i];
+ /* Mark them volatile, so optimizer does not remove them */
+ if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
+ }
+}
+
+/* split the BB, based on the group numbers in .tmp
+   Block B is split into a chain of mg + 1 blocks, where mg is the largest
+   group number (insn[].tmp) found in B.  Group 0 stays in B; group g > 0 is
+   placed in a copy of B appended at index n + g - 1, n being the block count
+   on entry.  Each block of the chain keeps only the instructions of its own
+   group (the rest become nops), and all .tmp fields are reset to 0.  */
+void expand_bb (cuc_func *f, int b)
+{
+  int n = f->num_bb;
+  int mg = 0;
+  int b1, i, j;
+
+  /* Find the largest group number */
+  for (i = 0; i < f->bb[b].ninsn; i++)
+    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
+
+  /* Create copies */
+  for (b1 = 1; b1 <= mg; b1++) {
+    assert (f->num_bb < MAX_BB);
+    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
+    f->num_bb++;
+  }
+
+  /* Relocate: references (operands and dependency lists) to an instruction
+     of B that moved to group g > 0 now have to name the copy of that group */
+  for (b1 = 0; b1 < f->num_bb; b1++)
+    for (i = 0; i < f->bb[b1].ninsn; i++) {
+      dep_list *d = f->bb[b1].insn[i].dep;
+      for (j = 0; j < MAX_OPERANDS; j++)
+        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
+          int t = f->bb[b1].insn[i].op[j];
+          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
+            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
+        }
+      while (d) {
+        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
+          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
+        d = d->next;
+      }
+    }
+
+  /* Delete unused instructions */
+  for (j = 0; j <= mg; j++) {
+    /* b1 is the block that holds group j */
+    if (j == 0) b1 = b;
+    else b1 = n + j - 1;
+    for (i = 0; i < f->bb[b1].ninsn; i++) {
+      if (f->bb[b1].insn[i].tmp != j)
+        change_insn_type (&f->bb[b1].insn[i], II_NOP);
+      f->bb[b1].insn[i].tmp = 0;
+    }
+    if (j < mg) {
+      /* Inner link of the chain: b1 -> block of group j + 1 */
+      f->bb[b1].next[0] = n + j;
+      f->bb[b1].next[1] = -1;
+      f->bb[n + j].prev[0] = b1;
+      f->bb[n + j].prev[1] = -1;
+    } else {
+      /* Last block of the chain.  Its own prev[] links were already set by
+         the previous iteration (or are B's original ones when mg == 0), so
+         nothing is written here: index n + j would be f->num_bb, which is
+         not an existing block.  Only the successors of B have to learn
+         that they are now entered from b1.  */
+      i = f->bb[b1].next[0];
+      if (i >= 0 && i != BBID_END) {
+        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
+        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
+      }
+      i = f->bb[b1].next[1];
+      if (i >= 0 && i != BBID_END) {
+        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
+        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
+      }
+    }
+  }
+}
+
+/* Scans sequence of BBs and set bb[].cnt
+   The memory profile is read from the already opened runtime.sim.fmprof
+   stream (rewound first) or, if there is none, from the file MP_FILENAME.
+   Every fetch inside [f->start_addr, f->end_addr] is mapped to its basic
+   block; each time execution enters a block, the block number is appended
+   to BB_FILENAME as one int and the block's cnt is incremented.
+   Non-fetch entries are used to compute the average load and store delays
+   stored in runtime.cuc.mdelay[0] and [1].
+   Also sets f->num_runs and (re)creates the identity f->init_bb_reloc[].  */
+void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename)
+{
+  FILE *fi, *fo;
+  struct mprofentry_struct *buf;
+  const int bufsize = 256;
+  unsigned long *bb_start;
+  unsigned long *bb_end;
+  int b, i, r;
+  int curbb, prevbb = -1;
+  unsigned long addr = -1;
+  unsigned long prevaddr = -1;
+  int mssum = 0;
+  int mlsum = 0;
+  int mscnt = 0;
+  int mlcnt = 0;
+  int reopened = 0;
+
+  /* All calls with side effects are kept outside assert (), so they are
+     still executed when the program is compiled with NDEBUG.  */
+
+  /* Use already opened stream? */
+  if (runtime.sim.fmprof) {
+    fi = runtime.sim.fmprof;
+    reopened = 1;
+    rewind (fi);
+  } else {
+    fi = fopen (mp_filename, "rb");
+    assert (fi);
+  }
+  fo = fopen (bb_filename, "wb+");
+  assert (fo);
+
+  bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb);
+  assert (bb_start);
+  bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb);
+  assert (bb_end);
+  for (b = 0; b < f->num_bb; b++) {
+    /* Address range of the block; instruction indices are multiplied by 4 */
+    bb_start[b] = f->start_addr + f->bb[b].first * 4;
+    bb_end[b] = f->start_addr + f->bb[b].last * 4;
+    /* Every block starts counting from zero, not just block 0 */
+    f->bb[b].cnt = 0;
+  }
+
+  buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize);
+  assert (buf);
+
+  do {
+    r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi);
+    for (i = 0; i < r; i++) {
+      if (buf[i].type & MPROF_FETCH) {
+        if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) {
+          assert (buf[i].type & MPROF_32);
+          prevaddr = addr;
+          addr = buf[i].addr;
+          for (b = 0; b < f->num_bb; b++)
+            if (bb_start[b] <= addr && addr <= bb_end[b]) break;
+          assert (b < f->num_bb);
+          curbb = b;
+          /* A non-sequential fetch counts as a new entry, even when it
+             lands in the block that is already executing */
+          if (prevaddr + 4 != addr) prevbb = -1;
+        } else curbb = -1;
+
+#warning TODO: do not count interrupts
+        if (curbb != prevbb && curbb >= 0) {
+          /* curbb is an int and the sequence file is read back in units of
+             sizeof (int), so exactly that many bytes are written */
+          fwrite (&curbb, sizeof (curbb), 1, fo);
+          f->bb[curbb].cnt++;
+          prevbb = curbb;
+        }
+      } else {
+        /* Data access: accumulate the delay of the accessed memory area */
+        if (verify_memoryarea(buf[i].addr)) {
+          if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw;
+          else mlcnt++, mlsum += cur_area->delayr;
+        }
+      }
+    }
+  } while (r == bufsize);
+
+  /* NOTE(review): mlcnt or mscnt is zero when the profile contains no
+     load or no store; the division is then by zero -- confirm which
+     value the users of mdelay[] expect in that case.  */
+  runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt;
+  runtime.cuc.mdelay[1] = (1. * mssum) / mscnt;
+  runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1;
+  f->num_runs = f->bb[0].cnt;
+  /* A stream that was handed to us stays open */
+  if (!reopened) fclose (fi);
+  fclose (fo);
+  free (buf);
+  free (bb_end);
+  free (bb_start);
+
+  /* Initialize basic block relocations */
+  f->num_init_bb = f->num_bb;
+  f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb);
+  assert (f->init_bb_reloc);
+  for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b;
+}
+
+/* Scans the recorded sequence of BBs and sets the counts for the
+   pre/unrolled loop of BB b.
+
+   bb_filename is read as a stream of raw `int' block indexes; each index is
+   mapped through f->init_bb_reloc.  counts must have room for
+   preroll - 1 + unroll entries: counts[k] receives the number of times BB b
+   was entered as the k-th consecutive iteration, where positions past the
+   last one wrap back to position preroll - 1. */
+void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll)
+{
+  FILE *fi;
+  const int bufsize = 256;
+  int i, r;
+  int *buf;
+  int cnt = 0;
+  int times = preroll - 1 + unroll;
+
+  /* keep the calls outside of assert (), so they survive NDEBUG builds */
+  fi = fopen (bb_filename, "rb");
+  assert (fi);
+  for (i = 0; i < times; i++) counts[i] = 0;
+  buf = (int *) malloc (sizeof (int) * bufsize);
+  assert (buf);
+
+  do {
+    r = fread (buf, sizeof (int), bufsize, fi);
+    for (i = 0; i < r; i++) {
+      /* count consecutive accesses */
+      if (f->init_bb_reloc[buf[i]] == b) {
+        counts[cnt]++;
+        if (++cnt >= times) cnt = preroll - 1;
+      } else cnt = 0;
+    }
+  } while (r == bufsize);
+
+  log ("Counts %i,%i :", preroll, unroll);
+  for (i = 0; i < times; i++) log ("%x ", counts[i]);
+  log ("\n");
+
+  fclose (fi);
+  free (buf);
+}
+
+/* Redirects every operand of `bb' that references an instruction of block b:
+   a reference to an earlier instruction index than the referencing one is
+   moved to block `back', any other reference is moved to block `fwd'. */
+static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
+{
+  int pos;
+
+  for (pos = 0; pos < bb->ninsn; pos++) {
+    cuc_insn *cur = &bb->insn[pos];
+    int opnd;
+
+    for (opnd = 0; opnd < MAX_OPERANDS; opnd++) {
+      int target, dest;
+
+      if (!(cur->opt[opnd] & OPT_REF)) continue;
+      if (REF_BB (cur->op[opnd]) != b) continue;
+
+      target = REF_I (cur->op[opnd]);
+      if (target < pos) dest = back;
+      else dest = fwd;
+      cur->op[opnd] = REF (dest, target);
+    }
+  }
+}
+
+/* Prerolls (type == 1) or unrolls (type == 0) the loop in BB b `ntimes' times
+ and returns the new function. Original function is unmodified.
+
+ All changes are made to `n', the result of dup_func (f). For each of the
+ ntimes - 1 extra iterations two blocks are appended to n: a copy of BB b
+ and an "artificial" block of CMOVs that joins the values of the previous
+ artificial block (or of b itself) with those of the new copy.
+ ntimes must be greater than 1. */
+static cuc_func *roll_loop (cuc_func *f, int b, int ntimes, int type)
+{
+ int b1, t, i, prevb, prevart_b;
+ cuc_func *n = dup_func (f);
+ cuc_bb *ob = &f->bb[b];
+ cuc_insn *ii;
+
+ assert (ntimes > 1);
+ cucdebug (3, "roll type = %i, BB%i x %i (num_bb %i)\n", type, b, ntimes, n->num_bb);
+ /* the original block is the first iteration, so only ntimes - 1 copies are added */
+ ntimes--;
+ /* two new blocks are appended per added iteration */
+ assert (n->num_bb + ntimes * 2 < MAX_BB);
+
+ /* prevb: latest real iteration block; prevart_b: latest joining block */
+ prevb = b;
+ prevart_b = b;
+
+ /* point to first artificial block */
+ if (n->bb[b].next[0] != b) {
+ n->bb[b].next[0] = n->num_bb + 1;
+ } else if (n->bb[b].next[1] != b) {
+ n->bb[b].next[1] = n->num_bb + 1;
+ }
+
+ /* Duplicate the BB */
+ for (t = 0; t < ntimes; t++) {
+ cuc_bb *pb = &n->bb[prevart_b];
+ /* Add new block and set links */
+ b1 = n->num_bb++;
+ cpy_bb (&n->bb[b1], ob);
+ /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
+ n->bb[b1].type &= ~BB_INLOOP;
+ print_cuc_bb (n, "prerollA");
+
+ /* NOTE(review): unconditional printf, looks like leftover debug output */
+ printf ("prevb %i b1 %i prevart %i\n", prevb, b1, prevart_b);
+ /* Set predecessor's successor */
+ if (n->bb[prevb].next[0] == b) {
+ n->bb[prevb].next[0] = b1;
+ if (pb->next[0] < 0) pb->next[0] = b1 + 1;
+ else pb->next[1] = b1 + 1;
+ n->bb[b1].next[1] = b1 + 1;
+ } else if (n->bb[prevb].next[1] == b) {
+ if (pb->next[0] < 0) pb->next[0] = b1 + 1;
+ else pb->next[1] = b1 + 1;
+ n->bb[b1].next[0] = b1 + 1;
+ n->bb[prevb].next[1] = b1;
+ } else assert (0);
+
+ /* Set predecessor */
+ n->bb[b1].prev[0] = prevb;
+ n->bb[b1].prev[1] = -1;
+
+ /* Relocate backward references to current instance and forward references
+ to previous one */
+ relocate_bb (&n->bb[b1], b, b1, prevb);
+
+ /* add artificial block, just to join accesses */
+ b1 = n->num_bb++;
+ cpy_bb (&n->bb[b1], ob);
+ n->bb[b1].cnt = 0;
+
+ /* every instruction but the last one becomes either a CMOV selecting
+ between the previous join (prevart_b) and the new copy (b1 - 1), or a NOP */
+ for (i = 0; i < ob->ninsn - 1; i++) {
+ ii = &n->bb[b1].insn[i];
+ if (ob->insn[i].opt[0] & OPT_DEST) {
+ change_insn_type (ii, II_CMOV);
+ ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
+ ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
+
+ /* Take left one, if we should have finished the first iteration*/
+ if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
+ ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
+ } else {
+ assert (pb->insn[pb->ninsn - 1].type & IT_COND);
+ ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
+ }
+ ii->dep = NULL;
+ ii->type = ob->insn[i].type & IT_COND;
+ } else {
+ change_insn_type (ii, II_NOP);
+ }
+ }
+
+ /* Add conditional or instruction at the end, prioritizing flags */
+ ii = &n->bb[b1].insn[ob->ninsn - 1];
+ change_insn_type (ii, II_CMOV);
+ ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
+ ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
+ ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
+ } else {
+ ii->op[1] = REF (prevart_b, pb->ninsn - 1);
+ ii->opt[1] = OPT_REF;
+ }
+ if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
+ ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
+ ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
+ } else {
+ ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
+ ii->opt[2] = OPT_REF;
+ }
+ /* {z = x || y;} is same as {z = x ? x : y;} */
+ ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
+ ii->type = IT_COND;
+
+ /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
+ n->bb[b1].type &= ~BB_INLOOP;
+ n->bb[b1].prev[0] = prevart_b;
+ n->bb[b1].prev[1] = b1 - 1;
+ n->bb[b1].next[0] = -1;
+ n->bb[b1].next[1] = -1;
+
+ /* advance: the copy just added and its joining block become the latest ones */
+ prevb = b1 - 1;
+ prevart_b = b1;
+ print_cuc_bb (n, "prerollB");
+ }
+
+ print_cuc_bb (n, "preroll0");
+ /* the last joining block continues with the loop exit successor of the original BB */
+ n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
+
+ print_cuc_bb (n, "preroll1");
+ /* repair BB after loop, to point back to latest artificial BB */
+ b1 = n->bb[prevart_b].next[0];
+ if (b1 >= 0 && b1 != BBID_END) {
+ if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
+ else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
+ else assert (0);
+ }
+
+ if (type) {
+ /* Preroll: the last copy becomes the loop, the original BB is left out of it */
+ /* Relink to itself */
+ /* Set predecessor's successor */
+ if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb;
+ else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb;
+ else assert (0);
+ n->bb[prevb].prev[1] = prevb;
+
+ /* Set predecessor */
+ if (n->bb[b].prev[0] == b) {
+ n->bb[b].prev[0] = n->bb[b].prev[1];
+ n->bb[b].prev[1] = -1;
+ } else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = -1;
+ else assert (0);
+ } else {
+ /* Unroll: the last copy jumps back to the original BB */
+ /* Relink back to start of the loop */
+ /* Set predecessor's successor */
+ if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b;
+ else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b;
+ else assert (0);
+
+ /* Set predecessor */
+ if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb;
+ else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb;
+ else assert (0);
+ }
+
+ print_cuc_bb (n, "preroll2");
+
+ /* Relocate backward references to current instance and forward references
+ to previous one */
+ relocate_bb (&n->bb[b], b, b, prevb);
+
+ /* Relocate all other blocks to point to latest prevart_b */
+ /* only the blocks that already existed in f are visited here */
+ for (i = 0; i < f->num_bb; i++)
+ if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
+
+ return n;
+}
+
+/* Prerolls loop b `preroll' times and/or unrolls it `unroll' times, and
+   returns the new function.  Original function is unmodified.
+
+   A factor of 1 or less disables the respective transformation; when both
+   are disabled a plain duplicate of f is returned.  Otherwise the execution
+   counts of the generated iteration blocks are recomputed from the BB
+   sequence stored in bb_filename (see count_bb_seq). */
+cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
+{
+  int b1, i;
+  cuc_func *n, *t;
+  int *counts;
+  int times = preroll - 1 + unroll;
+
+  if (preroll > 1) {
+    t = roll_loop (f, b, preroll, 1);
+    /* continue with the block at num_bb - 2 of the prerolled function */
+    b1 = t->num_bb - 2;
+    if (unroll > 1) {
+      n = roll_loop (t, b1, unroll, 0);
+      free_func (t);
+    } else n = t;
+  } else {
+    b1 = b;
+    if (unroll > 1) n = roll_loop (f, b1, unroll, 0);
+    else return dup_func (f);
+  }
+
+  /* Assign new counts to functions; the allocation is kept outside of
+     assert (), so it is still performed when assertions are compiled out */
+  counts = (int *) malloc (sizeof (int) * times);
+  assert (counts);
+  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
+  for (i = 0; i < times; i++) {
+    /* iteration 0 is the original block, later ones were appended in pairs */
+    if (i == 0) b1 = b;
+    else b1 = f->num_bb + (i - 1) * 2;
+    n->bb[b1].cnt = counts[i];
+  }
+
+  free (counts);
+  return n;
+}
+
bb.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: insn.c
===================================================================
--- insn.c (nonexistent)
+++ insn.c (revision 1765)
@@ -0,0 +1,1424 @@
+/* insn.c -- OpenRISC Custom Unit Compiler, instruction support
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "abstract.h"
+#include "sim-config.h"
+#include "cuc.h"
+#include "insn.h"
+
+/* Table of known instructions, indexed by the II_* instruction indexes
+ (0 .. II_LAST) -- the order of the rows must match them!
+ Each row holds: the instruction name (returned by cuc_insn_name), a flag
+ telling whether operands 1 and 2 may be swapped (read as `comutative' by
+ optimize_tree), and a Verilog template string.
+ NOTE(review): the \1 .. \4 escapes in the templates presumably stand for
+ operands 0 .. 3; confirm against the code that expands them. */
+const cuc_known_insn known[II_LAST + 1] = {
+{"add", 1, "assign \1 = \2 + \3;"},
+{"sub", 0, "assign \1 = \2 - \3;"},
+{"and", 1, "assign \1 = \2 & \3;"},
+{"or", 1, "assign \1 = \2 | \3;"},
+{"xor", 1, "assign \1 = \2 ^ \3;"},
+{"mul", 1, "assign \1 = \2 * \3;"},
+
+{"srl", 0, "assign \1 = \2 >> \3;"},
+{"sll", 0, "assign \1 = \2 << \3;"},
+{"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\
+ | \2 >> \3;"},
+
+{"lb", 0, "always @(posedge clk)"},
+{"lh", 0, "always @(posedge clk)"},
+{"lw", 0, "always @(posedge clk)"},
+{"sb", 0, "/* mem8[\2] = \1 */"},
+{"sh", 0, "/* mem16[\2] = \1 */"},
+{"sw", 0, "/* mem32[\2] = \1 */"},
+
+{"sfeq", 1, "assign \1 = \2 == \3;"},
+{"sfne", 1, "assign \1 = \2 != \3;"},
+{"sfle", 0, "assign \1 = \2 <= \3;"},
+{"sflt", 0, "assign \1 = \2 < \3;"},
+{"sfge", 0, "assign \1 = \2 >= \3;"},
+{"sfgt", 0, "assign \1 = \2 > \3;"},
+{"bf", 0, ""},
+
+{"lrbb", 0,"always @(posedge clk or posedge rst)"},
+{"cmov", 0,"assign \1 = \4 ? \2 : \3;"},
+{"reg", 0, "always @(posedge clk)"},
+
+{"nop", 1, ""},
+{"call", 0, "/* function call */"}};
+
+/* Makes instruction i an instance of the known instruction `index'
+   (0 .. II_LAST).  Turning an instruction into II_NOP additionally clears
+   all of its operand types, its type flags, its dependency pointer and its
+   disassembly text. */
+void change_insn_type (cuc_insn *i, int index)
+{
+  int opnd;
+
+  assert (index >= 0 && index <= II_LAST);
+  i->index = index;
+  if (i->index != II_NOP) return;
+
+  /* a NOP carries nothing over from the instruction it replaces */
+  i->disasm[0] = '\0';
+  i->dep = NULL;
+  i->type = 0;
+  for (opnd = MAX_OPERANDS - 1; opnd >= 0; opnd--) i->opt[opnd] = OPT_NONE;
+}
+
+/* Returns the name of instruction ii as listed in the `known' table, or
+   "???" when its index lies outside 0 .. II_LAST */
+const char *cuc_insn_name (cuc_insn *ii) {
+  int is_known = ii->index >= 0 && ii->index <= II_LAST;
+
+  if (is_known) return known[ii->index].name;
+  return "???";
+}
+
+/* Prints out `ninsn' instructions starting at `insn'; `bb' is only used to
+   label every line as [bb_index].  With nonzero `verbose' the disassembly,
+   raw instruction word and table index are printed in addition to the type.
+   Operands are printed according to their operand type; an unknown operand
+   type is reported on stderr and aborts through assert (0).  The dependency
+   list of an instruction, if any, is printed on a following line. */
+void print_insns (int bb, cuc_insn *insn, int ninsn, int verbose)
+{
+  int i, j;
+  for (i = 0; i < ninsn; i++) {
+    /* two ints in hex (two digits per byte) plus "[_]" and the terminator;
+       the former 10 byte buffer overflowed for larger bb/insn numbers */
+    char tmp[2 * 2 * sizeof (int) + 4];
+    dep_list *l = insn[i].dep;
+    sprintf (tmp, "[%x_%x]", bb, i);
+    PRINTF ("%-8s%c %-4s ", tmp, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i]));
+    if (verbose) {
+      PRINTF ("%-20s insn = %08lx, index = %i, type = %04x ",
+              insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type);
+    } else PRINTF ("type = %04x ", insn[i].type);
+    for (j = 0; j < MAX_OPERANDS; j++) {
+      /* destination operands are marked with a star */
+      if (insn[i].opt[j] & OPT_DEST) PRINTF ("*");
+      switch (insn[i].opt[j] & ~OPT_DEST) {
+        case OPT_NONE:
+          break;
+        case OPT_CONST:
+          if (insn[i].type & IT_COND && (insn[i].index == II_CMOV
+              || insn[i].index == II_ADD))
+            PRINTF ("%lx, ", insn[i].op[j]);
+          else
+            PRINTF ("0x%08lx, ", insn[i].op[j]);
+          break;
+        case OPT_JUMP:
+          PRINTF ("J%lx, ", insn[i].op[j]);
+          break;
+        case OPT_REGISTER:
+          PRINTF ("r%li, ", insn[i].op[j]);
+          break;
+        case OPT_REF:
+          PRINTF ("[%lx_%lx], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j]));
+          break;
+        case OPT_BB:
+          PRINTF ("BB ");
+          print_bb_num (insn[i].op[j]);
+          PRINTF (", ");
+          break;
+        case OPT_LRBB:
+          PRINTF ("LRBB, ");
+          break;
+        default:
+          fprintf (stderr, "Invalid operand type %s(%x_%x) = %x\n",
+                   cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]);
+          assert (0);
+      }
+    }
+    if (l) {
+      PRINTF ("\n\tdep:");
+      while (l) {
+        PRINTF (" [%lx_%lx],", REF_BB (l->ref), REF_I (l->ref));
+        l = l->next;
+      }
+    }
+    PRINTF ("\n");
+  }
+}
+
+/* Appends reference `dep' to the end of the dependency list *list, unless
+   the list already contains it.  The new node is allocated with malloc ();
+   dispose_list releases it again. */
+void add_dep (dep_list **list, int dep)
+{
+  dep_list *ndep;
+  dep_list **tmp = list;
+
+  /* walk to the terminating link, giving up when dep is found on the way */
+  while (*tmp) {
+    if ((*tmp)->ref == dep) return; /* already there */
+    tmp = &((*tmp)->next);
+  }
+  ndep = (dep_list *)malloc (sizeof (dep_list));
+  /* fail loudly instead of writing through a NULL pointer */
+  assert (ndep);
+  ndep->ref = dep;
+  ndep->next = NULL;
+  *tmp = ndep;
+}
+
+/* Frees every node of the dependency list *list and leaves *list empty */
+void dispose_list (dep_list **list)
+{
+  dep_list *node = *list;
+
+  while (node) {
+    dep_list *following = node->next;
+    free (node);
+    node = following;
+  }
+  *list = NULL;
+}
+
+/* Builds the data dependency list of every instruction of f: for each
+   operand that is a reference, the dependencies already recorded for the
+   referenced instruction are copied, followed by the reference itself. */
+void add_data_dep (cuc_func *f)
+{
+  int b;
+
+  for (b = 0; b < f->num_bb; b++) {
+    cuc_insn *insn = f->bb[b].insn;
+    int i;
+
+    for (i = 0; i < f->bb[b].ninsn; i++) {
+      cuc_insn *cur = &insn[i];
+      int j;
+
+      for (j = 0; j < MAX_OPERANDS; j++) {
+        dep_list *inherited;
+
+        fflush (stdout);
+        if (!(cur->opt[j] & OPT_REF)) continue;
+
+        /* Copy list from predecessor */
+        for (inherited = f->INSN(cur->op[j]).dep; inherited; inherited = inherited->next)
+          add_dep (&cur->dep, inherited->ref);
+
+        /* add predecessor */
+        add_dep (&cur->dep, cur->op[j]);
+      }
+    }
+  }
+}
+
+/* Inserts n nops before insn 'ref'.
+ The instruction array of the block is grown, the instructions from `ref'
+ on are moved up by n, and every reference to a moved instruction (operands,
+ instruction dependency lists, per-block mdep lists and f->msched) is
+ adjusted. cuc_check (f) is called at the end. */
+void insert_insns (cuc_func *f, int ref, int n)
+{
+ int b1, i, j;
+ int b = REF_BB(ref);
+ int ins = REF_I(ref);
+
+ assert (b < f->num_bb);
+ assert (ins <= f->bb[b].ninsn);
+ assert (f->bb[b].ninsn + n < MAX_INSNS);
+ if (cuc_debug >= 8) print_cuc_bb (f, "PREINSERT");
+ /* NOTE(review): the realloc result is not checked before it is used */
+ f->bb[b].insn = (cuc_insn *) realloc (f->bb[b].insn,
+ (f->bb[b].ninsn + n) * sizeof (cuc_insn));
+
+ /* Set up relocations */
+ /* `reloc' is not declared in this function -- presumably a table with at
+ least MAX_INSNS entries declared elsewhere; verify */
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (i < ins) reloc[i] = i;
+ else reloc[i] = i + n;
+
+ /* Move instructions, based on relocations */
+ /* copying from the last instruction down keeps not yet moved ones intact */
+ for (i = f->bb[b].ninsn - 1; i >= 0; i--) f->bb[b].insn[reloc[i]] = f->bb[b].insn[i];
+ for (i = 0; i < n; i++) change_insn_type (&f->bb[b].insn[ins + i], II_NOP);
+
+ f->bb[b].ninsn += n;
+ /* Fix all references into block b that point at or behind the insertion point */
+ for (b1 = 0; b1 < f->num_bb; b1++) {
+ dep_list *d = f->bb[b1].mdep;
+ while (d) {
+ if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins)
+ d->ref = REF (b, REF_I (d->ref) + n);
+ d = d->next;
+ }
+ for (i = 0; i < f->bb[b1].ninsn; i++) {
+ d = f->bb[b1].insn[i].dep;
+ while (d) {
+ if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins)
+ d->ref = REF (b, REF_I (d->ref) + n);
+ d = d->next;
+ }
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (f->bb[b1].insn[i].opt[j] & OPT_REF && REF_BB (f->bb[b1].insn[i].op[j]) == b
+ && REF_I (f->bb[b1].insn[i].op[j]) >= ins)
+ f->bb[b1].insn[i].op[j] = REF (b, REF_I (f->bb[b1].insn[i].op[j]) + n);
+ }
+ }
+ /* msched entries are translated through the relocation table built above */
+ for (i = 0; i < f->nmsched; i++)
+ if (REF_BB(f->msched[i]) == b) f->msched[i] = REF (b, reloc[REF_I (f->msched[i])]);
+ if (cuc_debug >= 8) print_cuc_bb (f, "POSTINSERT");
+ cuc_check (f);
+}
+
+/* Turns ii into the constant move "add dest, value, 0" */
+static void edge_to_const (cuc_insn *ii, unsigned long value)
+{
+  change_insn_type (ii, II_ADD);
+  ii->op[1] = value; ii->opt[1] = OPT_CONST;
+  ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+}
+
+/* Turns ii into the plain move "add dest, op1, 0"; operand 1 is kept */
+static void edge_to_move (cuc_insn *ii)
+{
+  change_insn_type (ii, II_ADD);
+  ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+}
+
+/* Simplifies instruction ii when its operands form a trivial case (identity
+   or absorbing constant, equal operands, constant comparison, degenerate
+   cmov).  The simplified instruction is usually a move, encoded as
+   "add dest, x, 0".
+
+   Returns nonzero, if instruction was simplified, and zero when it was left
+   untouched.  Every rewrite reports 1 and leaves the switch immediately, so
+   a rewritten instruction is never examined by the rules of another case. */
+int apply_edge_condition (cuc_insn *ii)
+{
+  unsigned int c = ii->op[2];
+  int const2 = (ii->opt[2] & OPT_CONST) != 0;
+  int const12 = const2 && (ii->opt[1] & OPT_CONST) != 0;
+
+  switch (ii->index) {
+  case II_AND:
+    if (const2 && c == 0) {                /* x & 0 = 0 */
+      edge_to_const (ii, 0);
+      return 1;
+    }
+    if (const2 && c == 0xffffffff) {       /* x & ~0 = x */
+      edge_to_move (ii);
+      return 1;
+    }
+    break;
+  case II_OR:
+    if (const2 && c == 0x0) {              /* x | 0 = x */
+      edge_to_move (ii);
+      return 1;
+    }
+    if (const2 && c == 0xffffffff) {       /* x | ~0 = ~0 */
+      edge_to_const (ii, 0xffffffff);
+      return 1;
+    }
+    break;
+  case II_SUB:
+    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { /* x - x = 0 */
+      edge_to_const (ii, 0);
+      return 1;
+    }
+    break;
+  case II_MUL:
+    if (const2 && c == 0) {                /* x * 0 = 0 */
+      edge_to_const (ii, 0);
+      return 1;
+    }
+    if (const2 && c == 1) {                /* x * 1 = x */
+      edge_to_move (ii);
+      return 1;
+    }
+    if (const2 && c == 0xffffffff) {       /* x * -1 = 0 - x */
+      change_insn_type (ii, II_SUB);
+      ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1];
+      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
+      return 1;
+    }
+    break;
+  case II_SRL:
+  case II_SLL:
+    if (const2 && c == 0) {                /* shift by 0 = x */
+      edge_to_move (ii);
+      return 1;
+    }
+    if (const2 && c >= 32) {               /* everything is shifted out */
+      edge_to_const (ii, 0);
+      return 1;
+    }
+    break;
+  case II_SRA:
+    if (const2 && c == 0) {                /* shift by 0 = x */
+      edge_to_move (ii);
+      return 1;
+    }
+    break;
+  case II_SFEQ:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] == ii->op[2]);
+      return 1;
+    }
+    break;
+  case II_SFNE:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] != ii->op[2]);
+      return 1;
+    }
+    break;
+  /* NOTE(review): the comparisons against constant 0 below are only valid
+     for unsigned comparisons -- confirm how signed ones are represented */
+  case II_SFLE:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] <= ii->op[2]);
+      return 1;
+    }
+    if (const2 && ii->op[2] == 0) {        /* x <= 0 is x == 0 */
+      change_insn_type (ii, II_SFEQ);
+      return 1;
+    }
+    break;
+  case II_SFLT:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] < ii->op[2]);
+      return 1;
+    }
+    if (const2 && ii->op[2] == 0) {        /* x < 0 is never true */
+      edge_to_const (ii, 0);
+      return 1;
+    }
+    break;
+  case II_SFGE:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] >= ii->op[2]);
+      return 1;
+    }
+    if (const2 && ii->op[2] == 0) {        /* x >= 0 is always true */
+      edge_to_const (ii, 1);
+      return 1;
+    }
+    break;
+  case II_SFGT:
+    if (const12) {
+      edge_to_const (ii, ii->op[1] > ii->op[2]);
+      return 1;
+    }
+    if (const2 && ii->op[2] == 0) {        /* x > 0 is x != 0 */
+      change_insn_type (ii, II_SFNE);
+      return 1;
+    }
+    break;
+  case II_CMOV:
+    /* both choices are the same => condition does not matter */
+    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
+      edge_to_move (ii);
+      ii->opt[3] = OPT_NONE;
+      return 1;
+    }
+    /* constant condition => keep only the selected choice; the result is
+       "add x, 0" or "add 0, y" respectively */
+    if (ii->opt[3] & OPT_CONST) {
+      change_insn_type (ii, II_ADD);
+      if (ii->op[3]) {
+        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+      } else {
+        ii->op[1] = 0; ii->opt[1] = OPT_CONST;
+      }
+      ii->opt[3] = OPT_NONE;
+      return 1;
+    }
+    if (ii->type & IT_COND) {
+      if (const12) {
+        if (ii->op[1] && !ii->op[2]) {     /* c ? 1 : 0 = c */
+          change_insn_type (ii, II_ADD);
+          ii->op[1] = ii->op[3]; ii->opt[1] = ii->opt[3];
+          ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+          ii->opt[3] = OPT_NONE;
+          return 1;
+        }
+        if (ii->op[1] && ii->op[2]) {      /* c ? 1 : 1 = 1 */
+          edge_to_const (ii, 1);
+          ii->opt[3] = OPT_NONE;
+          return 1;
+        }
+        if (!ii->op[1] && !ii->op[2]) {    /* c ? 0 : 0 = 0 */
+          edge_to_const (ii, 0);
+          ii->opt[3] = OPT_NONE;
+          return 1;
+        }
+      }
+      if (ii->op[1] == ii->op[3] && ii->opt[1] == ii->opt[3]) { /* c ? c : y = c ? 1 : y */
+        ii->op[1] = 1; ii->opt[1] = OPT_CONST;
+        return 1;
+      }
+      if (ii->op[2] == ii->op[3] && ii->opt[2] == ii->opt[3]) { /* c ? x : c = c ? x : 0 */
+        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+        return 1;
+      }
+    }
+    break;
+  }
+  return 0;
+}
+
+/* First primary input found by cmov_needed; tmp_opt == OPT_NONE means that
+   none has been found yet */
+static unsigned long tmp_op, tmp_opt;
+
+/* Remembers operand (op, opt) as the primary input when there is none yet.
+   Returns 1 when a different primary input is already remembered, else 0. */
+static int primary_input_differs (unsigned long op, unsigned long opt)
+{
+  if (tmp_opt == OPT_NONE) {
+    tmp_op = op;
+    tmp_opt = opt;
+    return 0;
+  }
+  return tmp_opt != opt || tmp_op != op;
+}
+
+/* Recursively follows moves and cmovs starting at instruction `ref' and
+   collects their primary inputs; returns 0 if all of them are one and the
+   same input (cmov can be replaced by add), 1 as soon as two differ */
+static int cmov_needed (cuc_func *f, int ref)
+{
+  cuc_insn *ii = &f->INSN(ref);
+  int needed = 0;
+  int j;
+
+  cucdebug (4, " %x", ref);
+  /* an instruction that is already being visited closes a loop => ignore */
+  if (ii->tmp) return 0;
+  ii->tmp = 1;
+
+  if (ii->index == II_ADD && !(ii->type & IT_VOLATILE)
+      && ii->opt[2] == OPT_CONST && ii->op[2] == 0) {
+    /* plain move: look through it */
+    if (ii->opt[1] == OPT_REF) needed = cmov_needed (f, ii->op[1]) != 0;
+    else needed = primary_input_differs (ii->op[1], ii->opt[1]);
+  } else if ((ii->index != II_CMOV) || (ii->type & IT_VOLATILE)) {
+    /* anything but a non-volatile cmov is a primary input itself */
+    needed = primary_input_differs (ref, OPT_REF);
+  } else {
+    /* cmov: examine both choices, stop at the first mismatch */
+    for (j = 1; j < 3 && !needed; j++) {
+      cucdebug (4, "(%x:%i)", ref, j);
+      if (ii->opt[j] == OPT_REF) needed = cmov_needed (f, ii->op[j]) != 0;
+      else needed = primary_input_differs (ii->op[j], ii->opt[j]);
+    }
+  }
+
+  /* unmark on the way out */
+  ii->tmp = 0;
+  return needed;
+}
+
+/* Replaces every non-volatile cmov in a live block whose choices all lead
+   to the same primary input by a move of that input; returns nonzero if
+   any instruction was changed */
+int optimize_cmovs (cuc_func *f)
+{
+  int changed = 0;
+  int b, i;
+
+  /* Mark all instructions unvisited */
+  for (b = 0; b < f->num_bb; b++) {
+    if (f->bb[b].type & BB_DEAD) continue;
+    for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].tmp = 0;
+  }
+
+  for (b = 0; b < f->num_bb; b++) {
+    if (f->bb[b].type & BB_DEAD) continue;
+    for (i = 0; i < f->bb[b].ninsn; i++) {
+      cuc_insn *ii = &f->bb[b].insn[i];
+
+      if (ii->index != II_CMOV || (ii->type & IT_VOLATILE)) continue;
+
+      /* start the search without a primary input */
+      tmp_opt = OPT_NONE;
+      cucdebug (4, "\n");
+      if (cmov_needed (f, REF(b, i))) continue;
+
+      /* the single primary input found replaces the whole cmov */
+      assert (tmp_opt != OPT_NONE);
+      change_insn_type (ii, II_ADD);
+      ii->op[1] = tmp_op; ii->opt[1] = tmp_opt;
+      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+      ii->opt[3] = OPT_NONE;
+      changed = 1;
+    }
+  }
+  return changed;
+}
+
+/* Counts the operands in the whole function f that are references to
+   instruction `ref' */
+static int insn_uses (cuc_func *f, int ref)
+{
+  int uses = 0;
+  int b;
+
+  for (b = 0; b < f->num_bb; b++) {
+    cuc_bb *bb = &f->bb[b];
+    int i;
+
+    for (i = 0; i < bb->ninsn; i++) {
+      cuc_insn *cur = &bb->insn[i];
+      int j;
+
+      for (j = 0; j < MAX_OPERANDS; j++) {
+        if (!(cur->opt[j] & OPT_REF)) continue;
+        if (cur->op[j] == ref) uses++;
+      }
+    }
+  }
+  return uses;
+}
+
+/* handles some common CMOV, CMOV-CMOV cases;
+ returns nonzero if anything optimized.
+ `ref' must reference a CMOV instruction of f. Four rewrites are tried in
+ turn: removal of a self-referencing or degenerate cmov, merging of two
+ chained cmovs, simplification of the condition operand, and pushing the
+ cmov through two equal instructions that differ in a single operand. */
+static int optimize_cmov_more (cuc_func *f, int ref)
+{
+ int t = 0;
+ cuc_insn *ii = &f->INSN(ref);
+ assert (ii->index == II_CMOV);
+
+ /* In case of x = cmov x, y; or x = cmov y, x; we have
+ asynchronous loop -> remove it */
+ if ((ii->opt[1] & OPT_REF) && ii->op[1] == ref) t = 1;
+ if ((ii->opt[2] & OPT_REF) && ii->op[2] == ref) t = 2;
+ if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) t = 2;
+ if (t) {
+ /* operand t is zeroed, so the resulting add just passes the other choice */
+ change_insn_type (ii, II_ADD);
+ cucdebug (2, "%8x:cmov %i\n", ref, t);
+ ii->opt[t] = OPT_CONST;
+ ii->op[t] = 0;
+ ii->opt[3] = OPT_NONE;
+ return 1;
+ }
+ if (!(ii->type & IT_COND)) {
+ for (t = 1; t <= 2; t++) {
+ /* cmov L, X, Y, C1
+ cmov Z, L, Y, C2
+ can be replaced with simpler:
+ cmov L, C1, C2, C2
+ cmov Z, X, Y, L */
+ if (ii->opt[t] == OPT_REF && f->INSN(ii->op[t]).index == II_CMOV) {
+ int r = ii->op[t];
+ unsigned long x, xt, y, yt;
+ cuc_insn *prev = &f->INSN(r);
+ cuc_check (f);
+ cucdebug (3, "%x-%x\n", ref, r);
+ assert (!(prev->type & IT_COND));
+ /* both cmovs must share the other choice, and L must have no other user */
+ if (prev->op[3 - t] != ii->op[3 - t] || prev->opt[3 - t] != ii->opt[3 - t]
+ || insn_uses (f, r) > 1) continue;
+ cucdebug (3, "%x-%x cmov more\n", ref, r);
+ prev->type |= IT_COND;
+ /* save X and Y first, the assignments below overwrite them */
+ x = prev->op[t]; xt = prev->opt[t];
+ y = prev->op[3 - t]; yt = prev->opt[3 - t];
+ prev->op[t] = ii->op[3]; prev->opt[t] = ii->opt[3]; /* C2 */
+ ii->op[3] = r; ii->opt[3] = OPT_REF; /* L */
+ prev->op[3 - t] = prev->op[3]; prev->opt[3 - t] = prev->opt[3]; /* C1 */
+ prev->op[3] = prev->op[t]; prev->opt[3] = prev->opt[t]; /* C2 */
+ ii->op[t] = x; ii->opt[t] = xt; /* X */
+ ii->op[3 - t] = y; ii->opt[3 - t] = yt; /* Y */
+ prev->op[0] = -1; prev->opt[0] = OPT_REGISTER | OPT_DEST;
+ cuc_check (f);
+ return 1;
+ }
+ }
+ }
+
+ if (ii->opt[3] & OPT_REF) {
+ cuc_insn *prev = &f->INSN(ii->op[3]);
+ assert (prev->type & IT_COND);
+ if (prev->index == II_CMOV) {
+ /* negated conditional:
+ cmov x, 0, 1, y
+ cmov z, a, b, x
+ is replaced by
+ cmov z, b, a, y */
+ /* NOTE(review): this rewrite does not return 1; the change is only
+ reported if a later rewrite in this function also succeeds -- confirm
+ that this is intended */
+ if (prev->opt[1] & OPT_CONST && prev->opt[2] & OPT_CONST
+ && !prev->op[1] && prev->op[2]) {
+ unsigned long t;
+ t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t;
+ t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t;
+ ii->op[3] = prev->op[3]; ii->opt[3] = prev->opt[3];
+ }
+ } else if (prev->index == II_ADD) {
+ /* add x, y, 0
+ cmov z, a, b, x
+ is replaced by
+ cmov z, a, b, y */
+ if (prev->opt[2] & OPT_CONST && prev->op[2] == 0) {
+ ii->op[3] = prev->op[1]; ii->opt[3] = prev->opt[1];
+ return 1;
+ }
+ }
+ }
+
+ /* Check if both choices can be pushed through */
+ if (ii->opt[1] & OPT_REF && ii->opt[2] & OPT_REF
+ /* Usually doesn't make sense to move conditionals though => more area */
+ && !(ii->type & IT_COND)) {
+ cuc_insn *a, *b;
+ a = &f->INSN(ii->op[1]);
+ b = &f->INSN(ii->op[2]);
+ if (a->index == b->index && !(a->type & IT_VOLATILE) && !(b->type & IT_VOLATILE)) {
+ /* diff: -1 = no differing operand, >= 0 = index of the only differing
+ operand, -2 = more than one operand differs */
+ int diff = -1;
+ int i;
+ for (i = 0; i < MAX_OPERANDS; i++)
+ if (a->opt[i] != b->opt[i] || !(a->op[i] == b->op[i] || a->opt[i] & OPT_REGISTER)) {
+ if (diff == -1) diff = i; else diff = -2;
+ }
+ /* If diff == -1, it will be eliminated by CSE */
+ if (diff >= 0) {
+ cuc_insn tmp, cmov;
+ int ref2 = REF (REF_BB (ref), REF_I (ref) + 1);
+ /* make room in front of the cmov; the original cmov ends up at ref2 */
+ insert_insns (f, ref, 1);
+ /* insert_insns reallocates the instruction array, so a and b are
+ looked up again */
+ a = &f->INSN(f->INSN(ref2).op[1]);
+ b = &f->INSN(f->INSN(ref2).op[2]);
+ cucdebug (4, "ref = %x %lx %lx\n", ref, f->INSN(ref2).op[1],
+ f->INSN(ref2).op[2]);
+ if (cuc_debug >= 7) {
+ print_cuc_bb (f, "AAA");
+ cuc_check (f);
+ }
+ /* tmp: copy of a that takes the differing operand from the new cmov;
+ cmov: selects between the differing operands of a and b */
+ tmp = *a;
+ cmov = f->INSN(ref2);
+ tmp.op[diff] = ref; tmp.opt[diff] = OPT_REF;
+ cmov.op[0] = -1; cmov.opt[0] = OPT_REGISTER | OPT_DEST;
+ cmov.op[1] = a->op[diff]; cmov.opt[1] = a->opt[diff];
+ cmov.op[2] = b->op[diff]; cmov.opt[2] = b->opt[diff];
+ change_insn_type (&cmov, II_CMOV);
+ cmov.type &= ~IT_COND;
+ cucdebug (4, "ref2 = %x %lx %lx\n", ref2, cmov.op[1], cmov.op[2]);
+ if (cmov.opt[1] & OPT_REF && cmov.opt[2] & OPT_REF
+ && f->INSN(cmov.op[1]).type & IT_COND) {
+ assert (f->INSN(cmov.op[2]).type & IT_COND);
+ cmov.type |= IT_COND;
+ }
+ /* the new cmov goes into the inserted slot, the merged insn behind it */
+ f->INSN(ref) = cmov;
+ f->INSN(ref2) = tmp;
+ if (cuc_debug >= 6) print_cuc_bb (f, "BBB");
+ cuc_check (f);
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Optimizes dataflow tree */
+int optimize_tree (cuc_func *f)
+{
+ int b, i, j;
+ int modified;
+ int gmodified = 0;
+
+ do {
+ modified = 0;
+ if (cuc_debug) cuc_check (f);
+ for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) {
+ for (i = 0; i < f->bb[b].ninsn; i++) {
+ cuc_insn *ii = &f->bb[b].insn[i];
+ /* We tend to have the third parameter const if instruction is cumutative */
+ if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)) {
+ int cond = ii->index == II_SFEQ || ii->index == II_SFNE
+ || ii->index == II_SFLT || ii->index == II_SFLE
+ || ii->index == II_SFGT || ii->index == II_SFGE;
+ if (known[ii->index].comutative || cond) {
+ unsigned long t = ii->opt[1];
+ ii->opt[1] = ii->opt[2];
+ ii->opt[2] = t;
+ t = ii->op[1];
+ ii->op[1] = ii->op[2];
+ ii->op[2] = t;
+ modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i));
+ if (cond) {
+ if (ii->index == II_SFEQ) ii->index = II_SFNE;
+ else if (ii->index == II_SFNE) ii->index = II_SFEQ;
+ else if (ii->index == II_SFLE) ii->index = II_SFGT;
+ else if (ii->index == II_SFLT) ii->index = II_SFGE;
+ else if (ii->index == II_SFGE) ii->index = II_SFLT;
+ else if (ii->index == II_SFGT) ii->index = II_SFLE;
+ else assert (0);
+ }
+ }
+ }
+
+ /* Try to do the promotion */
+ /* We have two consecutive expressions, containing constants,
+ * if previous is a simple expression we can handle it simply: */
+ for (j = 0; j < MAX_OPERANDS; j++)
+ if (ii->opt[j] & OPT_REF) {
+ cuc_insn *t = &f->INSN(ii->op[j]);
+ if (f->INSN(ii->op[j]).index == II_ADD
+ && f->INSN(ii->op[j]).opt[2] & OPT_CONST
+ && f->INSN(ii->op[j]).op[2] == 0
+ && !(ii->type & IT_MEMORY && t->type & IT_MEMADD)) {
+ /* do not promote through add-mem, and branches */
+ modified = 1;
+ cucdebug (2, "%8x:promote%i %8lx %8lx\n", REF (b, i), j, ii->op[j], t->op[1]);
+ ii->op[j] = t->op[1];
+ ii->opt[j] = t->opt[1];
+ }
+ }
+
+ /* handle some CMOV cases more deeply */
+ if (ii->index == II_CMOV && optimize_cmov_more (f, REF (b, i))) {
+ modified = 1;
+ continue;
+ }
+
+ /* Do nothing to volatile instructions */
+ if (ii->type & IT_VOLATILE) continue;
+
+ /* Check whether we can simplify the instruction */
+ if (apply_edge_condition (ii)) {
+ modified = 1;
+ continue;
+ }
+ /* We cannot do anything more if at least one is not constant */
+ if (!(ii->opt[2] & OPT_CONST)) continue;
+
+ if (ii->opt[1] & OPT_CONST) { /* We have constant expression */
+ unsigned long value;
+ int ok = 1;
+ /* Was constant expression already? */
+ if (ii->index == II_ADD && !ii->op[2]) continue;
+
+ if (ii->index == II_ADD) value = ii->op[1] + ii->op[2];
+ else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2];
+ else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2];
+ else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2];
+ else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2];
+ else if (ii->index == II_OR) value = ii->op[1] | ii->op[2];
+ else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2];
+ else if (ii->index == II_AND) value = ii->op[1] & ii->op[2];
+ else ok = 0;
+ if (ok) {
+ change_insn_type (ii, II_ADD);
+ ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+ ii->op[1] = value; ii->opt[1] = OPT_CONST;
+ ii->op[2] = 0; ii->opt[2] = OPT_CONST;
+ modified = 1; cucdebug (2, "%8x:const\n", REF (b, i));
+ }
+ } else if (ii->opt[1] & OPT_REF) {
+ cuc_insn *prev = &f->INSN(ii->op[1]);
+ /* Is this just a move? */
+ if (ii->index == II_ADD
+ && !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
+ int b1, i1, j1;
+ cucdebug (2, "%8x:link %8lx: ", REF(b, i), ii->op[1]);
+ if (!(prev->type & (IT_OUTPUT | IT_VOLATILE))) {
+ assert (ii->opt[0] & OPT_DEST);
+ prev->op[0] = ii->op[0]; prev->opt[0] = ii->opt[0];
+ prev->type |= ii->type & IT_OUTPUT;
+ for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
+ for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
+ for (j1 = 0; j1 < MAX_OPERANDS; j1++)
+ if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
+ && f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
+ cucdebug (2, "%x ", REF (b1, i1));
+ f->bb[b1].insn[i1].op[j1] = ii->op[1];
+ }
+ cucdebug (2, "\n");
+ change_insn_type (ii, II_NOP);
+ }
+ } else if (prev->opt[2] & OPT_CONST) {
+ /* Handle some common cases */
+ /* add - add joining */
+ if (ii->index == II_ADD && prev->index == II_ADD) {
+ ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
+ ii->op[2] += prev->op[2];
+ modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i));
+ } else /* add - sub joining */
+ if (ii->index == II_ADD && prev->index == II_SUB) {
+ change_insn_type (&insn[i], II_SUB);
+ ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
+ ii->op[2] += prev->op[2];
+ modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i));
+ } else /* sub - add joining */
+ if (ii->index == II_SUB && prev->index == II_ADD) {
+ ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
+ ii->op[2] += prev->op[2];
+ modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i));
+ } else /* add - sfxx joining */
+ if (prev->index == II_ADD && (
+ ii->index == II_SFEQ || ii->index == II_SFNE
+ || ii->index == II_SFLT || ii->index == II_SFLE
+ || ii->index == II_SFGT || ii->index == II_SFGE)) {
+ if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) {
+ ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
+ ii->op[2] -= prev->op[2];
+ modified = 1; cucdebug (2, "%8x: add-sfxx\n", REF(b, i));
+ }
+ } else /* sub - sfxx joining */
+ if (prev->index == II_SUB && (
+ ii->index == II_SFEQ || ii->index == II_SFNE
+ || ii->index == II_SFLT || ii->index == II_SFLE
+ || ii->index == II_SFGT || ii->index == II_SFGE)) {
+ if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) {
+ ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
+ ii->op[2] += prev->op[2];
+ modified = 1; cucdebug (2, "%8x: sub-sfxx\n", REF(b, i));
+ }
+ }
+ }
+ }
+ }
+ }
+ if (modified) gmodified = 1;
+ } while (modified);
+ return gmodified;
+}
+
+/* Remove nop instructions.
+   Every basic block is compacted in place and all references to its
+   instructions (operands and dependency lists, in every block) are
+   renumbered.  Returns 1 if any instruction was dropped, 0 otherwise. */
+int remove_nops (cuc_func *f)
+{
+  int changed = 0;
+  int blk;
+
+  for (blk = 0; blk < f->num_bb; blk++) {
+    cuc_insn *code = f->bb[blk].insn;
+    int old_n = f->bb[blk].ninsn;
+    int kept = 0;
+    int src, user, k, opn;
+
+    /* Slide survivors down; reloc[] maps old index -> new index.  A removed
+       nop maps to the slot of the next survivor (For jumps only). */
+    for (src = 0; src < old_n; src++) {
+      reloc[src] = kept;
+      if (code[src].index != II_NOP) code[kept++] = code[src];
+    }
+    if (kept != old_n) changed = 1;
+    f->bb[blk].ninsn = kept;
+
+    /* Relocate references from all basic blocks */
+    for (user = 0; user < f->num_bb; user++)
+      for (k = 0; k < f->bb[user].ninsn; k++) {
+        cuc_insn *ui = &f->bb[user].insn[k];
+        dep_list *dl;
+
+        for (opn = 0; opn < MAX_OPERANDS; opn++) {
+          if (!(ui->opt[opn] & OPT_REF)) continue;
+          if (REF_BB(ui->op[opn]) != blk) continue;
+          ui->op[opn] = REF (blk, reloc[REF_I (ui->op[opn])]);
+        }
+
+        for (dl = ui->dep; dl; dl = dl->next)
+          if (REF_BB(dl->ref) == blk) dl->ref = REF (blk, reloc[REF_I (dl->ref)]);
+      }
+  }
+  return changed;
+}
+
+/* Clear IT_UNUSED on instruction REF and, recursively, on every instruction
+   it references.  Recursion stops at instructions that are already unmarked. */
+static void unmark_tree (cuc_func *f, int ref)
+{
+  cuc_insn *node = &f->INSN(ref);
+  int opn;
+
+  cucdebug (5, "%x ", ref);
+  if (!(node->type & IT_UNUSED)) return;
+
+  node->type &= ~IT_UNUSED;
+  for (opn = 0; opn < MAX_OPERANDS; opn++)
+    if (node->opt[opn] & OPT_REF) unmark_tree (f, node->op[opn]);
+}
+
+/* Remove unused assignments.
+   Mark-and-sweep: everything is flagged IT_UNUSED, the flag is cleared on all
+   instructions reachable from a root, and whatever is still flagged becomes a
+   nop.  Returns the result of remove_nops (). */
+int remove_dead (cuc_func *f)
+{
+  int blk, k;
+
+  /* Mark phase: assume nothing is needed */
+  for (blk = 0; blk < f->num_bb; blk++)
+    for (k = 0; k < f->bb[blk].ninsn; k++)
+      f->bb[blk].insn[k].type |= IT_UNUSED;
+
+  /* Roots: volatile and output instructions, stores, and loads when the
+     memory order is MO_NONE or MO_WEAK */
+  for (blk = 0; blk < f->num_bb; blk++)
+    for (k = 0; k < f->bb[blk].ninsn; k++) {
+      cuc_insn *cur = &f->bb[blk].insn[k];
+      int root = (cur->type & (IT_VOLATILE | IT_OUTPUT)) != 0;
+
+      if (!root && II_IS_LOAD (cur->index))
+        root = f->memory_order == MO_NONE || f->memory_order == MO_WEAK;
+      if (!root && II_IS_STORE (cur->index)) root = 1;
+      if (!root) continue;
+
+      unmark_tree (f, REF (blk, k));
+      cucdebug (5, "\n");
+    }
+
+  /* Sweep phase: anything still flagged is dead */
+  for (blk = 0; blk < f->num_bb; blk++)
+    for (k = 0; k < f->bb[blk].ninsn; k++) {
+      cuc_insn *cur = &f->bb[blk].insn[k];
+      if (cur->type & IT_UNUSED) change_insn_type (cur, II_NOP);
+    }
+
+  return remove_nops (f);
+}
+
+/* Removes trivial register assignments, i.e. "rX = rX + 0".
+   saved_regs[] is first reset from caller_saved[]; a register whose trivial
+   assignment carried IT_OUTPUT is additionally recorded as saved.
+   Returns the result of remove_nops (). */
+int remove_trivial_regs (cuc_func *f)
+{
+  int blk, k, reg;
+
+  for (reg = 0; reg < MAX_REGS; reg++) f->saved_regs[reg] = caller_saved[reg];
+
+  for (blk = 0; blk < f->num_bb; blk++)
+    for (k = 0; k < f->bb[blk].ninsn; k++) {
+      cuc_insn *cur = &f->bb[blk].insn[k];
+
+      if (cur->index != II_ADD) continue;
+      if (!(cur->opt[0] & OPT_REGISTER)) continue;
+      if (!(cur->opt[1] & OPT_REGISTER) || cur->op[0] != cur->op[1]) continue;
+      if (!(cur->opt[2] & OPT_CONST) || cur->op[2] != 0) continue;
+
+      if (cur->type & IT_OUTPUT) f->saved_regs[cur->op[0]] = 1;
+      change_insn_type (cur, II_NOP);
+    }
+
+  if (cuc_debug >= 2) {
+    PRINTF ("saved regs ");
+    for (reg = 0; reg < MAX_REGS; reg++) PRINTF ("%i:%i ", reg, f->saved_regs[reg]);
+    PRINTF ("\n");
+  }
+  return remove_nops (f);
+}
+
+/* Determine inputs and outputs.
+   After the call, used_regs[r] is 1 for every register read by some
+   instruction and lur[r] holds the reference of the last instruction (in scan
+   order) that writes register r, or -1 if none does. */
+void set_io (cuc_func *f)
+{
+  int blk, k, opn, reg;
+
+  /* Determine register usage */
+  for (reg = 0; reg < MAX_REGS; reg++) {
+    f->lur[reg] = -1;
+    f->used_regs[reg] = 0;
+  }
+  if (cuc_debug > 5) print_cuc_bb (f, "SET_IO");
+
+  for (blk = 0; blk < f->num_bb; blk++)
+    for (k = 0; k < f->bb[blk].ninsn; k++) {
+      cuc_insn *cur = &f->bb[blk].insn[k];
+
+      for (opn = 0; opn < MAX_OPERANDS; opn++) {
+        /* NOTE(review): if op[] has an unsigned type the ">= 0" test is
+           always true -- confirm against the cuc_insn declaration */
+        if (!(cur->opt[opn] & OPT_REGISTER && cur->op[opn] >= 0)) continue;
+
+        if (cur->opt[opn] & OPT_DEST) f->lur[cur->op[opn]] = REF (blk, k);
+        else f->used_regs[cur->op[opn]] = 1;
+      }
+    }
+}
+
+/* relocate all accesses inside of BB b to back/fwd:
+   a reference into block b that points before the referring instruction is
+   redirected to block BACK, any other one to block FWD; the instruction
+   index part of the reference is kept. */
+static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
+{
+  int k, opn;
+
+  for (k = 0; k < bb->ninsn; k++) {
+    cuc_insn *cur = &bb->insn[k];
+
+    for (opn = 0; opn < MAX_OPERANDS; opn++) {
+      int idx, target;
+
+      if (!(cur->opt[opn] & OPT_REF)) continue;
+      if (REF_BB (cur->op[opn]) != b) continue;
+
+      idx = REF_I (cur->op[opn]);
+      target = idx < k ? back : fwd;
+      cur->op[opn] = REF (target, idx);
+    }
+  }
+}
+
+/* Latch outputs in loops */
+/* Works in four steps: (1) cut the tree and split blocks, (2) flag every
+ * instruction whose value must be registered with IT_LATCHED, (3) append one
+ * II_REG instruction per latched instruction to the end of its block and
+ * (4) redirect the affected references to those II_REG instructions. */
+void add_latches (cuc_func *f)
+{
+ int b, i, j;
+
+ //print_cuc_bb (f, "ADD_LATCHES a");
+ /* Cuts the tree and marks registers */
+ mark_cut (f);
+
+ /* Split BBs with more than one group */
+ for (b = 0; b < f->num_bb; b++) expand_bb (f, b);
+ remove_nops (f);
+ //print_cuc_bb (f, "ADD_LATCHES 0");
+
+ /* Convert accesses in BB_INLOOP type block to latched */
+ for (b = 0; b < f->num_bb; b++) {
+ /* NOTE(review): this j shadows the function-level j */
+ int j;
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ /* Only operands whose type is exactly OPT_REF (no other flag bits) */
+ for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) {
+ int t = f->bb[b].insn[i].op[j];
+ /* If we are pointing to a INLOOP block from outside, or forward
+ (= previous loop iteration) we must register that data */
+ /* Branch and condition instructions are never latched */
+ if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle)
+ && !(f->INSN(t).type & (IT_BRANCH | IT_COND))
+ && (REF_BB(t) != b || REF_I(t) >= i)) {
+ f->INSN(t).type |= IT_LATCHED;
+ }
+ }
+ }
+ //print_cuc_bb (f, "ADD_LATCHES 1");
+
+ /* Add latches at the end of blocks as needed */
+ for (b = 0; b < f->num_bb; b++) {
+ int nreg = 0;
+ cuc_insn *insn;
+ /* Count the latched instructions of this block */
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (f->bb[b].insn[i].type & IT_LATCHED) nreg++;
+ if (nreg) {
+ /* New array: the original instructions followed by nreg II_REG copies */
+ /* NOTE(review): the malloc result is not checked */
+ insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg));
+ j = 0;
+ for (i = 0; i < f->bb[b].ninsn; i++) {
+ insn[i] = f->bb[b].insn[i];
+ if (insn[i].type & IT_LATCHED) {
+ /* j-th appended slot; it registers the value of instruction (b, i) */
+ cuc_insn *ii = &insn[f->bb[b].ninsn + j++];
+ /* NOTE(review): ii is fresh malloc memory; op[2] and op[3] are not set
+ below -- presumably harmless because their opt is OPT_NONE, confirm */
+ change_insn_type (ii, II_REG);
+ ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER;
+ ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF;
+ ii->opt[2] = ii->opt[3] = OPT_NONE;
+ ii->dep = NULL;
+ ii->type = IT_VOLATILE;
+ sprintf (ii->disasm, "reg %i_%i", b, i);
+ }
+ }
+ f->bb[b].ninsn += nreg;
+ free (f->bb[b].insn);
+ f->bb[b].insn = insn;
+ }
+ }
+ //print_cuc_bb (f, "ADD_LATCHES 2");
+
+ /* Repair references */
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ for (j = 0; j < MAX_OPERANDS; j++)
+ /* If destination instruction is latched, use register instead */
+ if (f->bb[b].insn[i].opt[j] == OPT_REF
+ && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
+ int b1, i1;
+ b1 = REF_BB (f->bb[b].insn[i].op[j]);
+ //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
+ /* Same condition as in the marking pass: other block, or same block at
+ or after the referring instruction */
+ if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
+ /* Scan the II_REG instructions at the end of block b1 backwards for the
+ one that registers the referenced instruction; the assert fires if
+ the scan reaches a non-II_REG instruction before finding it */
+ for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
+ assert (f->bb[b1].insn[i1].index == II_REG);
+ if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
+ f->bb[b].insn[i].op[j] = REF (b1, i1);
+ break;
+ }
+ }
+ }
+ }
+}
+
+/* Nonzero when instruction II may take part in CSE: it is neither a nop nor
+   an II_LRBB and carries neither IT_MEMORY nor IT_MEMADD. */
+static int cse_candidate (cuc_insn *ii)
+{
+  if (ii->index == II_NOP || ii->index == II_LRBB) return 0;
+  return !(ii->type & IT_MEMORY) && !(ii->type & IT_MEMADD);
+}
+
+/* CSE -- common subexpression elimination.
+   For every pair of candidate instructions with the same index and identical
+   operand types and non-destination operand values, the later one (or the
+   one in the other block) is turned into a nop and all references to it are
+   redirected to the first one.  Returns 1 if anything was changed. */
+int cse (cuc_func *f)
+{
+  int changed = 0;
+  int b, i, j, b1, i1, b2, i2;
+
+  for (b1 = 0; b1 < f->num_bb; b1++)
+    for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) {
+      cuc_insn *keep = &f->bb[b1].insn[i1];
+
+      if (!cse_candidate (keep)) continue;
+
+      for (b2 = 0; b2 < f->num_bb; b2++)
+        for (i2 = 0; i2 < f->bb[b2].ninsn; i2++) {
+          cuc_insn *dup = &f->bb[b2].insn[i2];
+          int same = 1;
+
+          /* Inside the same block only look at later instructions */
+          if (b1 == b2 && i2 <= i1) continue;
+          if (!cse_candidate (dup)) continue;
+
+          /* Do we have an exact match? */
+          if (keep->index != dup->index) continue;
+          if (dup->type & IT_VOLATILE) continue;
+
+          /* Check all operands also; destinations only need the same type */
+          for (j = 0; same && j < MAX_OPERANDS; j++) {
+            if (keep->opt[j] != dup->opt[j]) same = 0;
+            else if (!(keep->opt[j] & OPT_DEST)
+                     && keep->opt[j] != OPT_NONE
+                     && keep->op[j] != dup->op[j]) same = 0;
+          }
+          if (!same) continue;
+
+          /* remove duplicated instruction and relink the references */
+          cucdebug (3, "%x - %x are same\n", REF(b1, i1), REF(b2, i2));
+          change_insn_type (dup, II_NOP);
+          changed = 1;
+          for (b = 0; b < f->num_bb; b++)
+            for (i = 0; i < f->bb[b].ninsn; i++) {
+              cuc_insn *user = &f->bb[b].insn[i];
+              for (j = 0; j < MAX_OPERANDS; j++)
+                if (user->opt[j] & OPT_REF && user->op[j] == REF (b2, i2))
+                  user->op[j] = REF (b1, i1);
+            }
+        }
+    }
+  return changed;
+}
+
+/* Counts operands of II selected by the bit mask MATCH:
+     bit 1 (value 2) set   -- destination operands are counted;
+     bit 0 (value 1) set   -- non-destination OPT_REF operands are counted;
+     bit 0 (value 1) clear -- every non-destination operand that is not
+                              OPT_NONE is counted. */
+static int count_cmovs (cuc_insn *ii, int match)
+{
+  int total = 0;
+  int opn;
+
+  for (opn = 0; opn < MAX_OPERANDS; opn++) {
+    if (ii->opt[opn] & OPT_DEST) {
+      if (match & 2) total++;
+    } else if (match & 1) {
+      if (ii->opt[opn] & OPT_REF) total++;
+    } else if (ii->opt[opn] != OPT_NONE) {
+      total++;
+    }
+  }
+  return total;
+}
+
+static void search_csm (int iter, cuc_func *f, cuc_shared_list *list);
+static cuc_shared_list *main_list;
+static int *iteration;
+
+/* CSM -- common subexpression matching -- resource sharing
+   We try to match tree of instruction inside a BB with as many
+   matches as possible. All possibilities are collected and
+   options, making situation worse are removed.
+
+   The surviving options are stored per basic block in f->bb[b].tim
+   (f->bb[b].ntim entries; tim is NULL when there are none).
+
+   Allocations are performed outside of assert (), so that they are not
+   compiled out when NDEBUG is defined. */
+void csm (cuc_func *f)
+{
+  int b, i, j;
+  int cnt;
+  cuc_shared_list *list;
+  cuc_timings timings;
+
+  analyse_timings (f, &timings);
+  main_list = NULL;
+  for (b = 0; b < f->num_bb; b++) {
+    /* Nothing to match in an empty block; also avoids malloc (0), which
+       may legitimately return NULL */
+    if (f->bb[b].ninsn <= 0) continue;
+
+    /* iteration[] is the marking array shared with search_csm () */
+    iteration = (int *)malloc (sizeof (int) * f->bb[b].ninsn);
+    assert (iteration);
+    for (i = 0; i < f->bb[b].ninsn; i++) {
+      /* cntc/sizec: instructions also matching in their non-reference
+         operands; cnt/size: instructions matching by index only */
+      int cnt = 0, cntc = 0;
+      double size = 0., sizec = 0.;
+      int j2 = 0;
+      for (j = 0; j < f->bb[b].ninsn; j++)
+        if (f->bb[b].insn[i].index == f->bb[b].insn[j].index) {
+          int ok = 1;
+          /* Non-reference operands must agree in both type and value
+             (the value used to be compared against the operand type) */
+          for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[j].opt[j2] & OPT_REF))
+            if (f->bb[b].insn[j].opt[j2] != f->bb[b].insn[i].opt[j2]
+                || f->bb[b].insn[j].op[j2] != f->bb[b].insn[i].op[j2]) {
+              ok = 0;
+              break;
+            }
+          if (ok) {
+            cntc++;
+            sizec = sizec + insn_size (&f->bb[b].insn[j]);
+          } else {
+            cnt++;
+            size = size + insn_size (&f->bb[b].insn[j]);
+          }
+          iteration[j] = 0;
+        } else iteration[j] = -1;
+      if (cntc > 1) {
+        list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list));
+        assert (list);
+        list->next = main_list;
+        list->from = NULL;
+        list->ref = REF (b, i);
+        list->cnt = cnt;
+        list->cmatch = 1;
+        list->cmovs = count_cmovs (&f->bb[b].insn[i], 3);
+        list->osize = sizec;
+        list->size = ii_size (f->bb[b].insn[i].index, 1);
+        main_list = list;
+        search_csm (0, f, list);
+      }
+      if (cnt > 1) {
+        list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list));
+        assert (list);
+        list->next = main_list;
+        list->from = NULL;
+        list->ref = REF (b, i);
+        list->cnt = cnt + cntc;
+        list->cmatch = 0;
+        list->cmovs = count_cmovs (&f->bb[b].insn[i], 2);
+        list->osize = size + sizec;
+        list->size = ii_size (f->bb[b].insn[i].index, 0);
+        main_list = list;
+        search_csm (0, f, list);
+      }
+    }
+    free (iteration);
+  }
+
+  for (list = main_list; list; list = list->next) list->dead = 0;
+  cnt = 0;
+  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
+  cucdebug (1, "noptions = %i\n", cnt);
+
+  /* Now we will check the real size of the 'improvements'; if the size
+     actually increases, we abandon the option */
+  for (list = main_list; list; list = list->next)
+    if (list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size >= list->osize) list->dead = 1;
+
+  cnt = 0;
+  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
+  cucdebug (1, "noptions = %i\n", cnt);
+
+  /* Count number of instructions grouped */
+  for (list = main_list; list; list = list->next) {
+    cuc_shared_list *l = list;
+    int c = 0;
+    while (l) {
+      c++;
+      /* Chains touching volatile or memory instructions are not shared */
+      if (f->INSN(l->ref).type & (IT_VOLATILE | IT_MEMORY | IT_MEMADD)) list->dead = 1;
+      l = l->from;
+    }
+    list->ninsn = c;
+  }
+
+  cnt = 0;
+  for (list = main_list; list; list = list->next)
+    if (!list->dead) cnt++;
+  cucdebug (1, "noptions = %i\n", cnt);
+
+#if 1
+  /* We can get a lot of options here, so we will delete duplicates */
+  for (list = main_list; list; list = list->next) if (!list->dead) {
+    cuc_shared_list *l;
+    for (l = list->next; l; l = l->next) if (!l->dead) {
+      int ok = 1;
+      cuc_shared_list *t1 = list;
+      cuc_shared_list *t2 = l;
+      while (ok && t1 && t2) {
+        if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) {
+          /* If other operands are matching, we must check for them also */
+          if (t1->cmatch) {
+            int j;
+            for (j = 0; j < MAX_OPERANDS; j++)
+              if (!(f->INSN(t1->ref).opt[j] & OPT_REF) || !(f->INSN(t2->ref).opt[j] & OPT_REF)
+                  || f->INSN(t1->ref).opt[j] != f->INSN(t2->ref).opt[j]
+                  || f->INSN(t1->ref).op[j] != f->INSN(t2->ref).op[j]) {
+                ok = 0;
+                break;
+              }
+          }
+
+          /* This option is duplicate, remove */
+          if (ok) t1->dead = 1;
+        }
+        t1 = t1->from;
+        t2 = t2->from;
+      }
+    }
+  }
+  cnt = 0;
+  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
+  cucdebug (1, "noptions = %i\n", cnt);
+#endif
+  /* Print out */
+  for (list = main_list; list; list = list->next) if (!list->dead) {
+    cuc_shared_list *l = list;
+    cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @",
+              cuc_insn_name (&f->INSN(list->ref)), list->cnt, list->ninsn,
+              list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs);
+    while (l) {
+      cucdebug (1, "%c%x,", l->cmatch ? '.' : '!', l->ref);
+      l = l->from;
+    }
+    cucdebug (1, "\n");
+  }
+
+  /* Calculate estimated timings */
+  for (b = 0; b < f->num_bb; b++) {
+    /* First pass: how many live options start in this block */
+    cnt = 0;
+    for (list = main_list; list; list = list->next)
+      if (!list->dead && REF_BB(list->ref) == b) cnt++;
+
+    f->bb[b].ntim = cnt;
+    if (!cnt) {
+      f->bb[b].tim = NULL;
+      continue;
+    }
+    f->bb[b].tim = (cuc_timings *)malloc (sizeof (cuc_timings) * cnt);
+    assert (f->bb[b].tim);
+
+    /* Second pass: fill one timing record per live option */
+    cnt = 0;
+    for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) {
+      cuc_shared_list *l = list;
+      f->bb[b].tim[cnt].b = b;
+      f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1;
+      f->bb[b].tim[cnt].nshared = list->ninsn;
+      f->bb[b].tim[cnt].shared = (cuc_shared_item *)
+        malloc (sizeof(cuc_shared_item) * list->ninsn);
+      assert (f->bb[b].tim[cnt].shared);
+      /* Copy the chain of shared instructions, following the from links */
+      for (i = 0; i < list->ninsn; i++, l = l->from) {
+        f->bb[b].tim[cnt].shared[i].ref = l->ref;
+        f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch;
+      }
+      f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1);
+      f->bb[b].tim[cnt].size = timings.size +
+        list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize;
+      cnt++;
+    }
+  }
+}
+
+/* Recursive function for searching through instruction graph.
+   LIST is the option being extended and ITER the current recursion depth,
+   which is also the mark value looked up in iteration[].  For every
+   reference operand of the instruction LIST points to, new options are
+   pushed on main_list and explored recursively.
+
+   Allocations are performed outside of assert (), so that they are not
+   compiled out when NDEBUG is defined. */
+static void search_csm (int iter, cuc_func *f, cuc_shared_list *list)
+{
+  int b, i, j, i1;
+  cuc_shared_list *l;
+  b = REF_BB(list->ref);
+  i = REF_I(list->ref);
+
+  for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) {
+    int t = f->bb[b].insn[i].op[j];
+    int cnt = 0, cntc = 0;
+    double size = 0., sizec = 0.;
+
+    /* Mark neighbours */
+    for (i1 = 0; i1 < f->bb[b].ninsn; i1++) {
+      if (iteration[i1] == iter && f->bb[b].insn[i1].opt[j] & OPT_REF) {
+        int t2 = f->bb[b].insn[i1].op[j];
+        if (f->INSN(t).index == f->INSN(t2).index && f->INSN(t2).opt[j] & OPT_REF) {
+          int j2;
+          int ok = 1;
+          /* NOTE(review): iteration[] is sized by csm () for one block; t2 is
+             not checked to lie in that block -- confirm this cannot index
+             past the end of the array */
+          iteration[REF_I(t2)] = iter + 1;
+          /* Non-reference operands must agree in both type and value
+             (the value used to be compared against the operand type) */
+          for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[i1].opt[j2] & OPT_REF))
+            if (f->bb[b].insn[i1].opt[j2] != f->bb[b].insn[i].opt[j2]
+                || f->bb[b].insn[i1].op[j2] != f->bb[b].insn[i].op[j2]) {
+              ok = 0;
+              break;
+            }
+          if (ok) {
+            cntc++;
+            sizec = sizec + insn_size (&f->bb[b].insn[i1]);
+          } else {
+            cnt++;
+            size = size + insn_size (&f->bb[b].insn[i1]);
+          }
+        }
+      }
+    }
+
+    /* Option with matching non-reference operands */
+    if (cntc > 1) {
+      l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list));
+      assert (l);
+      l->next = main_list;
+      main_list = l;
+      l->from = list;
+      l->ref = t;
+      l->cnt = cnt;
+      l->cmatch = 1;
+      l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 1) - 1;
+      l->size = list->size + ii_size (f->bb[b].insn[i].index, 1);
+      l->osize = sizec;
+      search_csm (iter + 1, f, l);
+    }
+    /* Option matching by instruction index only */
+    if (cnt > 1) {
+      l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list));
+      assert (l);
+      l->next = main_list;
+      main_list = l;
+      l->from = list;
+      l->ref = t;
+      l->cnt = cnt + cntc;
+      l->cmatch = 0;
+      l->osize = size + sizec;
+      l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 0) - 1;
+      l->size = list->size + ii_size (f->bb[b].insn[i].index, 0);
+      search_csm (iter + 1, f, l);
+    }
+
+    /* Unmark them back */
+    for (i1 = 0; i1 < f->bb[b].ninsn; i1++) if (iteration[i1] > iter) iteration[i1] = -1;
+  }
+}
+
+/* Displays shared instructions: the names of the NSHARED instructions in
+   SHARED (looked up in function RF), joined by '-'; a name is followed by
+   '!' when its cmatch flag is set. */
+void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared)
+{
+  const char *sep = "";
+  int k;
+
+  for (k = 0; k < nshared; k++) {
+    const char *mark = shared[k].cmatch ? "!" : "";
+    PRINTF ("%s%s%s", sep, cuc_insn_name (&rf->INSN(shared[k].ref)), mark);
+    sep = "-";
+  }
+}
+
+/* Common subexpression matching -- resource sharing, generation pass
+
+   Situation here is much simpler than with analysis -- we know the instruction sequence
+   we are going to share, but we are going to do this on whole function, not just one BB.
+   We can find sequence in reference function, as pointed from "shared".
+
+   The matching itself is not implemented yet: the walk over the function is
+   empty and the reported number of matches is always 0. */
+void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared)
+{
+  int nfound = 0;
+  int blk, k;
+#warning some code here (2)
+  PRINTF ("Replacing: ");
+  print_shared (rf, shared, nshared);
+
+  for (blk = 0; blk < f->num_bb; blk++) {
+    for (k = 0; k < f->bb[blk].ninsn; k++) {
+      /* placeholder -- nothing is matched or replaced here */
+    }
+  }
+
+  PRINTF ("\nFound %i matches.\n", nfound);
+}
+
Index: verilog.c
===================================================================
--- verilog.c (nonexistent)
+++ verilog.c (revision 1765)
@@ -0,0 +1,1032 @@
+/* verilog.c -- OpenRISC Custom Unit Compiler, verilog generator
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "abstract.h"
+#include "cuc.h"
+#include "insn.h"
+#include "profiler.h"
+#include "sim-config.h"
+
+/* Shortcut: fprintf-style output to the FILE * named fo, which must be in
+   scope wherever the macro is used (written with the GNU "x..." named
+   variadic macro syntax) */
+#define GEN(x...) fprintf (fo, x)
+
+/* Find index of load/store/call: the number of entries of the same kind
+   (call, load or store) that precede REF in the f->msched[] schedule.
+   If REF is not scheduled, all entries of that kind are counted. */
+int find_lsc_index (cuc_func *f, int ref)
+{
+  int wanted;   /* MT_ flag of the kind REF belongs to */
+  int count = 0;
+  int k;
+
+  if (f->INSN(ref).index == II_CALL) wanted = MT_CALL;
+  else if (II_IS_LOAD (f->INSN(ref).index)) wanted = MT_LOAD;
+  else wanted = MT_STORE;
+
+  for (k = 0; k < f->nmsched && f->msched[k] != ref; k++)
+    if (f->mtype[k] & wanted) count++;
+
+  return count;
+}
+
+/* Print out dependencies as verilog expression.
+   With an empty list T the start signal of block B is written instead
+   (bb_start_r[] when REGISTERED is nonzero, bb_start[] otherwise);
+   otherwise the end signals of all listed loads, stores and calls are
+   joined with " && ". */
+void print_deps (FILE *fo, cuc_func *f, int b, dep_list *t, int registered)
+{
+  const char *glue = "";
+
+  if (!t) {
+    if (registered) GEN ("bb_start_r[%i]", b);
+    else GEN ("bb_start[%i]", b);
+    return;
+  }
+
+  for (; t; t = t->next) {
+    if (f->INSN(t->ref).type & IT_MEMORY) {
+      char unit = II_IS_LOAD (f->INSN(t->ref).index) ? 'l' : 's';
+      GEN ("%s%c_end[%i]", glue, unit, find_lsc_index (f, t->ref));
+    } else if (f->INSN(t->ref).index == II_CALL) {
+      GEN ("%sf_end[%i]", glue, find_lsc_index (f, t->ref));
+    } else {
+      /* Only memory accesses and calls may appear in a dependency list */
+      PRINTF ("print_deps: err %lx\n", t->ref);
+      assert (0);
+    }
+    glue = " && ";
+  }
+}
+
+/* Writes the verilog name of operand J of instruction REF into S and
+   returns S.  Constants become sized literals (1 bit wide for conditional
+   II_CMOV/II_ADD instructions, 32 bits otherwise), a destination register
+   becomes the wire of the instruction itself, a source register becomes the
+   "r<n>_i" input and a reference becomes the wire of the referenced
+   instruction.  S is left untouched for an operand type without a case. */
+char *print_op_v (cuc_func *f, char *s, int ref, int j)
+{
+  cuc_insn *ii = &f->INSN(ref);
+  unsigned long val = ii->op[j];
+  unsigned long kind = ii->opt[j];
+  int is_dest = (kind & OPT_DEST) != 0;
+
+  switch (kind & ~OPT_DEST) {
+    case OPT_NONE:
+      assert (0);
+      break;
+    case OPT_CONST:
+      if (ii->type & IT_COND && (ii->index == II_CMOV || ii->index == II_ADD)) {
+        assert (val == 0 || val == 1);
+        sprintf (s, "1'b%lx", val);
+      } else {
+        sprintf (s, "32'h%lx", val);
+      }
+      break;
+    case OPT_REGISTER:
+      if (is_dest) sprintf (s, "t%x_%x", REF_BB(ref), REF_I(ref));
+      else sprintf (s, "r%li_%c", val, 'i');   /* non-destination: always an input */
+      break;
+#if 0
+    case OPT_FREG: assert (opt & OPT_DEST);
+      sprintf (s, "fr%i_o", op);
+      break;
+#endif
+    case OPT_REF:
+      sprintf (s, "t%lx_%lx", REF_BB(val), REF_I(val));
+      break;
+  }
+  return s;
+}
+
+/* Prints out specified instruction.
+   The rtl template of the instruction (known[].rtl) is expanded into a
+   line buffer: a character with a value not above MAX_OPERANDS is replaced
+   by the name of operand (value - 1), '\b' by the block number B, anything
+   else is copied.  The line is written to FO followed by the disassembly as
+   a comment.  Loads, II_LRBB and II_REG instructions get additional
+   register assignment lines.
+
+   The buffer is filled through an append pointer: the former
+   sprintf (tmp, "%s...", tmp, ...) copied between overlapping objects and
+   the '\b' case did not pass tmp at all, both of which are undefined. */
+void print_insn_v (FILE *fo, cuc_func *f, int b, int i)
+{
+  cuc_insn *ii = &f->bb[b].insn[i];
+  char *s = known[ii->index].rtl;
+  char tmp[200] = "";
+  char *p = tmp;   /* append position inside tmp, always at its '\0' */
+
+  assert (s);
+  while (*s) {
+    /* The longest single append is an operand name (29 chars + '\0') */
+    assert (p - tmp <= (int) sizeof (tmp) - 30);
+    if (*s <= MAX_OPERANDS) {
+      char t[30];
+      p += sprintf (p, "%s", print_op_v (f, t, REF(b, i), *s - 1));
+    } else if (*s == '\b') p += sprintf (p, "%i", b);
+    else {
+      *p++ = *s;
+      *p = '\0';
+    }
+    s++;
+  }
+  GEN ("%-40s /* %s */\n", tmp, ii->disasm);
+  if (ii->type & IT_MEMORY) {
+    int nls = find_lsc_index (f, REF (b, i));
+    if (II_IS_LOAD (ii->index)) {
+      int nm;
+      /* Locate the load in the memory schedule to learn its width/sign */
+      for (nm = 0; nm < f->nmsched; nm++) if (f->msched[nm] == REF (b, i)) break;
+      assert (nm < f->nmsched);
+
+      GEN (" if (l_end[%i]) t%x_%x <= #Tp ", nls, b, i);
+      switch (f->mtype[nm] & (MT_WIDTH | MT_SIGNED)) {
+        case 1: GEN ("l_dat_i & 32'hff;\n");
+          break;
+        case 2: GEN ("l_dat_i & 32'hffff;\n");
+          break;
+        case 4 | MT_SIGNED:
+        case 4: GEN ("l_dat_i;\n");
+          break;
+        case 1 | MT_SIGNED:
+          GEN ("{24{l_dat_i[7]}, l_dat_i[7:0]};\n");
+          break;
+        case 2 | MT_SIGNED:
+          GEN ("{16{l_dat_i[15]}, l_dat_i[15:0]};\n");
+          break;
+        default: assert (0);
+      }
+    }
+  } else if (ii->index == II_LRBB) {
+    GEN (" if (rst) t%x_%x <= #Tp 1'b0;\n", b, i);
+    assert (f->bb[b].prev[0] >= 0);
+    if (f->bb[b].prev[0] == BBID_START)
+      GEN (" else if (bb_start[%i]) t%x_%x <= #Tp start_i;\n", b, b, i);
+    else
+      GEN (" else if (bb_start[%i]) t%x_%x <= #Tp bb_stb[%i];\n", b, b, i, f->bb[b].prev[0]);
+  } else if (ii->index == II_REG) {
+    assert (ii->opt[1] == OPT_REF);
+    GEN (" if (");
+    if (f->bb[b].mdep) print_deps (fo, f, b, f->bb[b].mdep, 0);
+    else GEN ("bb_stb[%i]", b);
+    GEN (") t%x_%x <= #Tp t%lx_%lx;\n", b, i,
+         REF_BB (ii->op[1]), REF_I (ii->op[1]));
+  }
+}
+
+/* Outputs binary number: the LEN lowest bits of X as a string of '0'/'1'
+   characters, most significant bit first.  A LEN of zero or less gives an
+   empty string.  The result lives in a static buffer that is overwritten
+   by the next call; LEN must not exceed its capacity of 32 digits. */
+static char *bin_str (unsigned long x, int len)
+{
+  static char bx[33];
+  char *s = bx;
+
+  /* Do not run past the static buffer */
+  assert (len <= (int) sizeof (bx) - 1);
+  while (len > 0) *s++ = '0' + ((x >> --len) & 1);
+  *s = '\0';
+  return bx;
+}
+
+/* Returns index of branch instruction inside a block b, searching from the
+   last instruction backwards, or -1 if the block has no IT_BRANCH
+   instruction */
+static int branch_index (cuc_bb *bb)
+{
+  int k = bb->ninsn;
+
+  while (--k >= 0)
+    if (bb->insn[k].type & IT_BRANCH) break;
+  return k;   /* -1 when the loop ran off the front */
+}
+
+/* For every dependency in DEP, writes a statement clearing the strobe bit of
+   the matching unit: f_stb for calls, l_stb for loads, s_stb for stores. */
+static void print_turn_off_dep (FILE *fo, cuc_func *f, dep_list *dep)
+{
+  for (; dep; dep = dep->next) {
+    char unit;
+
+    assert (f->INSN(dep->ref).type & IT_MEMORY || f->INSN(dep->ref).index == II_CALL);
+    if (f->INSN(dep->ref).index == II_CALL) unit = 'f';
+    else if (II_IS_LOAD (f->INSN(dep->ref).index)) unit = 'l';
+    else unit = 's';
+
+    GEN (" %c_stb[%i] <= #Tp 1'b0;\n", unit, find_lsc_index (f, dep->ref));
+  }
+}
+
+/* Returns the position in f->fdeps[] of the function called by the II_CALL
+   instruction REF, identified by its start address (constant operand 0).
+   Asserts if REF is not such a call or the callee is not listed. */
+static int func_index (cuc_func *f, int ref)
+{
+  unsigned long target;
+  int k = 0;
+
+  assert (f->INSN(ref).index == II_CALL && f->INSN(ref).opt[0] & OPT_CONST);
+  target = f->INSN(ref).op[0];
+
+  while (k < f->nfdeps) {
+    if (f->fdeps[k]->start_addr == target) return k;
+    k++;
+  }
+
+  assert (0);
+  return -1;
+}
+
+/* Generates verilog file out of insn dataflow */
+void output_verilog (cuc_func *f, char *filename, char *funcname)
+{
+ FILE *fo;
+ int b, i, j;
+ int ci = 0, co = 0;
+ int nloads = 0, nstores = 0, ncalls = 0;
+ char tmp[256];
+ sprintf (tmp, "%s.v", filename);
+
+ log ("Generating verilog file \"%s\"\n", tmp);
+ PRINTF ("Generating verilog file \"%s\"\n", tmp);
+ if ((fo = fopen (tmp, "wt+")) == NULL) {
+ fprintf (stderr, "Cannot open '%s'\n", tmp);
+ exit (1);
+ }
+
+ /* output header */
+ GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp);
+ GEN (" (C) 2002 Opencores\n");
+ GEN (" function \"%s\"\n", funcname);
+ GEN (" at %08lx - %08lx\n", f->start_addr, f->end_addr);
+ GEN (" num BBs %i */\n\n", f->num_bb);
+
+ GEN ("`include \"timescale.v\"\n\n");
+ GEN ("module %s (clk, rst,\n", filename);
+ GEN (" l_adr_o, l_dat_i, l_req_o,\n");
+ GEN (" l_sel_o, l_linbrst_o, l_rdy_i,\n");
+ GEN (" s_adr_o, s_dat_o, s_req_o,\n");
+ GEN (" s_sel_o, s_linbrst_o, s_rdy_i,\n");
+
+ GEN ("/* inputs */ ");
+ for (i = 0; i < MAX_REGS; i++)
+ if (f->used_regs[i]) {
+ GEN ("r%i_i, ", i);
+ ci++;
+ }
+ if (!ci) GEN ("/* NONE */");
+
+ GEN ("\n/* outputs */ ");
+ for (i = 0; i < MAX_REGS; i++)
+ if (f->lur[i] >= 0 && !f->saved_regs[i]) {
+ GEN ("r%i_o, ", i);
+ co++;
+ }
+
+ if (!co) GEN ("/* NONE */");
+ if (f->nfdeps) {
+ GEN ("\n/* f. calls */, fstart_o, %sfend_i, fr11_i, ",
+ log2_int (f->nfdeps) > 0 ? "fid_o, " : "");
+ for (i = 0; i < 6; i++) GEN ("fr%i_o, ", i + 3);
+ }
+ GEN ("\n start_i, end_o, busy_o);\n\n");
+
+ GEN ("parameter Tp = 1;\n\n");
+
+ GEN ("input clk, rst;\n");
+ GEN ("input start_i;\t/* Module starts when set to 1 */ \n");
+ GEN ("output end_o;\t/* Set when module finishes, cleared upon start_i == 1 */\n");
+ GEN ("output busy_o;\t/* Set when module should not be interrupted */\n");
+ GEN ("\n/* Bus signals */\n");
+ GEN ("output l_req_o, s_req_o;\n");
+ GEN ("input l_rdy_i, s_rdy_i;\n");
+ GEN ("output [3:0] l_sel_o, s_sel_o;\n");
+ GEN ("output [31:0] l_adr_o, s_adr_o;\n");
+ GEN ("output l_linbrst_o, s_linbrst_o;\n");
+ GEN ("input [31:0] l_dat_i;\n");
+ GEN ("output [31:0] s_dat_o;\n\n");
+
+ GEN ("reg l_req_o, s_req_o;\n");
+ GEN ("reg [31:0] l_adr_o, s_adr_o;\n");
+ GEN ("reg [3:0] l_sel_o, s_sel_o;\n");
+ GEN ("reg [31:0] s_dat_o;\n");
+ GEN ("reg l_linbrst_o, s_linbrst_o;\n");
+
+ if (ci || co) GEN ("\n/* module ports */\n");
+ if (ci) {
+ int first = 1;
+ GEN ("input [31:0]");
+ for (i = 0; i < MAX_REGS; i++)
+ if (f->used_regs[i]) {
+ GEN ("%sr%i_i", first ? " " : ", ", i);
+ first = 0;
+ }
+ GEN (";\n");
+ }
+
+ if (co) {
+ int first = 1;
+ GEN ("output [31:0]");
+ for (i = 0; i < MAX_REGS; i++)
+ if (f->lur[i] >= 0 && !f->saved_regs[i]) {
+ GEN ("%sr%i_o", first ? " " : ", ", i);
+ first = 0;
+ }
+ GEN (";\n");
+ }
+
+ if (f->nfdeps) {
+ GEN ("\n/* Function calls */\n");
+ GEN ("output [31:0] fr3_o");
+ for (i = 1; i < 6; i++) GEN (", fr%i_o", i + 3);
+ GEN (";\n");
+ GEN ("input [31:0] fr11_i;\n");
+ if (log2_int(f->nfdeps) > 0) GEN ("output [%i:0] fid_o;\n", log2_int(f->nfdeps));
+ GEN ("output fstart_o;\n");
+ GEN ("input fend_i;\n");
+ }
+
+ /* Count loads & stores */
+ for (i = 0; i < f->nmsched; i++)
+ if (f->mtype[i] & MT_STORE) nstores++;
+ else if (f->mtype[i] & MT_LOAD) nloads++;
+ else if (f->mtype[i] & MT_CALL) ncalls++;
+
+ /* Output internal registers for loads */
+ if (nloads) {
+ int first = 1;
+ int num = 0;
+ GEN ("\n/* internal registers for loads */\n");
+ for (i = 0; i < f->nmsched; i++)
+ if (f->mtype[i] & MT_LOAD) {
+ GEN ("%st%x_%x", first ? "reg [31:0] " : ", ",
+ REF_BB(f->msched[i]), REF_I(f->msched[i]));
+
+ if (num >= 8) {
+ GEN (";\n");
+ first = 1;
+ num = 0;
+ } else {
+ first = 0;
+ num++;
+ }
+ }
+ if (!first) GEN (";\n");
+ }
+
+ /* Internal register for function return value */
+ if (f->nfdeps) {
+ GEN ("\n/* Internal register for function return value */\n");
+ GEN ("reg [31:0] fr11_r;\n");
+ }
+
+ GEN ("\n/* 'zero or one' hot state machines */\n");
+ if (nloads) GEN ("reg [%i:0] l_stb; /* loads */\n", nloads - 1);
+ if (nstores) GEN ("reg [%i:0] s_stb; /* stores */\n", nstores - 1);
+ GEN ("reg [%i:0] bb_stb; /* basic blocks */\n", f->num_bb - 1);
+
+ {
+ int first = 2;
+ int num = 0;
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (f->bb[b].insn[i].type & IT_COND
+ && f->bb[b].insn[i].index != II_REG
+ && f->bb[b].insn[i].index != II_LRBB) {
+ if (first == 2) GEN ("\n/* basic block condition wires */\n");
+ GEN ("%st%x_%x", first ? "wire " : ", ", b, i);
+ if (num >= 8) {
+ GEN (";\n");
+ first = 1;
+ num = 0;
+ } else {
+ first = 0;
+ num++;
+ }
+ }
+ if (!first) GEN (";\n");
+
+ GEN ("\n/* forward declaration of normal wires */\n");
+ num = 0;
+ first = 1;
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (!(f->bb[b].insn[i].type & (IT_COND | IT_BRANCH))
+ && f->bb[b].insn[i].index != II_REG
+ && f->bb[b].insn[i].index != II_LRBB) {
+ /* Exclude loads */
+ if (f->bb[b].insn[i].type & IT_MEMORY && II_IS_LOAD (f->bb[b].insn[i].index)) continue;
+ GEN ("%st%x_%x", first ? "wire [31:0] " : ", ", b, i);
+ if (num >= 8) {
+ GEN (";\n");
+ first = 1;
+ num = 0;
+ } else {
+ first = 0;
+ num++;
+ }
+ }
+ if (!first) GEN (";\n");
+
+ GEN ("\n/* forward declaration registers */\n");
+ num = 0;
+ first = 1;
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (f->bb[b].insn[i].index == II_REG
+ && f->bb[b].insn[i].index != II_LRBB) {
+ GEN ("%st%x_%x", first ? "reg [31:0] " : ", ", b, i);
+ if (num >= 8) {
+ GEN (";\n");
+ first = 1;
+ num = 0;
+ } else {
+ first = 0;
+ num++;
+ }
+ }
+ if (!first) GEN (";\n");
+
+ num = 0;
+ first = 1;
+ for (b = 0; b < f->num_bb; b++)
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (f->bb[b].insn[i].index != II_REG
+ && f->bb[b].insn[i].index == II_LRBB) {
+ GEN ("%st%x_%x", first ? "reg " : ", ", b, i);
+ if (num >= 8) {
+ GEN (";\n");
+ first = 1;
+ num = 0;
+ } else {
+ first = 0;
+ num++;
+ }
+ }
+ if (!first) GEN (";\n");
+ }
+
+ if (nloads || nstores) GEN ("\n/* dependencies */\n");
+ if (nloads) GEN ("wire [%i:0] l_end = l_stb & {%i{l_rdy_i}};\n",
+ nloads - 1, nloads);
+ if (nstores) GEN ("wire [%i:0] s_end = s_stb & {%i{s_rdy_i}};\n",
+ nstores - 1, nstores);
+ if (ncalls) GEN ("wire [%i:0] f_end = f_stb & {%i{fend_i}};\n",
+ ncalls - 1, ncalls);
+
+ GEN ("\n/* last dependency */\n");
+ GEN ("wire end_o = ");
+ for (b = 0; b < f->num_bb; b++) {
+ for (i = 0; i < 2; i++) if (f->bb[b].next[i] == BBID_END) {
+ GEN ("bb_stb[%i]", b);
+ if (f->bb[b].mdep) {
+ GEN (" && ");
+ print_deps (fo, f, b, f->bb[b].mdep, 0);
+ }
+ /* Is branch to BBID_END conditional? */
+ if (f->bb[b].next[1 - i] >= 0) {
+ int bidx = branch_index (&f->bb[b]);
+ char t[30];
+ print_op_v (f, t, REF (b, bidx), 1);
+ GEN (" && %s%s", i ? "" : "!", t);
+ }
+ }
+ }
+ GEN (";\n");
+ GEN ("wire busy_o = |bb_stb;\n");
+
+
+ GEN ("\n/* Basic block triggers */\n");
+ GEN ("wire [%2i:0] bb_start = {\n", f->num_bb - 1);
+ for (b = f->num_bb - 1; b >= 0; b--) {
+ GEN (" /* bb_start[%2i] */ ", b);
+ for (i = 0; i < 2; i++) if (f->bb[b].prev[i] >= 0 && f->bb[b].prev[i] != BBID_START) {
+ cuc_bb *prev = &f->bb[f->bb[b].prev[i]];
+ int t;
+ if (i) GEN ("\n || ");
+ if (prev->mdep) {
+ print_deps (fo, f, f->bb[b].prev[i], prev->mdep, 0);
+ GEN (" && ");
+ }
+ GEN ("bb_stb[%i]", f->bb[b].prev[i]);
+ if (prev->next[0] >= 0 && prev->next[0] != BBID_END
+ && prev->next[1] >= 0 && prev->next[1] != BBID_END) {
+ int bi = REF (f->bb[b].prev[i], branch_index (&f->bb[f->bb[b].prev[i]]));
+ int ci;
+ assert (bi >= 0);
+ ci = f->INSN(bi).op[1];
+ t = prev->next[0] == b;
+ GEN (" && ");
+ if (f->INSN(bi).opt[1] & OPT_REF) {
+ GEN ("%st%x_%x", t ? "" : "!", REF_BB(ci), REF_I(ci));
+ } else {
+ cucdebug (5, "%x!%x!%x\n", bi, ci, f->INSN(bi).opt[1]);
+ assert (f->INSN(bi).opt[1] & OPT_CONST);
+ GEN ("%s%i", t ? "" : "!", ci);
+ }
+ }
+ } else break;
+ if (!i) GEN ("start_i");
+ if (b == 0) GEN ("};\n");
+ else GEN (",\n");
+ }
+
+ GEN ("\n/* Register the bb_start */\n");
+ GEN ("reg [%2i:0] bb_start_r;\n\n", f->num_bb - 1);
+ GEN ("always @(posedge rst or posedge clk)\n");
+ GEN ("begin\n");
+ GEN (" if (rst) bb_start_r <= #Tp %i'b0;\n", f->num_bb);
+ GEN (" else if (end_o) bb_start_r <= #Tp %i'b0;\n", f->num_bb);
+ GEN (" else bb_start_r <= #Tp bb_start;\n");
+ GEN ("end\n");
+
+ GEN ("\n/* Logic */\n");
+ /* output body */
+ for (b = 0; b < f->num_bb; b++) {
+ GEN ("\t\t/* BB%i */\n", b);
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ print_insn_v (fo, f, b, i);
+ GEN ("\n");
+ }
+
+ if (co) {
+ GEN ("\n/* Outputs */\n");
+ for (i = 0; i < MAX_REGS; i++)
+ if (f->lur[i] >= 0 && !f->saved_regs[i])
+ GEN ("assign r%i_o = t%x_%x;\n", i, REF_BB(f->lur[i]),
+ REF_I(f->lur[i]));
+ }
+
+ if (nstores) {
+ int cur_store;
+ GEN ("\n/* Memory stores */\n");
+ GEN ("always @(s_stb");
+ for (i = 0; i < f->nmsched; i++)
+ if (f->mtype[i] & MT_STORE) {
+ char t[30];
+ unsigned long opt = f->INSN(f->msched[i]).opt[0];
+ if ((opt & ~OPT_DEST) != OPT_CONST) {
+ GEN (" or %s", print_op_v (f, t, f->msched[i], 0));
+ }
+ }
+
+ cur_store = 0;
+ GEN (")\nbegin\n");
+ for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_STORE) {
+ char t[30];
+ GEN (" %sif (s_stb[%i]) s_dat_o = %s;\n", cur_store == 0 ? "" : "else ", cur_store,
+ print_op_v (f, t, f->msched[i], 0));
+ cur_store++;
+ //PRINTF ("msched[%i] = %x (mtype %x) %x\n", i, f->msched[i], f->mtype[i], f->INSN(f->msched[i]).op[0]);
+ }
+ GEN (" else s_dat_o = 32'hx;\n");
+ GEN ("end\n");
+ }
+
+ /* Generate load and store state machine */
+#if 0
+ GEN ("\n/* Load&store state machine */\n");
+ GEN ("always @(posedge clk or posedge rst)\n");
+ GEN (" if (rst) begin\n");
+ if (nloads) GEN (" l_stb <= #Tp %i'h0;\n", nloads);
+ if (nstores) GEN (" s_stb <= #Tp %i'h0;\n", nstores);
+ GEN (" end else begin\n");
+ for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_LOAD || f->mtype[i] & MT_STORE) {
+ int cur = 0;
+ dep_list *dep = f->INSN(f->msched[i]).dep;
+ assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER));
+ GEN (" if (");
+ print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1);
+ GEN (") begin\n");
+ print_turn_off_dep (fo, f, dep);
+ GEN (" %c_stb[%i] <= #Tp 1'b1;\n", f->mtype[i] & MT_LOAD ? 'l' : 's', cur++);
+ GEN (" end\n");
+ }
+ GEN (" if (%c_end[%i]) %c_stb <= #Tp %i'h0;\n", c, cur - 1, c, cur);
+ GEN (" end\n");
+#endif
+
+ /* Generate state generator machine */
+ for (j = 0; j < 2; j++) {
+ char c;
+ char *s;
+
+ switch (j) {
+ case 0: c = 'l'; s = "Load"; break;
+ case 1: c = 's'; s = "Store"; break;
+ case 2: c = 'c'; s = "Calls"; break;
+ }
+ if (j == 0 && nloads
+ || j == 1 && nstores
+ || j == 2 && ncalls) {
+ int cur = 0;
+ char t[30];
+
+ GEN ("\n/* %s state generator machine */\n", s);
+ GEN ("always @(");
+ for (i = 0; i < f->nmsched; i++) {
+ print_op_v (f, t, f->msched[i], 1);
+ GEN ("%s or ", t);
+ }
+ GEN ("bb_start_r");
+ if (nloads) GEN (" or l_end");
+ if (nstores) GEN (" or s_end");
+ GEN (")\n");
+ GEN ("begin\n ");
+ cucdebug (1, "%s\n", s);
+ for (i = 0; i < f->nmsched; i++)
+ if (j == 0 && f->mtype[i] & MT_LOAD
+ || j == 1 && f->mtype[i] & MT_STORE
+ || j == 2 && f->mtype[i] & MT_CALL) {
+ cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]);
+ assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER));
+ GEN ("if (");
+ print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1);
+ GEN (") begin\n");
+ GEN (" %c_req_o = 1'b1;\n", c);
+ GEN (" %c_sel_o[3:0] = 4'b", c);
+ switch (f->mtype[i] & MT_WIDTH) {
+ case 1: GEN ("0001 << (%s & 32'h3);\n",
+ print_op_v (f, t, f->msched[i], 1)); break;
+ case 2: GEN ("0011 << ((%s & 32'h1) << 1);\n",
+ print_op_v (f, t, f->msched[i], 1)); break;
+ case 4: GEN ("1111;\n"); break;
+ default: assert (0);
+ }
+ GEN (" %c_linbrst_o = 1'b%i;\n", c,
+ (f->mtype[i] & MT_BURST) && !(f->mtype[i] & MT_BURSTE) ? 1 : 0);
+ GEN (" %c_adr_o = t%lx_%lx & ~32'h3;\n", c,
+ REF_BB(f->INSN(f->msched[i]).op[1]), REF_I(f->INSN(f->msched[i]).op[1]));
+ GEN (" end else ");
+ }
+ GEN ("if (%c_end[%i]) begin\n", c, cur - 1);
+ GEN (" %c_req_o = 1'b0;\n", c);
+ GEN (" %c_sel_o[3:0] = 4'bx;\n", c);
+ GEN (" %c_linbrst_o = 1'b0;\n", c);
+ GEN (" %c_adr_o = 32'hx;\n", c);
+ GEN (" end else begin\n");
+ GEN (" %c_req_o = 1'b0;\n", c);
+ GEN (" %c_sel_o[3:0] = 4'bx;\n", c);
+ GEN (" %c_linbrst_o = 1'b0;\n", c);
+ GEN (" %c_adr_o = 32'hx;\n", c);
+ GEN (" end\n");
+ GEN ("end\n");
+ }
+ }
+
+ if (ncalls) {
+ int cur_call = 0;
+ GEN ("\n/* Function calls state machine */\n");
+ GEN ("always @(posedge clk or posedge rst)\n");
+ GEN ("begin\n");
+ GEN (" if (rst) begin\n");
+ GEN (" f_stb <= #Tp %i'h0;\n", nstores);
+ for (i = 0; i < 6; i++) GEN (" fr%i_o <= #Tp 32'h0;\n", i + 3);
+ if (log2_int(ncalls)) GEN (" fid_o <= #Tp %i'h0;\n", log2_int (f->nfdeps));
+ GEN (" fstart_o <= #Tp 1'b0;\n");
+ //GEN (" f11_r <= #Tp 32'h0;\n");
+ GEN (" end else begin\n");
+ cucdebug (1, "calls \n");
+ for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_CALL) {
+ dep_list *dep = f->INSN(f->msched[i]).dep;
+ cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]);
+ assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER));
+ GEN (" if (");
+ print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1);
+ GEN (") begin\n");
+ print_turn_off_dep (fo, f, dep);
+ GEN (" f_stb[%i] <= #Tp 1'b1;\n", cur_call++);
+ GEN (" fstart_o <= #Tp 1'b1;\n");
+ if (log2_int (f->nfdeps))
+ GEN (" fid_o <= #Tp %i'h%x;\n", log2_int (f->nfdeps), func_index (f, f->msched[i]));
+
+ for (j = 0; j < 6; j++)
+ GEN (" fr%i_o <= #Tp t%x_%x;\n", j + 3,
+ REF_BB (f->msched[i]), REF_I (f->msched[i]) - 6 + i);
+ GEN (" end\n");
+ }
+ GEN (" if (f_end[%i]) begin\n", ncalls - 1);
+ GEN (" f_stb <= #Tp %i'h0;\n", ncalls);
+ GEN (" f_start_o <= #Tp 1'b0;\n");
+ GEN (" end\n");
+ GEN (" end\n");
+ GEN ("end\n");
+ }
+
+ GEN ("\n/* Basic blocks state machine */\n");
+ GEN ("always @(posedge clk or posedge rst)\n");
+ GEN ("begin\n");
+ GEN (" if (rst) bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0);
+ GEN (" else if (end_o) begin\n");
+ GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0);
+ for (i = 0; i < f->num_bb; i++) {
+ GEN (" end else if (bb_start[%i]) begin\n", i);
+ GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 1 << i);
+ }
+ GEN (" end else if (end_o) begin\n");
+ GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0);
+ GEN (" end\n");
+ GEN ("end\n");
+
+ /* output footer */
+ GEN ("\nendmodule\n");
+
+ fclose (fo);
+}
+
+/* Writes the top-level Verilog wrapper "<filename>_top.v".
+
+   NFUNCS   - number of entries in F
+   F        - array of function descriptors; NULL entries are skipped
+   FILENAME - base name: used for the output file, for the module name and
+              as prefix of the instantiated per-function modules
+
+   Every non-NULL function gets a consecutive instance number, stored in
+   f[i]->tmp.  The routine returns without creating a file when no function
+   is present and calls exit (1) when the output file cannot be opened.
+   All output goes through GEN (presumably a printf-style writer to `fo' --
+   TODO confirm against the macro definition).  */
+void generate_main (int nfuncs, cuc_func **f, char *filename)
+{
+  FILE *fo;
+  int i, j, nrf, first;
+  char tmp[256];
+  int ncallees[MAX_FUNCS];
+  int nl[MAX_FUNCS], ns[MAX_FUNCS];
+  int maxncallees = 0;
+
+  /* Bounded formatting, so an overlong FILENAME cannot overflow tmp */
+  snprintf (tmp, sizeof (tmp), "%s_top.v", filename);
+
+  /* Number the present functions and count their loads, stores and callees.
+     NOTE(review): ncallees[] is cleared at index i but incremented at index
+     fdeps[j]->tmp inside the same pass, so a count may be reset (or use a
+     not yet assigned tmp) later on; nl[] and ns[] are never read in this
+     function.  Left unchanged -- confirm the intended indexing.  */
+  for (i = 0, nrf = 0; i < nfuncs; i++) {
+    nl[i] = ns[i] = 0;
+    ncallees[i] = 0;
+    if (f[i]) {
+      f[i]->tmp = nrf++;
+      for (j = 0; j < f[i]->nmsched; j++)
+        if (f[i]->mtype[j] & MT_LOAD) nl[i]++;
+        else if (f[i]->mtype[j] & MT_STORE) ns[i]++;
+      for (j = 0; j < f[i]->nfdeps; j++)
+        ncallees[f[i]->fdeps[j]->tmp]++;
+    }
+  }
+  if (!nrf) return;
+
+  for (i = 0; i < nrf; i++)
+    if (maxncallees < ncallees[i]) maxncallees = ncallees[i];
+
+  log ("Generating verilog file \"%s\"\n", tmp);
+  PRINTF ("Generating verilog file \"%s\"\n", tmp);
+  if ((fo = fopen (tmp, "wt+")) == NULL) {
+    fprintf (stderr, "Cannot open '%s'\n", tmp);
+    exit (1);
+  }
+
+  /* output header */
+  GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp);
+  GEN (" (C) 2002 Opencores */\n\n");
+  GEN ("/* Includes %i functions:", nrf);
+  for (i = 0; i < nfuncs; i++) if (f[i])
+    GEN ("\n%s", prof_func[i].name);
+  GEN (" */\n\n");
+
+  GEN ("`include \"timescale.v\"\n\n");
+  GEN ("module %s (clk, rst,\n", filename);
+  GEN (" /* Load and store master Wishbone ports */\n");
+  GEN (" l_adr_o, l_dat_i, l_cyc_o, l_stb_o,\n");
+  GEN (" l_sel_o, l_linbrst_o, l_rdy_i, l_we_o,\n");
+  GEN (" s_adr_o, s_dat_o, s_cyc_o, s_stb_o,\n");
+  GEN (" s_sel_o, s_linbrst_o, s_rdy_i, s_we_o,\n\n");
+  GEN (" /* cuc interface */\n");
+  GEN (" cuc_stb_i, cuc_adr_i, cuc_dat_i, cuc_dat_o, cuc_we_i, cuc_rdy_o);\n\n");
+
+  GEN ("parameter Tp = 1;\n");
+  GEN ("\n/* module ports */\n");
+  GEN ("input clk, rst, cuc_stb_i, cuc_we_i;\n");
+  GEN ("input l_rdy_i, s_rdy_i;\n");
+  GEN ("output l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n");
+  GEN ("reg l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n");
+  GEN ("output s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n");
+  GEN ("reg s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n");
+  GEN ("output cuc_rdy_o; /* Not registered ! */\n");
+  GEN ("output [3:0] l_sel_o, s_sel_o;\n");
+  GEN ("reg [3:0] l_sel_o, s_sel_o;\n");
+  GEN ("output [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n");
+  GEN ("reg [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n");
+  GEN ("input [15:0] cuc_adr_i;\n");
+  GEN ("input [31:0] l_dat_i, cuc_dat_i;\n\n");
+
+  /* One bit per instantiated function in each of these vectors */
+  GEN ("wire [%2i:0] i_we, i_re, i_finish, i_selected, i_first_reg;\n", nrf - 1);
+  GEN ("wire [%2i:0] i_bidok, i_start_bid, i_start_bidok, main_start, main_end;\n", nrf - 1);
+  GEN ("wire [%2i:0] i_start, i_end, i_start_block, i_busy;\n", nrf - 1);
+  GEN ("wire [%2i:0] i_l_req, i_s_req;\n", nrf - 1);
+  GEN ("reg [%2i:0] i_go_bsy, main_start_r;\n", nrf - 1);
+
+  GEN ("assign i_selected = {\n");
+  for (i = 0; i < nrf; i++)
+    GEN (" cuc_adr_i[15:6] == %i%s\n", i, i < nrf - 1 ? "," : "};");
+
+  GEN ("assign i_first_reg = {\n");
+  for (i = 0; i < nfuncs; i++) if (f[i]) {
+    /* Find the first used register.  The scan is bounded by MAX_REGS like
+       every other used_regs loop in this function; the former `<=' bound
+       read one element further.  j == MAX_REGS means "no register used".  */
+    for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j]) break;
+    GEN (" cuc_adr_i[5:0] == %i%s\n", j, f[i]->tmp < nrf - 1 ? "," : "};");
+  }
+
+  GEN ("assign i_we = {%i{cuc_stb_i && cuc_we_i}} & i_selected;\n", nrf);
+  GEN ("assign i_re = {%i{cuc_stb_i && !cuc_we_i}} & i_selected;\n", nrf);
+
+  GEN ("assign i_start = i_go_bsy & {%i{cuc_rdy_o}};\n", nrf);
+  GEN ("assign i_start_bidok = {\n");
+  for (i = 0; i < nrf; i++)
+    GEN (" i_start_bid[%i] < %i%s\n", i, i, i < nrf - 1 ? "," : "};");
+  GEN ("assign main_start = i_start & i_selected & i_first_reg & i_we;\n");
+  /* The replication count was missing from the argument list (undefined
+     behaviour); nrf is supplied as in the sibling assignments above.
+     NOTE(review): i_end is already nrf bits wide, so the replication itself
+     looks unintended -- confirm the wanted expression.  */
+  GEN ("assign main_end = {%i{i_end}} & i_selected;\n", nrf);
+
+  GEN ("\nalways @(posedge clk or posedge rst)\n");
+  GEN ("begin\n");
+  GEN (" if (rst) i_go_bsy <= #Tp %i'b0;\n", nrf);
+  GEN (" else i_go_bsy <= #Tp i_we | ~i_finish & i_go_bsy;\n");
+  GEN ("end\n");
+
+
+  /* Function specific data */
+  for (i = 0; i < nfuncs; i++) if (f[i]) {
+    int ci = 0, co = 0;   /* number of input / output registers emitted */
+    int fn = f[i]->tmp;   /* instance number of this function */
+    GEN ("\n/* Registers for function %s */\n", prof_func[i].name);
+    for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j]) {
+      GEN ("%s i%i_r%ii", first ? "/* inputs */\nreg [31:0]" : ",", fn, j);
+      first = 0;
+      ci++;
+    }
+    if (ci) GEN (";\n");
+
+    /* Outputs are registers with a last-use reference that are not saved */
+    for (j = 0, first = 1; j < MAX_REGS; j++)
+      if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) {
+        GEN ("%s i%i_r%io", first ? "/* outputs */\nwire [31:0]" : ",", fn, j);
+        first = 0;
+        co++;
+      }
+    if (co) GEN (";\n");
+    GEN ("wire [31:0] i%i_l_adr, i%i_s_adr;\n", fn, fn);
+
+    GEN ("always @(posedge clk or posedge rst)\n");
+    GEN (" if (rst) main_start_r <= #Tp %i'b0;\n", nrf);
+    GEN (" else main_start_r <= #Tp main_start & i_start_bidok | i_busy | ~i_end & main_start_r;\n");
+
+    if (ci) {
+      GEN ("\n/* write register access */\n");
+      GEN ("always @(posedge clk or posedge rst)\n");
+      GEN ("begin\n");
+      GEN (" if (rst) begin\n");
+      for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j])
+        GEN (" i%i_r%ii <= #Tp 32'h0;\n", fn, j);
+      GEN (" end else if (!i_go_bsy[%i] && i_we[%i])\n", fn, fn);
+      GEN (" case (cuc_adr_i[5:0])\n");
+      for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j])
+        GEN (" %-2i: i%i_r%ii <= #Tp cuc_dat_i;\n", j, fn, j);
+      GEN (" endcase\n");
+      GEN ("end\n");
+    }
+
+    GEN ("\n");
+  }
+
+  /* Generate machine for reading all function registers. Register read can be
+     delayed till function completion */
+  {
+    int co;
+    GEN ("/* read register access - data */\n");
+    GEN ("always @(posedge clk or posedge rst)\n");
+    GEN (" if (rst) cuc_dat_o <= #Tp 32'h0;\n");
+    GEN (" else if (cuc_stb_i && cuc_we_i) begin\n");
+    GEN (" ");
+
+    for (i = 0; i < nfuncs; i++) if (f[i]) {
+      co = 0;
+      for (j = 0; j < MAX_REGS; j++)
+        if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) co++;
+
+      GEN ("if (cuc_adr_i[15:6] == %i)", f[i]->tmp);
+      if (co) {
+        first = 1;
+        GEN ("\n case (cuc_adr_i[5:0])\n");
+        for (j = 0; j < MAX_REGS; j++)
+          if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j])
+            GEN (" %-2i: cuc_dat_o <= #Tp i%i_r%io;\n", j, f[i]->tmp, j);
+        GEN (" endcase\n");
+      } else {
+        GEN (" cuc_dat_o <= #Tp 32'hx;\n");
+      }
+      GEN (" else ");
+    }
+    GEN ("cuc_dat_o <= #Tp 32'hx;\n");
+    GEN (" end else cuc_dat_o <= #Tp 32'hx;\n");
+
+    GEN ("\n/* read register access - acknowledge */\n");
+    GEN ("assign cuc_rdy_o = cuc_stb_i && cuc_we_i && |(i_selected & main_end);\n");
+  }
+
+  /* Store/load Wishbone bridge: j == 0 emits the load side, j == 1 the store side */
+  for (j = 0; j < 2; j++) {
+    char t = j ? 's' : 'l';
+    GEN ("\n/* %s Wishbone bridge */\n", j ? "store" : "load");
+    GEN ("reg [%i:0] %cm_sel;\n", log2_int (nrf), t);
+    GEN ("reg [%i:0] %cm_bid;\n", log2_int (nrf), t);
+    GEN ("reg %ccyc_ip;\n\n", t);
+    GEN ("always @(posedge clk)\n");
+    GEN ("begin\n");
+    GEN (" %c_we_o <= #Tp 1'b%i;\n", t, j);
+    GEN (" %c_cyc_o <= #Tp |i_%c_req;\n", t, t);
+    GEN (" %c_stb_o <= #Tp |i_%c_req;\n", t, t);
+    GEN ("end\n");
+
+    GEN ("\n/* highest bid */\n");
+    GEN ("always @(");
+    for (i = 0; i < nrf; i++) GEN ("%si_%c_req", i > 0 ? " or " : "", t);
+    GEN (")\n");
+    for (i = 0; i < nrf; i++) GEN (" %sif (i_%c_req) %cm_bid = %i'h%x;\n",
+                                   i ? "else " : "", t, t, log2_int (nrf) + 1, i);
+
+    GEN ("\n/* selected transfer */\n");
+    GEN ("always @(posedge clk or posedge rst)\n");
+    GEN (" if (rst) %cm_sel <= #Tp %i'h0;\n", t, log2_int (nrf) + 1);
+    GEN (" else if (%c_rdy_i) %cm_sel <= #Tp %i'h0;\n", t, t, log2_int (nrf) + 1);
+    GEN (" else if (!%ccyc_ip) %cm_sel <= #Tp %cm_bid;\n", t, t, t);
+
+    GEN ("\n/* Cycle */\n");
+    GEN ("\nalways @(posedge clk or posedge rst)\n");
+    GEN (" if (rst) %ccyc_ip <= #Tp 1'b0;\n", t);
+    GEN (" else if (%c_rdy_i) %ccyc_ip <= #Tp 1'b0;\n", t, t);
+    GEN (" else %ccyc_ip <= #Tp %c_cyc_o;\n", t, t);
+  }
+
+  GEN ("\n/* Acknowledge */\n");
+  for (i = 0; i < nrf; i++) {
+    GEN ("wire i%i_s_rdy = ((sm_bid == %i & !scyc_ip) | sm_sel == %i) & s_rdy_i;\n", i, i, i);
+    GEN ("wire i%i_l_rdy = ((lm_bid == %i & !lcyc_ip) | lm_sel == %i) & l_rdy_i;\n", i, i, i);
+  }
+
+  GEN ("\n/* data, address selects and burst enables */\n");
+  for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_s_dat;\n", i);
+  for (i = 0; i < nrf; i++) GEN ("wire i%i_s_linbrst, i%i_l_linbrst;\n", i, i);
+  for (i = 0; i < nrf; i++) GEN ("wire [3:0] i%i_s_sel, i%i_l_sel;\n", i, i);
+  for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_l_dat = l_dat_i;\n", i);
+
+  /* Each output is a chain of ?: selections on the winning bid; the last
+     instance is the default branch */
+  GEN ("\nalways @(posedge clk)\n");
+  GEN ("begin\n");
+  GEN (" s_dat_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n sm_bid == %i ? i%i_s_dat : ", i, i);
+  GEN ("i%i_s_dat;\n", nrf - 1);
+  GEN (" s_adr_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n sm_bid == %i ? i%i_s_adr : ", i, i);
+  GEN ("i%i_s_adr;\n", nrf - 1);
+  GEN (" s_sel_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n sm_bid == %i ? i%i_s_sel : ", i, i);
+  GEN ("i%i_s_sel;\n", nrf - 1);
+  GEN (" s_linbrst_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n sm_bid == %i ? i%i_s_linbrst : ", i, i);
+  GEN ("i%i_s_linbrst;\n", nrf - 1);
+  GEN ("end\n\n");
+
+  GEN ("always @(posedge clk)\n");
+  GEN ("begin\n");
+  GEN (" l_adr_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n lm_bid == %i ? i%i_l_adr : ", i, i);
+  GEN ("i%i_l_adr;\n", nrf - 1);
+  GEN (" l_sel_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n lm_bid == %i ? i%i_l_sel : ", i, i);
+  GEN ("i%i_l_sel;\n", nrf - 1);
+  GEN (" l_linbrst_o <= #Tp ");
+  for (i = 0; i < nrf - 1; i++)
+    GEN ("\n lm_bid == %i ? i%i_l_linbrst : ", i, i);
+  GEN ("i%i_l_linbrst;\n", nrf - 1);
+  GEN ("end\n\n");
+
+  /* start/end signals */
+  GEN ("\n\n/* start/end signals */\n");
+  for (i = 0; i < nrf; i++) {
+    /* Argument list trimmed to the five conversions the format contains */
+    if (log2_int (maxncallees + 1))
+      GEN ("wire [%i:0] i%i_current = i%i_busy ? i%i_current_r : i%i_start_bid;\n",
+           log2_int (maxncallees + 1), i, i, i, i);
+    else GEN ("wire i%i_current = 0;\n", i);
+  }
+  GEN ("\n");
+
+  /* NOTE(review): unfinished code.  The inner loop below reuses j, which is
+     also the running instance counter of the outer loop, and the emitted
+     "begin !!!" is not valid Verilog.  Behaviour is kept as it was.  */
+  for (i = 0, j = 0; i < nfuncs; i++) if (f[i]) {
+    if (log2_int (ncallees[i])) {
+      GEN ("reg [%i:0] i%i_start_bid;\n", log2_int (ncallees[i]), j);
+      GEN ("always @(start%i", f[i]->tmp);
+      for (j = 0, first = 1; j < f[i]->nfdeps; j++)
+        if (f[i]->fdeps[j]) GEN (", ");
+      GEN (")\n");
+      GEN ("begin !!!\n"); //TODO
+      GEN (" \n");
+      GEN ("end\n");
+    }
+    GEN ("wire i%i_start = main_start[%i];\n", j, j);
+    j++;
+  }
+  GEN ("\n");
+
+  /* Instantiate one module per function and wire it to the shared signals */
+  for (i = 0; i < nfuncs; i++) if (f[i]) {
+    int nf = f[i]->tmp;
+    GEN ("\n%s%s i%i(.clk(clk), .rst(rst),\n", filename, prof_func[i].name, nf);
+    GEN (" .l_adr_o(i%i_l_adr), .l_dat_i(i%i_l_dat), .l_req_o(i_l_req[%i]),\n",
+         nf, nf, nf);
+    GEN (" .l_sel_o(i%i_l_sel), .l_linbrst_o(i%i_l_linbrst), .l_rdy_i(i%i_l_rdy),\n",
+         nf, nf, nf);
+    GEN (" .s_adr_o(i%i_s_adr), .s_dat_o(i%i_s_dat), .s_req_o(i_s_req[%i]),\n",
+         nf, nf, nf);
+    GEN (" .s_sel_o(i%i_s_sel), .s_linbrst_o(i%i_s_linbrst), .s_rdy_i(i%i_s_rdy),\n",
+         nf, nf, nf);
+    GEN (" ");
+    for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j])
+      GEN (".r%i_i(i%i_r%ii), ", j, nf, j), first = 0;
+
+    if (first) GEN ("\n ");
+    for (j = 0, first = 1; j < MAX_REGS; j++)
+      if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j])
+        GEN (".r%i_o(i%i_r%io), ", j, nf, j), first = 0;
+    if (first) GEN ("\n ");
+    if (f[i]->nfdeps) {
+      /* The two fr*_o lines had conversions but no arguments (undefined
+         behaviour); they now receive the same index as the line before.
+         NOTE(review): every other instance signal is numbered with nf, not
+         i -- confirm which index the call interface should use.  */
+      GEN (".fstart_o(i_fstart[%i]), .fend_i(i_fend[%i]), .fid_o(i%i_fid),\n", i, i, i);
+      GEN (" .fr3_o(i%i_fr3), .fr4_o(i%i_fr4), .fr5_o(i%i_fr5), .fr6_o(i%i_fr6),\n",
+           i, i, i, i);
+      GEN (" .fr7_o(i%i_fr7), .fr8_o(i%i_fr8), .fr11_i(i%i_fr11i),\n ", i, i, i);
+    }
+    GEN (".start_i(i_start[%i]), .end_o(i_end[%i]), .busy_o(i_busy[%i]));\n", nf, nf, nf);
+  }
+
+  /* output footer */
+  GEN ("\nendmodule\n");
+
+  fclose (fo);
+}
+
verilog.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: memory.c
===================================================================
--- memory.c (nonexistent)
+++ memory.c (revision 1765)
@@ -0,0 +1,542 @@
+/* memory.c -- OpenRISC Custom Unit Compiler, memory optimization and scheduling
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "abstract.h"
+#include "sim-config.h"
+#include "cuc.h"
+#include "insn.h"
+
+
+/* Frees all dependency lists of function F: the per-instruction `dep' list
+   of every instruction and the `mdep' list of every basic block.  Each list
+   head is left NULL and the memory schedule is emptied (nmsched = 0).  */
+void clean_deps (cuc_func *f)
+{
+  int bbi, ins;
+  dep_list *node, *following;
+
+  for (bbi = 0; bbi < f->num_bb; bbi++) {
+    /* dependencies hanging off the individual instructions */
+    for (ins = 0; ins < f->bb[bbi].ninsn; ins++) {
+      for (node = f->bb[bbi].insn[ins].dep; node; node = following) {
+        /* remember the successor before the node goes away */
+        following = node->next;
+        free (node);
+      }
+      f->bb[bbi].insn[ins].dep = NULL;
+    }
+
+    /* dependencies of the basic block itself */
+    for (node = f->bb[bbi].mdep; node; node = following) {
+      following = node->next;
+      free (node);
+    }
+    f->bb[bbi].mdep = NULL;
+  }
+
+  f->nmsched = 0;
+}
+
+/* Checks for a memory conflict between the memory instructions A and B of
+   function F under ordering type OTYPE (0 - exact; 1 - strong; 2 - weak;
+   3 - none).  Returns 1 if a conflict has to be assumed, 0 otherwise:
+     MO_EXACT, MO_STRONG - always 1
+     MO_WEAK  - 0 only when both addresses are II_ADD results with the same
+                first operand and constant offsets whose ranges
+                [offset, offset + width) do not overlap; 1 in any other case
+     MO_NONE  - always 0  */
+static int check_memory_conflict (cuc_func *f, cuc_insn *a, cuc_insn *b, int otype)
+{
+  switch (otype) {
+  case MO_EXACT: /* exact */
+  case MO_STRONG: /* strong */
+    return 1;
+  case MO_WEAK: /* weak */
+    assert (a->type & IT_MEMORY);
+    assert (b->type & IT_MEMORY);
+    if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD
+        &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) {
+      /* The widths are assigned outside assert (): with NDEBUG defined an
+         assignment inside the macro disappears and the overlap test below
+         would read uninitialised values.  They must be taken before a and b
+         are redirected to the address computations.  */
+      int aw = II_MEM_WIDTH (a->index);
+      int bw = II_MEM_WIDTH (b->index);
+      assert (aw >= 0);
+      assert (bw >= 0);
+
+      /* From here on a and b denote the II_ADD address instructions */
+      a = &f->INSN(a->op[1]);
+      b = &f->INSN(b->op[1]);
+      /* Different base or non-constant offset: nothing can be proven */
+      if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1]
+          || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 1;
+
+      /* Check if they overlap */
+      if (a->op[2] >= b->op[2] && a->op[2] < b->op[2] + bw) return 1;
+      if (b->op[2] >= a->op[2] && b->op[2] < a->op[2] + aw) return 1;
+      return 0;
+    } else return 1;
+  case MO_NONE: /* none */
+    return 0;
+  default:
+    assert (0);
+  }
+  /* Only reached for an unknown OTYPE when assertions are disabled */
+  return 1;
+}
+
+/* Walks all instructions of F in basic block order and, for every memory
+   instruction, records a dependency on each earlier memory instruction that
+   check_memory_conflict () reports as conflicting under ordering type OTYPE
+   (0 - exact; 1 - strong; 2 - weak; 3 - none).  */
+void add_memory_dep (cuc_func *f, int otype)
+{
+  dep_list *seen = NULL;   /* references of the memory instructions met so far */
+  int blk, idx;
+
+  for (blk = 0; blk < f->num_bb; blk++) {
+    cuc_insn *code = f->bb[blk].insn;
+
+    for (idx = 0; idx < f->bb[blk].ninsn; idx++) {
+      cuc_insn *cur = &code[idx];
+      dep_list *earlier;
+
+      if (!(cur->type & IT_MEMORY)) continue;
+
+      /* compare against everything collected before this instruction */
+      for (earlier = seen; earlier; earlier = earlier->next)
+        if (check_memory_conflict (f, cur, &f->INSN(earlier->ref), otype))
+          add_dep (&cur->dep, earlier->ref);
+
+      add_dep (&seen, REF (blk, idx));
+    }
+  }
+  dispose_list (&seen);
+}
+
+/* Checks whether two consecutive scheduled loads address the same location,
+   so that they can be joined.  Only done for weak or no memory ordering.
+
+   A pair qualifies when both entries have the same mtype with MT_LOAD set,
+   both addresses are II_ADD results with the same first operand and equal
+   constant offsets, and both loads are in the same basic block.  The load
+   with the higher reference is then turned into II_NOP, every operand that
+   referenced it is redirected to the other load, and its entry is dropped
+   from msched/mtype (the arrays are compacted in place, entry 0 always
+   stays).  Returns 1 if anything was changed, 0 otherwise.  */
+static int same_transfers (cuc_func *f, int otype)
+{
+  int i, j;
+  int modified = 0;
+
+  /* With an empty schedule the loop below never runs but j stays 1, so
+     nmsched used to become 1 and expose an unset msched[0] entry.  */
+  if (f->nmsched <= 0) return 0;
+
+  if (otype == MO_WEAK || otype == MO_NONE) {
+    /* i scans the schedule, j is the next free slot of the compacted copy */
+    for (i = 1, j = 1; i < f->nmsched; i++)
+      /* Exclude memory stores and different memory types */
+      if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) {
+        cuc_insn *a = &f->INSN(f->msched[i - 1]);
+        cuc_insn *b = &f->INSN(f->msched[i]);
+        if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD
+            &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) {
+          /* a and b now denote the address computations */
+          a = &f->INSN(a->op[1]);
+          b = &f->INSN(b->op[1]);
+          /* Not in usual form? */
+          if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1]
+              || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep;
+
+          //PRINTF ("%i %i, ", a->op[2], b->op[2]);
+
+          /* Check if they are the same => do not copy */
+          if (a->op[2] == b->op[2]
+              && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) {
+            /* yes => remove actual instruction */
+            int t1 = MIN (f->msched[i - 1], f->msched[i]);
+            int t2 = MAX (f->msched[i - 1], f->msched[i]);
+            int ub, un, uo;   /* block, instruction and operand being updated */
+            cucdebug (2, "Removing %x_%x and using %x_%x instead.\n",
+                      REF_BB(t2), REF_I(t2), REF_BB(t1), REF_I(t1));
+            change_insn_type (&f->INSN(t2), II_NOP);
+            modified = 1;
+            /* Update references */
+            for (ub = 0; ub < f->num_bb; ub++)
+              for (un = 0; un < f->bb[ub].ninsn; un++)
+                for (uo = 0; uo < MAX_OPERANDS; uo++)
+                  if (f->bb[ub].insn[un].opt[uo] & OPT_REF && f->bb[ub].insn[un].op[uo] == t2)
+                    f->bb[ub].insn[un].op[uo] = t1;
+
+          } else goto keep;
+        } else goto keep;
+      } else {
+keep:
+        /* entry i survives: copy it to the compacted position */
+        f->msched[j] = f->msched[i];
+        f->mtype[j++] = f->mtype[i];
+      }
+    f->nmsched = j;
+  }
+  return modified;
+}
+
+/* Check if two consecutive lb[zs] can be joined into lhz and if
+   two consecutive lh[zs] can be joined into lwz.
+
+   A pair of neighbouring schedule entries qualifies when both have the same
+   mtype with MT_LOAD set, both addresses are II_ADD results with the same
+   first operand and constant offsets, the second offset equals the first
+   offset plus the first width, and both loads are in the same basic block.
+   Both loads are then turned into II_NOP, ten instructions are inserted at
+   the lower reference and filled with one wider load plus two five
+   instruction groups that rebuild the original values; operands that
+   referenced the old loads are redirected to the last instruction of the
+   respective group.  The second entry is dropped from msched/mtype.
+   Returns 1 if anything was changed, 0 otherwise.  */
+static int join_transfers (cuc_func *f, int otype)
+{
+  int i, j;
+  int modified = 0;
+
+  /* With an empty schedule the loop below never runs but j stays 1, so
+     nmsched used to become 1 and expose an unset msched[0] entry.  */
+  if (f->nmsched <= 0) return 0;
+
+  /* We can change width even with strong memory ordering */
+  if (otype == MO_WEAK || otype == MO_NONE || otype == MO_STRONG) {
+    /* i scans the schedule, j is the next free slot of the compacted copy */
+    for (i = 1, j = 1; i < f->nmsched; i++)
+      /* Exclude memory stores and different memory types */
+      if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) {
+        cuc_insn *a = &f->INSN(f->msched[i - 1]);
+        cuc_insn *b = &f->INSN(f->msched[i]);
+        int aw = f->mtype[i - 1] & MT_WIDTH;
+        if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD
+            &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) {
+          /* a and b now denote the address computations */
+          a = &f->INSN(a->op[1]);
+          b = &f->INSN(b->op[1]);
+
+          /* Not in usual form? */
+          if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1]
+              || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep;
+
+          /* Check if they touch together */
+          if (a->op[2] + aw == b->op[2]
+              && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) {
+            /* yes => remove second instruction */
+            int t1 = MIN (f->msched[i - 1], f->msched[i]);
+            int t2 = MAX (f->msched[i - 1], f->msched[i]);
+            dep_list *t1dep = f->INSN(t1).dep;
+            int x, p;
+            cuc_insn *ii;
+
+            cucdebug (2, "Joining %x and %x.\n", t1, t2);
+            if (cuc_debug >= 8) print_cuc_bb (f, "PREJT");
+            change_insn_type (&f->INSN(t1), II_NOP);
+            change_insn_type (&f->INSN(t2), II_NOP);
+            /* We will reuse the memadd before the first load, and add some
+               custom code at the end */
+            insert_insns (f, t1, 10);
+            if (cuc_debug > 8) print_cuc_bb (f, "PREJT2");
+
+            /* Remove all dependencies to second access */
+            for (x = 0; x < f->num_bb; x++) {
+              int n;
+              for (n = 0; n < f->bb[x].ninsn; n++) {
+                dep_list *d = f->bb[x].insn[n].dep;
+                /* old points at the link that currently leads to d */
+                dep_list **old = &f->bb[x].insn[n].dep;
+                while (d) {
+                  if (d->ref == t2) {
+                    /* unlink the node (it is not freed here) */
+                    d = d->next;
+                    *old = d;
+                  } else {
+                    d = d->next;
+                    old = &((*old)->next);
+                  }
+                }
+              }
+            }
+
+            /* Build the following code:
+               l[hw]z p-1
+               and p-1, 0xff
+               sfle p-1, 0x7f
+               or p-2, 0xffffff00
+               cmov p-3, p-1, p-2
+               shr p-5, 8
+               and p-1, 0xff
+               sfle p-1 0x7f
+               or p-2 0xffffff00
+               cmov p-3, p-1, p-2*/
+            p = REF_I(t1);
+            cucdebug (8, "%x %x\n", f->mtype[i - 1], f->mtype[i]);
+            /* x == 0 builds the group for the first load, x == 1 the group
+               for the second one; each group occupies five instructions */
+            for (x = 0; x < 2; x++) {
+              /* type of the original load; read before mtype[i - 1] is replaced */
+              int t = f->mtype[i - 1 + x];
+              ii = &f->bb[REF_BB(t1)].insn[p];
+              if (!x) {
+                /* the joined, wider load */
+                change_insn_type (ii, aw == 1 ? II_LH : II_LW);
+                ii->type = IT_MEMORY | IT_VOLATILE;
+                ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+                ii->op[1] = t1 - 1; ii->opt[1] = OPT_REF;
+                ii->opt[2] = ii->opt[3] = OPT_NONE;
+                ii->dep = t1dep;
+                f->mtype[i - 1] = MT_LOAD | (aw == 1 ? 2 : 4);
+                f->msched[i - 1] = REF (REF_BB(t1), p);
+              } else {
+                /* shift the loaded value right by 8 */
+                change_insn_type (ii, II_SRL);
+                ii->type = 0;
+                ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+                ii->op[1] = t1; ii->opt[1] = OPT_REF;
+                ii->op[2] = 8; ii->opt[2] = OPT_CONST;
+                ii->opt[3] = OPT_NONE;
+              }
+
+              ii = &f->bb[REF_BB(t1)].insn[++p];
+              change_insn_type (ii, II_AND);
+              ii->type = 0;
+              ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+              ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF;
+              ii->op[2] = 0xff; ii->opt[2] = OPT_CONST;
+              ii->opt[3] = OPT_NONE;
+
+              ii = &f->bb[REF_BB(t1)].insn[++p];
+              change_insn_type (ii, II_SFLE);
+              ii->type = IT_COND;
+              ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+              ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF;
+              ii->op[2] = 0x7f; ii->opt[2] = OPT_CONST;
+              ii->opt[3] = OPT_NONE;
+
+              ii = &f->bb[REF_BB(t1)].insn[++p];
+              change_insn_type (ii, II_OR);
+              ii->type = 0;
+              ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+              ii->op[1] = REF (REF_BB(t1), p - 2); ii->opt[1] = OPT_REF;
+              /* the upper bits are only set for a signed original load */
+              if (t & MT_SIGNED) ii->op[2] = 0xffffff00;
+              else ii->op[2] = 0;
+              ii->opt[2] = OPT_CONST;
+              ii->opt[3] = OPT_NONE;
+
+              ii = &f->bb[REF_BB(t1)].insn[++p];
+              change_insn_type (ii, II_CMOV);
+              ii->type = 0;
+              ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
+              ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF;
+              ii->op[2] = REF (REF_BB(t1), p - 3); ii->opt[2] = OPT_REF;
+              ii->op[3] = REF (REF_BB(t1), p - 2); ii->opt[3] = OPT_REF;
+              p++;
+            }
+
+            modified = 1;
+
+            {
+              int ub, un, uo;   /* block, instruction and operand being updated */
+              /* Update references: users of the first load now read the
+                 result of the first group (t1 + 4), users of the second
+                 load the result of the second group (t1 + 9).  Operands
+                 whose instruction index lies inside the inserted window
+                 are left alone.  */
+              for (ub = 0; ub < f->num_bb; ub++)
+                for (un = 0; un < f->bb[ub].ninsn; un++)
+                  for (uo = 0; uo < MAX_OPERANDS; uo++)
+                    if (REF_I (f->bb[ub].insn[un].op[uo]) < REF_I (t1)
+                        || REF_I(f->bb[ub].insn[un].op[uo]) >= REF_I (t1) + 10) {
+                      if (f->bb[ub].insn[un].opt[uo] & OPT_REF && f->bb[ub].insn[un].op[uo] == t1)
+                        f->bb[ub].insn[un].op[uo] = t1 + 4;
+                      else if (f->bb[ub].insn[un].opt[uo] & OPT_REF && f->bb[ub].insn[un].op[uo] == t2)
+                        f->bb[ub].insn[un].op[uo] = t1 + 9;
+                    }
+            }
+            if (cuc_debug >= 8) print_cuc_bb (f, "POSTJT");
+          } else goto keep;
+        } else goto keep;
+      } else {
+keep:
+        /* entry i survives: copy it to the compacted position */
+        f->msched[j] = f->msched[i];
+        f->mtype[j++] = f->mtype[i];
+      }
+    f->nmsched = j;
+  }
+  return modified;
+}
+
+/* Ordering predicate for two memory instructions A and B of function F.
+   Returns nonzero if A should come before B: both addresses must be II_ADD
+   results with the same first operand and constant offsets, and A's offset
+   must be the smaller one.  Returns 0 in every other case.  */
+int mem_ordering_cmp (cuc_func *f, cuc_insn *a, cuc_insn *b)
+{
+  cuc_insn *adr_a, *adr_b;
+
+  assert (a->type & IT_MEMORY);
+  assert (b->type & IT_MEMORY);
+
+  /* both addresses have to be references to an addition */
+  if (!(a->opt[1] & OPT_REF) || f->INSN(a->op[1]).index != II_ADD) return 0;
+  if (!(b->opt[1] & OPT_REF) || f->INSN(b->op[1]).index != II_ADD) return 0;
+
+  adr_a = &f->INSN(a->op[1]);
+  adr_b = &f->INSN(b->op[1]);
+
+  /* same first operand ... */
+  if (adr_a->opt[1] != adr_b->opt[1]) return 0;
+  if (adr_a->op[1] != adr_b->op[1]) return 0;
+  /* ... and constant offsets on both sides */
+  if (adr_a->opt[2] != OPT_CONST) return 0;
+  if (adr_b->opt[2] != OPT_CONST) return 0;
+
+  /* Order linearly, we can then join them to bursts */
+  return adr_a->op[2] < adr_b->op[2];
+}
+
+/* Schedule memory accesses
+ 0 - exact; 1 - strong; 2 - weak; 3 - none */
+int schedule_memory (cuc_func *f, int otype)
+{
+ int b, i, j;
+ int modified = 0;
+ f->nmsched = 0;
+
+ for (b = 0; b < f->num_bb; b++) {
+ cuc_insn *insn = f->bb[b].insn;
+ for (i = 0; i < f->bb[b].ninsn; i++)
+ if (insn[i].type & IT_MEMORY) {
+ f->msched[f->nmsched++] = REF (b, i);
+ if (otype == MO_NONE || otype == MO_WEAK) insn[i].type |= IT_FLAG1; /* mark unscheduled */
+ }
+ }
+
+ for (i = 0; i < f->nmsched; i++)
+ cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' ');
+ cucdebug (2, "\n");
+
+ /* We can reorder only under the looser ordering types.
+ We assume that memory accesses are currently in a valid (but not
+ necessarily optimal) order */
+ if (otype == MO_WEAK || otype == MO_NONE) {
+ for (i = 0; i < f->nmsched; i++) {
+ int best = i;
+ int tmp;
+ for (j = i + 1; j < f->nmsched; j++) if (REF_BB(f->msched[j]) == REF_BB(f->msched[best])) {
+ if (mem_ordering_cmp (f, &f->INSN (f->msched[j]), &f->INSN(f->msched[best]))) {
+ /* Check dependencies */
+ dep_list *t = f->INSN(f->msched[j]).dep;
+ while (t) {
+ if (f->INSN(t->ref).type & IT_FLAG1) break;
+ t = t->next;
+ }
+ if (!t) best = j; /* no conflicts -> ok */
+ }
+ }
+
+ /* we have to shift instructions up, to maintain valid dependencies
+ and make space for best candidate */
+
+ /* make local copy */
+ tmp = f->msched[best];
+ for (j = best; j > i; j--) f->msched[j] = f->msched[j - 1];
+ f->msched[i] = tmp;
+ f->INSN(f->msched[i]).type &= ~IT_FLAG1; /* mark scheduled */
+ }
+ }
+
+ for (i = 0; i < f->nmsched; i++)
+ cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' ');
+ cucdebug (2, "\n");
+
+ /* Assign memory types */
+ for (i = 0; i < f->nmsched; i++) {
+ cuc_insn *a = &f->INSN(f->msched[i]);
+ f->mtype[i] = !II_IS_LOAD(a->index) ? MT_STORE : MT_LOAD;
+ f->mtype[i] |= II_MEM_WIDTH (a->index);
+ if (a->type & IT_SIGNED) f->mtype[i] |= MT_SIGNED;
+ }
+
+ if (same_transfers (f, otype)) modified = 1;
+ if (join_transfers (f, otype)) modified = 1;
+
+ for (i = 0; i < f->nmsched; i++)
+ cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' ');
+ cucdebug (2, "\n");
+ if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL");
+
+ if (config.cuc.enable_bursts) {
+ //PRINTF ("\n");
+ for (i = 1; i < f->nmsched; i++) {
+ cuc_insn *a = &f->INSN(f->msched[i - 1]);
+ cuc_insn *b = &f->INSN(f->msched[i]);
+ int aw = f->mtype[i - 1] & MT_WIDTH;
+
+ /* Burst can only be out of words */
+ if (aw != 4) continue;
+
+ if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD
+ &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) {
+ a = &f->INSN(a->op[1]);
+ b = &f->INSN(b->op[1]);
+ /* Not in usual form? */
+ if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1]
+ || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) continue;
+
+ //PRINTF ("%i %i, ", a->op[2], b->op[2]);
+
+ /* Check if they touch together */
+ if (a->op[2] + aw == b->op[2]
+ && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) {
+ /* yes => do burst */
+ f->mtype[i - 1] &= ~MT_BURSTE;
+ f->mtype[i - 1] |= MT_BURST;
+ f->mtype[i] |= MT_BURST | MT_BURSTE;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < f->nmsched; i++)
+ cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' ');
+ cucdebug (2, "\n");
+
+ /* We don't need dependencies in non-memory instructions */
+ for (b = 0; b < f->num_bb; b++) {
+ cuc_insn *insn = f->bb[b].insn;
+ for (i = 0; i < f->bb[b].ninsn; i++) if (!(insn[i].type & IT_MEMORY))
+ dispose_list (&insn[i].dep);
+ }
+
+ if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL2");
+ /* Reduce number of dependecies, keeping just direct dependencies, based on memory schedule */
+ {
+ int lastl[3] = {-1, -1, -1};
+ int lasts[3] = {-1, -1, -1};
+ int lastc[3] = {-1, -1, -1};
+ int last_load = -1, last_store = -1, last_call = -1;
+ for (i = 0; i < f->nmsched; i++) {
+ int t = f->mtype[i] & MT_LOAD ? 0 : f->mtype[i] & MT_STORE ? 1 : 2;
+ int maxl = lastl[t];
+ int maxs = lasts[t];
+ int maxc = lastc[t];
+ dep_list *tmp = f->INSN(f->msched[i]).dep;
+ cucdebug (7, "!%i %x %p\n", i, f->msched[i], tmp);
+ while (tmp) {
+ if (f->INSN(tmp->ref).type & IT_MEMORY && REF_BB(tmp->ref) == REF_BB(f->msched[i])) {
+ cucdebug (7, "%i %x %lx\n", i, f->msched[i], tmp->ref);
+ /* Search for the reference */
+ for (j = 0; j < f->nmsched; j++) if (f->msched[j] == tmp->ref) break;
+ assert (j < f->nmsched);
+ if (f->mtype[j] & MT_STORE) {
+ if (maxs < j) maxs = j;
+ } else if (f->mtype[j] & MT_LOAD) {
+ if (maxl < j) maxl = j;
+ } else if (f->mtype[j] & MT_CALL) {
+ if (maxc < j) maxc = j;
+ }
+ }
+ tmp = tmp->next;
+ }
+ dispose_list (&f->INSN(f->msched[i]).dep);
+ if (f->mtype[i] & MT_STORE) {
+ maxs = last_store;
+ last_store = i;
+ } else if (f->mtype[i] & MT_LOAD) {
+ maxl = last_load;
+ last_load = i;
+ } else if (f->mtype[i] & MT_CALL) {
+ maxc = last_call;
+ last_call = i;
+ }
+
+ if (maxl > lastl[t]) {
+ add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxl]);
+ lastl[t] = maxl;
+ }
+ if (maxs > lasts[t]) {
+ add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxs]);
+ lasts[t] = maxs;
+ }
+ if (maxc > lastc[t]) {
+ add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxc]);
+ lastc[t] = maxc;
+ }
+ //PRINTF ("%i(%i)> ml %i(%i) ms %i(%i) lastl %i %i lasts %i %i last_load %i last_store %i\n", i, f->msched[i], maxl, f->msched[maxl], maxs, f->msched[maxs], lastl[0], lastl[1], lasts[0], lasts[1], last_load, last_store);
+
+ /* What we have to wait to finish this BB? */
+ if (i + 1 >= f->nmsched || REF_BB(f->msched[i + 1]) != REF_BB(f->msched[i])) {
+ if (last_load > lastl[t]) {
+ add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_load]);
+ lastl[t] = last_load;
+ }
+ if (last_store > lasts[t]) {
+ add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_store]);
+ lasts[t] = last_store;
+ }
+ if (last_call > lastc[t]) {
+ add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_call]);
+ lastc[t] = last_call;
+ }
+ }
+ }
+ }
+ return modified;
+}
memory.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: adv.c
===================================================================
--- adv.c (nonexistent)
+++ adv.c (revision 1765)
@@ -0,0 +1,297 @@
+/* adv.c -- OpenRISC Custom Unit Compiler, Advanced Optimizations
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "sim-config.h"
+#include "abstract.h"
+#include "cuc.h"
+#include "insn.h"
+#include "support/profile.h"
+
+/* ORs mask m into the tmp field of basic block b and of every block
+   reachable from it through next[0]/next[1].  The walk does not descend
+   into blocks that already carry a bit of m, and it does not continue past
+   stopb (stopb itself is still marked). */
+static void mark_successors (cuc_func *f, int b, int m, int stopb)
+{
+  int edge;
+  cuc_bb *blk;
+
+  if (b >= 0 && b != BBID_END) {
+    blk = &f->bb[b];
+    if (!(blk->tmp & m)) {
+      blk->tmp |= m;
+      /* stopb is marked too, but the search ends there -- a new result
+         is generated in stopb */
+      if (b != stopb)
+        for (edge = 0; edge < 2; edge++)
+          mark_successors (f, blk->next[edge], m, stopb);
+    }
+  }
+}
+
+/* Returns a value with the low (log2_int (c) + 1) bits set, or 0 when c is 0.
+   The shift is done on an unsigned long and is guarded against shifting by
+   the full type width; the original shifted the int constant 1, which is
+   undefined once log2_int (c) + 1 reaches 31. */
+static unsigned long mask (unsigned long c)
+{
+  int bits;
+
+  if (!c) return 0;
+  bits = log2_int (c) + 1;
+  /* All bits requested (or more): shifting by the full width is undefined */
+  if (bits >= (int) (sizeof (unsigned long) * 8)) return ~0UL;
+  return (1UL << bits) - 1;
+}
+
+/* Calculates facts that are determined by conditionals.
+ For every basic block that ends in a branch whose condition is a set-flag
+ comparison (in the same block) of a value assigned in that block against a
+ constant, two new basic blocks are appended to f, one on each outgoing
+ edge of the block.  Each new block holds a single "fact" instruction that
+ re-defines the compared register with what is known about it on that edge.
+ References to the original assignment in blocks reachable from only one
+ of the two new blocks are then redirected to that block's fact. */
+void insert_conditional_facts (cuc_func *f)
+{
+ int b, j;
+ int b1, i1, j1;
+ cuc_insn n[2]; /* n[0] is labelled the "true" fact and goes on edge next[0], n[1] the "false" fact on edge next[1] */
+ for (b = 0; b < f->num_bb; b++) if (f->bb[b].ninsn > 0) {
+ cuc_insn *ii = &f->bb[b].insn[f->bb[b].ninsn - 1]; /* last instruction of the block */
+ /* We have following situation
+ x <= ...
+ sfxx f, x, CONST
+ bf ..., f */
+ if (ii->type & IT_BRANCH && ii->opt[1] & OPT_REF && REF_BB(ii->op[1]) == b
+ && f->INSN(ii->op[1]).opt[2] & OPT_CONST) {
+ int ok = 0; /* nonzero once a supported comparison was recognised */
+ unsigned long c = f->INSN(ii->op[1]).op[2]; /* the constant compared against */
+ int rref = f->INSN(ii->op[1]).op[1]; /* reference to the instruction producing the compared value */
+ unsigned long r;
+ if (!(f->INSN(ii->op[1]).opt[1] & OPT_REF)) continue;
+ r = f->INSN(rref).op[0]; /* operand 0 of that instruction; used as the destination register of the facts */
+
+ /* Assignment must be in same basic block */
+ if (REF_BB(rref) != b) continue;
+
+ for (j = 0; j < 2; j++) { /* default fact on both edges: r = 0 + [rref] */
+ change_insn_type (&n[j], II_ADD);
+ n[j].type = 0;
+ n[j].dep = NULL;
+ n[j].op[0] = r; n[j].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[j].op[1] = 0; n[j].opt[1] = OPT_CONST;
+ n[j].op[2] = rref; n[j].opt[2] = OPT_REF;
+ n[j].opt[3] = OPT_NONE;
+ sprintf (n[j].disasm, "conditional %s fact", j ? "false" : "true");
+ }
+
+ /* First get the conditional and the two instructions to place after the current BB */
+ switch (f->INSN(ii->op[1]).index) {
+ case II_SFEQ: /* n[0]: r = 0 + c */
+ change_insn_type (&n[0], II_ADD);
+ n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[0].op[1] = 0; n[0].opt[1] = OPT_CONST;
+ n[0].op[2] = c; n[0].opt[2] = OPT_CONST;
+ ok = 1;
+ break;
+ case II_SFNE: /* n[1]: r = 0 + c */
+ change_insn_type (&n[1], II_ADD);
+ n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[1].op[1] = 0; n[1].opt[1] = OPT_CONST;
+ n[1].op[2] = c; n[1].opt[2] = OPT_CONST;
+ ok = 2;
+ break;
+ case II_SFLT: /* n[0]: r = [rref] & mask (c) */
+ change_insn_type (&n[0], II_AND);
+ n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[0].op[1] = rref; n[0].opt[1] = OPT_REF;
+ n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST;
+ ok = 1;
+ break;
+ case II_SFGT: /* NOTE(review): SFLT/SFLE combine the mask with II_AND; II_ADD with mask (c + 1) here looks inconsistent -- confirm intent */
+ change_insn_type (&n[1], II_ADD);
+ n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[1].op[1] = rref; n[1].opt[1] = OPT_REF;
+ n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST;
+ ok = 2;
+ break;
+ case II_SFLE: /* n[0]: r = [rref] & mask (c) */
+ change_insn_type (&n[0], II_AND);
+ n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[0].op[1] = rref; n[0].opt[1] = OPT_REF;
+ n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST;
+ ok = 1;
+ break;
+ case II_SFGE: /* NOTE(review): same II_ADD / mask (c + 1) form as II_SFGT -- confirm intent */
+ change_insn_type (&n[1], II_ADD);
+ n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST;
+ n[1].op[1] = rref; n[1].opt[1] = OPT_REF;
+ n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST;
+ ok = 2;
+ break;
+ default:
+ ok = 0;
+ break;
+ }
+
+ /* Now add two BBs at the end and relink */
+ if (ok) {
+ int cnt = 0; /* never changed below, so every new block holds exactly one instruction */
+ cucdebug (1, "%x rref %x cnt %i\n", b, rref, cnt);
+ fflush (stdout);
+ for (j = 0; j < 2; j++) {
+ int nb = f->num_bb++; /* index of the new block: b -> nb -> former successor sb */
+ int sb;
+ assert (nb < MAX_BB);
+ f->bb[nb].type = 0;
+ f->bb[nb].first = -1; f->bb[nb].last = -1;
+ f->bb[nb].prev[0] = b; f->bb[nb].prev[1] = -1;
+ sb = f->bb[nb].next[0] = f->bb[b].next[j]; f->bb[nb].next[1] = -1;
+ assert (cnt >= 0);
+ cucdebug (2, "%x %x %x rref %x cnt %i\n", b, sb, nb, rref, cnt);
+ fflush (stdout);
+ assert (sb >= 0);
+ f->bb[b].next[j] = nb;
+ if (sb != BBID_END) { /* fix the back link of the former successor */
+ if (f->bb[sb].prev[0] == b) f->bb[sb].prev[0] = nb;
+ else if (f->bb[sb].prev[1] == b) f->bb[sb].prev[1] = nb;
+ else assert (0);
+ }
+ f->bb[nb].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (cnt + 1));
+ assert (f->bb[nb].insn);
+ f->bb[nb].insn[0] = n[j];
+ f->bb[nb].ninsn = cnt + 1;
+ f->bb[nb].mdep = NULL;
+ f->bb[nb].nmemory = 0;
+ f->bb[nb].cnt = 0;
+ f->bb[nb].unrolled = 0;
+ f->bb[nb].ntim = 0;
+ f->bb[nb].selected_tim = -1;
+ }
+ for (b1 = 0; b1 < f->num_bb; b1++) f->bb[b1].tmp = 0;
+
+ /* Find successor blocks and change links accordingly: blocks reachable
+ from the first new block (num_bb - 2) get bit 2, blocks reachable from
+ the second one (num_bb - 1) get bit 1; the walk stops at b */
+ mark_successors (f, f->num_bb - 2, 2, b);
+ mark_successors (f, f->num_bb - 1, 1, b);
+ for (b1 = 0; b1 < f->num_bb - 2; b1++) if (f->bb[b1].tmp == 1 || f->bb[b1].tmp == 2) { /* reachable from exactly one of the new blocks */
+ int end;
+ if (REF_BB (rref) == b1) end = REF_I (rref) + 1; /* in the defining block only instructions up to and including rref are rewritten */
+ else end = f->bb[b1].ninsn;
+ for (i1 = 0; i1 < end; i1++)
+ for (j1 = 0; j1 < MAX_OPERANDS; j1++)
+ if (f->bb[b1].insn[i1].opt[j1] & OPT_REF && f->bb[b1].insn[i1].op[j1] == rref)
+ f->bb[b1].insn[i1].op[j1] = REF (f->num_bb - f->bb[b1].tmp, 0); /* tmp 2 -> block num_bb - 2, tmp 1 -> block num_bb - 1; instruction 0 is the fact */
+ }
+ if (cuc_debug >= 3) print_cuc_bb (f, "FACT");
+ }
+ }
+ }
+}
+
+/* Returns an upper bound for operand o of instruction ref:
+   - reference operand: the max currently recorded for the referenced insn,
+   - constant operand:  the constant itself,
+   - register operand:  0xffffffff (nothing is known about it).
+   Any other operand type trips the assertion; if assertions are compiled
+   out the function still returns a defined, conservative bound instead of
+   falling off the end of a non-void function. */
+static unsigned long max_op (cuc_func *f, int ref, int o)
+{
+  cuc_insn *ii = &f->INSN(ref);
+
+  if (ii->opt[o] & OPT_REF) return f->INSN(ii->op[o]).max;
+  if (ii->opt[o] & OPT_CONST) return ii->op[o];
+  if (ii->opt[o] & OPT_REGISTER) return 0xffffffff;
+  assert (0);
+  return 0xffffffff;
+}
+
+/* Returns x with every bit below its most significant set bit set as well
+   (0 stays 0).  For a <= A and b <= B this is an upper bound of both a | b
+   and a ^ b when called with A | B. */
+static unsigned long fill_below_msb (unsigned long x)
+{
+  x |= x >> 1;
+  x |= x >> 2;
+  x |= x >> 4;
+  x |= x >> 8;
+  x |= x >> 16;
+  x |= (x >> 16) >> 16;   /* no-op for 32-bit values; avoids a full-width shift */
+  return x;
+}
+
+/* Returns an upper bound for the (unsigned, 32-bit) result of instruction
+   ref, based on the bounds currently known for its inputs.
+   The bound must never be smaller than a value the instruction can really
+   produce, so cases that cannot be bounded tightly return 0xffffffff:
+   - OR/XOR use the bit-filled combination of both input bounds (A | B
+     alone is not a bound: 3 | 4 = 7 although both inputs are <= 4),
+   - SLL is computed in 64 bits and clamped; a variable shift is unbounded,
+   - SRA replicates the sign bit once the input bound reaches bit 31,
+   - signed byte/halfword loads are sign-extended. */
+static unsigned long calc_max (cuc_func *f, int ref)
+{
+  cuc_insn *ii = &f->INSN(ref);
+  unsigned long m;
+
+  if (ii->type & IT_COND) return 1;
+  switch (ii->index) {
+    case II_ADD : return MIN ((unsigned long long) max_op (f, ref, 1)
+                              + (unsigned long long)max_op (f, ref, 2), 0xffffffff);
+    case II_SUB : return 0xffffffff;
+    case II_AND : return MIN (max_op (f, ref, 1), max_op (f, ref, 2));
+    case II_OR  :
+    case II_XOR : return fill_below_msb (max_op (f, ref, 1) | max_op (f, ref, 2));
+    case II_MUL : return MIN ((unsigned long long) max_op (f, ref, 1)
+                              * (unsigned long long)max_op (f, ref, 2), 0xffffffff);
+    case II_SLL : if (ii->opt[2] & OPT_CONST) {
+                    /* Shift counts of 32 and more are not bounded here */
+                    if (ii->op[2] >= 32) return 0xffffffff;
+                    return MIN ((unsigned long long) max_op (f, ref, 1) << ii->op[2], 0xffffffff);
+                  }
+                  /* Unknown shift count: any nonzero input can reach the top bits */
+                  return max_op (f, ref, 1) ? 0xffffffff : 0;
+    case II_SRA : m = max_op (f, ref, 1);
+                  /* With bit 31 possibly set the sign is shifted in */
+                  return m >= 0x80000000UL ? 0xffffffff : m;
+    case II_SRL : m = max_op (f, ref, 1);
+                  /* A right shift never increases the value, so m is always a
+                     valid bound; only tighten it for in-range constant counts */
+                  if (ii->opt[2] & OPT_CONST && ii->op[2] < 32) return m >> ii->op[2];
+                  else return m;
+    case II_LB  : return ii->type & IT_SIGNED ? 0xffffffff : 0xff;
+    case II_LH  : return ii->type & IT_SIGNED ? 0xffffffff : 0xffff;
+    case II_LW  : return 0xffffffff;
+    case II_SB  :
+    case II_SH  :
+    case II_SW  : return 0;
+    case II_SFEQ:
+    case II_SFNE:
+    case II_SFLE:
+    case II_SFLT:
+    case II_SFGE:
+    case II_SFGT: return 1;
+    case II_BF  : return 0;
+    case II_LRBB: return 1;
+    case II_CMOV: return MAX (max_op (f, ref, 1), max_op (f, ref, 2));
+    case II_REG : return max_op (f, ref, 1);
+    case II_NOP : assert (0);
+    case II_CALL: assert (0);
+    default: assert (0);
+  }
+  /* Only reached when assertions are compiled out */
+  return -1;
+}
+
+/* Width optimization -- detect maximum values.
+   The values are only estimates, since the exact problem is too hard.
+   The maxima are computed iteratively: every insn starts at 0 and is raised
+   to calc_max () of its inputs until a pass changes nothing.  That always
+   terminates but can be very slow, so the refinement is abandoned after
+   about CUC_WIDTH_ITERATIONS passes (normal loops should fit); after that
+   every insn whose bound would still grow is set to 0xffffffff. */
+void detect_max_values (cuc_func *f)
+{
+  int bi, ii;
+  int changed;
+  int iteration = 0;
+
+  /* Start from "nothing known": all maxima 0, all blocks enabled */
+  for (bi = 0; bi < f->num_bb; bi++) {
+    cuc_bb *blk = &f->bb[bi];
+    for (ii = 0; ii < blk->ninsn; ii++) blk->insn[ii].max = 0;
+    blk->tmp = 1;
+  }
+
+  /* Repeat while something is changing */
+  for (;;) {
+    changed = 0;
+    for (bi = 0; bi < f->num_bb; bi++) {
+      if (!f->bb[bi].tmp) continue;
+      for (ii = 0; ii < f->bb[bi].ninsn; ii++) {
+        unsigned long bound = calc_max (f, REF (bi, ii));
+        if (bound > f->bb[bi].insn[ii].max) {
+          f->bb[bi].insn[ii].max = bound;
+          changed = 1;
+        }
+      }
+    }
+    if (iteration++ > CUC_WIDTH_ITERATIONS) break;
+    if (!changed) break;
+  }
+
+  /* Iteration budget used up; assign 0xffffffff to all instructions that
+     are still unsatisfied.  This one stops in O(n ^ 2) */
+  if (iteration > CUC_WIDTH_ITERATIONS) {
+    for (changed = 1; changed; ) {
+      changed = 0;
+      for (bi = 0; bi < f->num_bb; bi++)
+        for (ii = 0; ii < f->bb[bi].ninsn; ii++) {
+          unsigned long bound = calc_max (f, REF (bi, ii));
+          if (bound > f->bb[bi].insn[ii].max) {
+            f->bb[bi].insn[ii].max = 0xffffffff;
+            changed = 1;
+          }
+        }
+    }
+  }
+  cucdebug (1, "detect_max_values %i iterations\n", iteration);
+}
+
Index: timings.c
===================================================================
--- timings.c (nonexistent)
+++ timings.c (revision 1765)
@@ -0,0 +1,305 @@
+/* timings.c -- OpenRISC Custom Unit Compiler, timing and size estimation
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+
+#ifdef HAVE_INTTYPES_H
+#include
+#endif
+
+#include "port.h"
+#include "arch.h"
+#include "abstract.h"
+#include "sim-config.h"
+#include "cuc.h"
+#include "insn.h"
+
+static cuc_timing_table *timing_table;
+static double max_bb_delay;
+
+/* Returns the delay of instruction ii, taken from the timing table:
+   the plain delay when operand 2 is not a constant, the immediate delay
+   when only operand 2 is a constant, and 0 when operands 1 and 2 are both
+   constants. */
+double insn_time (cuc_insn *ii)
+{
+  if (!(ii->opt[2] & OPT_CONST)) return timing_table[ii->index].delay;
+  if (ii->opt[1] & OPT_CONST) return 0.;
+  return timing_table[ii->index].delayi;
+}
+
+/* Returns the size of instruction ii.  The table entry is the immediate
+   size when operand 2 is a constant and the plain size otherwise; the
+   result is 0 when operand 1 is a constant, and 1/32 of the table entry
+   for conditional (IT_COND) CMOV and ADD instructions. */
+double insn_size (cuc_insn *ii)
+{
+  double area;
+
+  if (ii->opt[2] & OPT_CONST) area = timing_table[ii->index].sizei;
+  else area = timing_table[ii->index].size;
+
+  if (ii->opt[1] & OPT_CONST) return 0.;
+  if ((ii->type & IT_COND) && (ii->index == II_CMOV || ii->index == II_ADD))
+    area = area / 32.;
+  return area;
+}
+
+/* Returns the timing table size of instruction type index: the immediate
+   variant when imm is nonzero, the plain variant otherwise */
+double ii_size (int index, int imm)
+{
+  return imm ? timing_table[index].sizei : timing_table[index].size;
+}
+
+/* Returns the height of the dataflow tree of basic block b, i.e. the largest
+   accumulated insn_time () along any chain of references that stays inside
+   the block and points backwards.  An empty block has height 0. */
+static double max_delay (cuc_func *f, int b)
+{
+  double max_d = 0.;
+  double *d;
+  cuc_bb *bb = &f->bb[b];
+  int i, j;
+
+  /* Nothing to measure; also avoids a zero-sized allocation */
+  if (bb->ninsn <= 0) return 0.;
+
+  d = (double *) malloc (sizeof (double) * bb->ninsn);
+  assert (d);
+  for (i = 0; i < bb->ninsn; i++) {
+    double md = 0.;
+    for (j = 0; j < MAX_OPERANDS; j++) {
+      int op = bb->insn[i].op[j];
+      /* Only earlier instructions of this block contribute; their d[] entry
+         is already computed */
+      if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) {
+        double t = d[REF_I (op)];
+        if (t > md) md = t;
+      }
+    }
+    d[i] = md + insn_time (&bb->insn[i]);
+    if (d[i] > max_d) max_d = d[i];
+  }
+  free (d);
+  //PRINTF ("max_d%i=%f\n", b, max_d);
+  return max_d;
+}
+
+/* Calculates the memory delay of a single run of basic block b by summing
+   runtime.cuc.mdelay[] over its scheduled accesses:
+   [0] load, [1] load inside a burst, [2] store, [3] store inside a burst.
+   An access that is not part of a burst, or that ends one (MT_BURSTE),
+   uses the non-burst entry. */
+static int memory_delay (cuc_func *f, int b)
+{
+  int total = 0;
+  int k;
+
+  for (k = 0; k < f->nmsched; k++) {
+    int mt, single;
+    if (REF_BB (f->msched[k]) != b) continue;
+    mt = f->mtype[k];
+    single = !(mt & MT_BURST) || (mt & MT_BURSTE);
+    if (mt & MT_STORE)
+      total += single ? runtime.cuc.mdelay[2] : runtime.cuc.mdelay[3];
+    else if (mt & MT_LOAD)
+      total += single ? runtime.cuc.mdelay[0] : runtime.cuc.mdelay[1];
+  }
+  //PRINTF ("md%i=%i\n", b, total);
+  return total;
+}
+
+/* Cuts the dataflow tree of basic block b and marks registers.
+   Every instruction gets a group number in its tmp field: the largest group
+   among the earlier same-block instructions it references, where a
+   referenced instruction marked IT_CUT counts as one group later.  The
+   delay accumulated inside the group is tracked per instruction; once it
+   exceeds sd the instruction is marked IT_CUT and its accumulated delay
+   restarts at 0.  A warning is logged when the delay also exceeds
+   runtime.cuc.cycle_duration. */
+void cut_tree (cuc_func *f, int b, double sd)
+{
+  int i, j;
+  double *depths;
+  cuc_bb *bb = &f->bb[b];
+
+  /* Nothing to cut; also avoids a zero-sized allocation */
+  if (bb->ninsn <= 0) return;
+
+  depths = (double *) malloc (sizeof (double) * bb->ninsn);
+  assert (depths);
+
+  for (i = 0; i < bb->ninsn; i++) {
+    double md = 0.;   /* largest delay among inputs of the newest group */
+    int mg = 0;       /* newest group among the inputs */
+    for (j = 0; j < MAX_OPERANDS; j++) {
+      int op = bb->insn[i].op[j];
+      if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) {
+        double t = depths[REF_I (op)];
+        /* An input that was cut delivers its value one group later */
+        int g = f->INSN(op).tmp + ((f->INSN(op).type & IT_CUT) ? 1 : 0);
+        if (g >= mg) {
+          if (g > mg) md = 0.;   /* inputs of older groups no longer count */
+          mg = g;
+          if (t > md) md = t;
+        }
+      }
+    }
+    //PRINTF ("%2x md%.1f ", i, md);
+    md += insn_time (&bb->insn[i]);
+    //PRINTF ("md%.1f mg%i %.1f\n", md, mg, sd);
+    bb->insn[i].tmp = mg;
+    if (md > sd) {
+      bb->insn[i].type |= IT_CUT;
+      if (md > runtime.cuc.cycle_duration) {
+        log ("WARNING: operation t%x_%x may need to be registered inbetween\n", b, i);
+      }
+      depths[i] = 0.;
+    } else depths[i] = md;
+  }
+  free (depths);
+}
+
+/* Returns how many cycles are now needed to get through basic block b:
+   its memory delay plus the dataflow delay rounded up to whole cycles (at
+   least one).  When cut is nonzero and the dataflow delay does not fit in
+   one cycle, the tree is cut into stages of equal duration.  The per-cycle
+   delay also updates the file-wide max_bb_delay. */
+static int new_bb_cycles (cuc_func *f, int b, int cut)
+{
+  double delay = max_delay (f, b);
+  long cycles = ceil (delay / runtime.cuc.cycle_duration);
+  double per_cycle;
+
+  if (cycles < 1) cycles = 1;
+  per_cycle = delay / cycles;
+
+  if (cut && delay > runtime.cuc.cycle_duration) cut_tree (f, b, per_cycle);
+  if (per_cycle > max_bb_delay) max_bb_delay = per_cycle;
+
+  return memory_delay (f, b) + cycles;
+}
+
+/* Resets the group number (tmp) of every instruction of f to 0 and, when
+   config.cuc.no_multicycle is set, recomputes the cycles of every basic
+   block with cutting enabled */
+void mark_cut (cuc_func *f)
+{
+  int b, i;
+
+  /* Set starting groups */
+  for (b = 0; b < f->num_bb; b++) {
+    cuc_bb *blk = &f->bb[b];
+    i = 0;
+    while (i < blk->ninsn) blk->insn[i++].tmp = 0;
+  }
+
+  if (!config.cuc.no_multicycle) return;
+  for (b = 0; b < f->num_bb; b++) new_bb_cycles (f, b, 1);
+}
+
+/* Returns the circuit area of basic block bb: the sum of the timing table
+   sizes of its instructions, using the immediate size for instructions
+   whose operand 2 is a constant */
+static double bb_size (cuc_bb *bb)
+{
+  double area = 0.;
+  int k;
+
+  for (k = 0; k < bb->ninsn; k++) {
+    const cuc_insn *ins = &bb->insn[k];
+    area = area + ((ins->opt[2] & OPT_CONST) ? timing_table[ins->index].sizei
+                                             : timing_table[ins->index].size);
+  }
+  return area;
+}
+
+/* Recalculates bb[].cnt values, based on the generated profile file.
+   bb_filename is read as a raw sequence of ints; each one is mapped through
+   f->init_bb_reloc[] to a current basic block number (negative = ignore).
+   Consecutive repetitions of the same block are counted as one run, and at
+   the end of a run the block's cnt grows by repetitions / unrolled + 1.
+   NOTE(review): the run the trace ends in is never added to cnt -- confirm
+   that this is intended. */
+void recalc_cnts (cuc_func *f, char *bb_filename)
+{
+  int i, r, b, prevbb = -1, prevcnt = 0;
+  int buf[256];
+  const int bufsize = 256;
+  FILE *fi = fopen (bb_filename, "rb");
+
+  assert (fi);
+
+  /* initialize counts */
+  for (b = 0; b < f->num_bb; b++) f->bb[b].cnt = 0;
+
+  /* read control flow from file and set counts */
+  do {
+    r = fread (buf, sizeof (int), bufsize, fi);
+    for (i = 0; i < r; i++) {
+      b = f->init_bb_reloc[buf[i]];
+      if (b < 0) continue;
+      /* Were we in the loop? */
+      if (b == prevbb) {
+        prevcnt++;
+      } else {
+        /* End the block */
+        if (prevbb >= 0 && prevbb != BBID_START) {
+          /* Blocks that were never unrolled may carry unrolled == 0; count
+             them as unrolled once instead of dividing by zero */
+          int unrolled = f->bb[prevbb].unrolled;
+          if (unrolled < 1) unrolled = 1;
+          f->bb[prevbb].cnt += prevcnt / unrolled + 1;
+        }
+        prevcnt = 0;
+        prevbb = b;
+      }
+    }
+  } while (r == bufsize);
+
+  fclose (fi);
+}
+
+/* Analyses the current version of the design and places the results into
+   the timings structure: new_time is the estimated cycle count (register
+   transfers per run plus the weighted cycles of every basic block), size is
+   the sum of the basic block areas.  The maximum circuit delay found is
+   written to the log. */
+void analyse_timings (cuc_func *f, cuc_timings *timings)
+{
+  long cycles = 1;   /* always one mfspr at the end */
+  double area = 0.;
+  int r, blk;
+
+  /* Add time needed for mtspr/mfspr */
+  for (r = 0; r < MAX_REGS; r++)
+    if (f->used_regs[r]) cycles++;
+  cycles *= f->num_runs;
+
+  max_bb_delay = 0.;
+  blk = 0;
+  while (blk < f->num_bb) {
+    cycles += new_bb_cycles (f, blk, 0) * f->bb[blk].cnt;
+    area = area + bb_size (&f->bb[blk]);
+    blk++;
+  }
+
+  timings->new_time = cycles;
+  timings->size = area;
+  log ("Max circuit delay %.2fns; max circuit clock speed %.1fMHz\n",
+       max_bb_delay, 1000. / max_bb_delay);
+}
+
+/* Loads in the specified timings table.
+   The file consists of whitespace separated records
+       <insn name> <size> <sizei> <delay> <delayi>
+   where a token starting with '#' comments out the rest of its line.
+   Instruction names are looked up in known[]; an unknown name, an
+   unreadable file or an instruction left without an entry trips an
+   assertion.  A table loaded earlier is released first. */
+void load_timing_table (char *filename)
+{
+  int i;
+  FILE *fi;
+
+  log ("Loading timings from %s\n", filename);
+  log ("Using clock delay %.2fns (frequency %.0fMHz)\n", runtime.cuc.cycle_duration,
+       1000. / runtime.cuc.cycle_duration);
+  /* Keep the call out of assert (): it must also happen when assertions
+     are compiled out */
+  fi = fopen (filename, "rt");
+  assert (fi);
+
+  free (timing_table);   /* NULL on the first call, which free () accepts */
+  timing_table = (cuc_timing_table *)malloc ((II_LAST + 1) * sizeof (cuc_timing_table));
+  assert (timing_table);
+  /* -1 marks "not loaded"; checked after parsing */
+  for (i = 0; i <= II_LAST; i++) {
+    timing_table[i].size = -1.;
+    timing_table[i].sizei = -1.;
+    timing_table[i].delay = -1.;
+    timing_table[i].delayi = -1.;
+  }
+
+  while (!feof(fi)) {
+    char tmp[256];
+    int index = II_LAST + 1;   /* stays out of range if the name is unknown */
+    /* Field width keeps an overlong token from overflowing tmp */
+    if (fscanf (fi, "%255s", tmp) != 1) break;
+    if (tmp[0] == '#') {
+      while (!feof (fi) && fgetc (fi) != '\n');
+      continue;
+    }
+    for (i = 0; i <= II_LAST; i++)
+      if (strcmp (known[i].name, tmp) == 0) {
+        index = i;
+        break;
+      }
+    assert (index <= II_LAST);
+    i = index;
+    if (fscanf (fi, "%lf%lf%lf%lf\n", &timing_table[i].size,
+                &timing_table[i].sizei, &timing_table[i].delay, &timing_table[i].delayi) != 4) break;
+    /*PRINTF ("!%s size %f,%f delay %f,%f\n", known[i].name, timing_table[i].size,
+      timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/
+  }
+
+  /* Was everything initialized? */
+  for (i = 0; i <= II_LAST; i++) {
+    assert (timing_table[i].size >= 0 && timing_table[i].sizei >= 0
+            && timing_table[i].delay >= 0 && timing_table[i].delayi >= 0);
+    /*PRINTF ("%s size %f,%f delay %f,%f\n", known[i], timing_table[i].size,
+      timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/
+  }
+
+  fclose (fi);
+}
+
timings.c
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: cuc.h
===================================================================
--- cuc.h (nonexistent)
+++ cuc.h (revision 1765)
@@ -0,0 +1,332 @@
+/* cuc.h -- OpenRISC Custom Unit Compiler, main header file
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __DATAF_H__
+#define __DATAF_H__
+
+/* Maximum number of instructions per function; also the multiplier used by
+ REF () to pack a basic block number and an instruction index */
+#define MAX_INSNS 0x10000
+#define MAX_BB 0x1000 /* Maximum number of basic blocks per function (size of cuc_func.bb[]) */
+#define MAX_REGS 34
+#define FLAG_REG (MAX_REGS - 2)
+#define LRBB_REG (MAX_REGS - 1)
+#define CUC_MAX_STACK 0x1000 /* if more, not converted */
+#define MAX_PREROLL 32
+#define MAX_UNROLL 32
+
+#define IT_BRANCH 0x0001 /* Branch instruction */
+#define IT_INDELAY 0x0002 /* Instruction is in delay slot */
+#define IT_BBSTART 0x0004 /* BB start marker */
+#define IT_BBEND 0x0008 /* BB end marker */
+#define IT_OUTPUT 0x0010 /* this instruction holds final value of the register */
+#define IT_SIGNED 0x0020 /* Instruction is signed */
+#define IT_MEMORY 0x0040 /* Instruction does memory access */
+#define IT_UNUSED 0x0080 /* dead instruction marker */
+#define IT_FLAG1 0x0100 /* misc flags */
+#define IT_FLAG2 0x0200
+#define IT_VOLATILE 0x0400 /* Should not be moved/removed */
+#define IT_MEMADD 0x0800 /* add before the load -- should not be removed */
+#define IT_COND 0x1000 /* Conditional */
+#define IT_LATCHED 0x2000 /* Output of this instruction is latched/registered */
+#define IT_CUT 0x4000 /* After this instruction register is placed */
+
+#define OPT_NONE 0x00
+#define OPT_CONST 0x01
+#define OPT_REGISTER 0x02
+#define OPT_REF 0x04
+#define OPT_JUMP 0x08 /* Jump to an instruction index */
+#define OPT_DEST 0x10 /* This operand is dest */
+#define OPT_BB 0x20 /* Jump to BB */
+#define OPT_LRBB 0x40 /* 0 if we came in from left BB, or 1 otherwise */
+
+#define MT_WIDTH 0x007 /* These bits hold memory access width in bytes */
+#define MT_BURST 0x008 /* burst start & end markers */
+#define MT_BURSTE 0x010
+#define MT_CALL 0x020 /* This is a call */
+#define MT_LOAD 0x040 /* This memory access does a read */
+#define MT_STORE 0x080 /* This memory access does a write */
+#define MT_SIGNED 0x100 /* Signed memory access */
+
+#define MO_NONE 0 /* different memory ordering, even if there are dependencies,
+ burst can be made, width can change */
+#define MO_WEAK 1 /* different memory ordering, if there cannot be dependencies,
+ burst can be made, width can change */
+#define MO_STRONG 2 /* Same memory ordering, burst can be made, width can change */
+#define MO_EXACT 3 /* Exactly the same memory ordering and widths */
+
+#define BB_INLOOP 0x01 /* This block is inside a loop */
+#define BB_OPTIONAL 0x02
+#define BB_DEAD 0x08 /* This block is inaccessible -> to be removed */
+
+#define BBID_START MAX_BB /* Start BB pointer */
+#define BBID_END (MAX_BB + 1) /* End BB pointer */
+
+/* Various macros to minimize code size */
+#define REF(bb,i) (((bb) * MAX_INSNS) + (i))
+#define REF_BB(r) ((r) / MAX_INSNS)
+#define REF_I(r) ((r) % MAX_INSNS)
+#define INSN(ref) bb[REF_BB(ref)].insn[REF_I(ref)]
+
+#ifndef MIN
+# define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif
+
+#ifndef MAX
+# define MAX(x,y) ((x) > (y) ? (x) : (y))
+#endif
+
+#define log(x...) {fprintf (flog, x); fflush (flog); }
+
+#define cucdebug(x,s...) {if ((x) <= cuc_debug) PRINTF (s);}
+
+#define CUC_WIDTH_ITERATIONS 256
+
+/* Options */
+/* Whether we are debugging cuc (0-9) */
+extern int cuc_debug;
+
+/* Temporary registers by software convention */
+extern const int caller_saved[MAX_REGS];
+
+typedef struct _dep_list_t { /* Node of a singly linked dependency list */
+ unsigned long ref; /* REF ()-encoded instruction this entry refers to */
+ struct _dep_list_t *next; /* following entry, NULL at the end of the list */
+} dep_list;
+
+/* Shared list; an entry that is marked dead is not used.
+ NOTE(review): the users of this structure are outside this part of the
+ file; the per-field notes below are read off the names only -- confirm. */
+typedef struct _csm_list {
+ int ref;
+ int cnt;
+ int cmovs;
+ double size, osize;
+ int cmatch;
+ int dead; /* presumably nonzero when the entry is marked dead */
+ int ninsn; /* Number of associated instructions */
+ struct _csm_list *from;
+ struct _csm_list *next; /* presumably the following list entry */
+} cuc_shared_list;
+
+/* Shared resource item definition; cuc_timings.shared points to a list of
+ these */
+typedef struct {
+ int ref;
+ int cmatch;
+} cuc_shared_item;
+
+/* Implementation specific timings */
+typedef struct {
+ int b; /* Basic block # this timing is referring to */
+ int preroll; /* How many times was this BB pre/unrolled */
+ int unroll;
+ int nshared;
+ cuc_shared_item *shared; /* List of shared resources */
+ int new_time; /* estimated cycle count, as computed by analyse_timings () */
+ double size; /* estimated circuit size, as computed by analyse_timings () */
+} cuc_timings;
+
+/* Instruction entity */
+typedef struct {
+ int type; /* type of the instruction (IT_* flags) */
+ int index; /* Instruction index */
+ int opt[MAX_OPERANDS]; /* operand types (OPT_* flags) */
+ unsigned long op[MAX_OPERANDS]; /* operand values */
+ dep_list *dep; /* instruction dependencies */
+ unsigned long insn; /* Instruction opcode */
+ char disasm[40]; /* disassembled string */
+ unsigned long max; /* max result value */
+ int tmp; /* scratch value used by individual passes */
+} cuc_insn;
+
+/* Basic block entity */
+typedef struct {
+ unsigned long type; /* Type of the bb */
+ int first, last; /* Where this block lies */
+ int prev[2], next[2]; /* predecessor / successor block numbers; -1 when unused, next[] may also hold BBID_END */
+ int tmp; /* scratch value used by individual passes */
+ cuc_insn *insn; /* Instructions lie here */
+ int ninsn; /* Number of instructions */
+ int last_used_reg[MAX_REGS];
+ dep_list *mdep; /* Last memory access dependencies */
+ int nmemory;
+ int cnt; /* how many times was this block executed */
+ int unrolled; /* how many times this block has been unrolled */
+
+ int ntim; /* Basic block options */
+ cuc_timings *tim;
+ int selected_tim; /* Selected option, -1 if none */
+} cuc_bb;
+
+/* Function entity */
+typedef struct _cuc_func {
+ /* Basic blocks */
+ int num_bb;
+ cuc_bb bb[MAX_BB];
+ int saved_regs[MAX_REGS];/* Whether this register was saved */
+ int lur[MAX_REGS]; /* Location of last use */
+ int used_regs[MAX_REGS]; /* Nonzero if it was used */
+
+ /* Schedule of memory instructions */
+ int nmsched; /* number of valid entries in msched[] / mtype[] */
+ int msched[MAX_INSNS]; /* REF ()-encoded memory instructions, in schedule order */
+ int mtype[MAX_INSNS]; /* MT_* flags of the msched[] entry with the same index */
+
+ /* initial bb and their relocations to new block numbers */
+ int num_init_bb;
+ int *init_bb_reloc;
+ int orig_time; /* time in cyc required for SW implementation */
+ int num_runs; /* Number times this function was run */
+ cuc_timings timings; /* Base timings */
+ unsigned long start_addr; /* Address of first instruction in function */
+ unsigned long end_addr; /* Address of last instruction in function */
+ int memory_order; /* Memory order */
+
+ int nfdeps; /* Function dependencies */
+ struct _cuc_func **fdeps;
+
+ int tmp;
+} cuc_func;
+
+/* Instructions from function */
+extern cuc_insn insn[MAX_INSNS];
+extern int num_insn;
+extern int reloc[MAX_INSNS];
+extern FILE *flog;
+
+/* returns log2(x) */
+int log2_int (unsigned long x);
+
+/* Loads from file into global array insn */
+int cuc_load (char *in_fn);
+
+/* Negates conditional instruction */
+void negate_conditional (cuc_insn *ii);
+
+/* Scans sequence of BBs and set bb[].cnt */
+void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename);
+
+/* Prints out instructions */
+void print_insns (int bb, cuc_insn *insn, int size, int verbose);
+
+/* prints out bb string */
+void print_bb_num (int num);
+
+/* Print out basic blocks */
+void print_cuc_bb (cuc_func *func, char *s);
+
+/* Duplicates function */
+cuc_func *dup_func (cuc_func *f);
+
+/* Releases memory allocated by function */
+void free_func (cuc_func *f);
+
+/* Common subexpression matching -- resource sharing, analysis pass */
+void csm (cuc_func *f);
+
+/* Common subexpression matching -- resource sharing, generation pass */
+void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared);
+
+/* Set the BB limits */
+void detect_bb (cuc_func *func);
+
+/* Optimize basic blocks */
+int optimize_bb (cuc_func *func);
+
+/* Search and optimize complex cmov assignments */
+int optimize_cmovs (cuc_func *func);
+
+/* Optimizes dataflow tree */
+int optimize_tree (cuc_func *func);
+
+/* Remove nop instructions */
+int remove_nops (cuc_func *func);
+
+/* Removes dead instruction */
+int remove_dead (cuc_func *func);
+
+/* Removes trivial register assignments */
+int remove_trivial_regs (cuc_func *f);
+
+/* Determine inputs and outputs */
+void set_io (cuc_func *func);
+
+/* Removes BBs marked as dead */
+int remove_dead_bb (cuc_func *func);
+
+/* Common subexpression elimination */
+int cse (cuc_func *f);
+
+/* Detect register dependencies */
+void reg_dep (cuc_func *func);
+
+/* Cuts the tree and marks registers */
+void mark_cut (cuc_func *f);
+
+/* Pre-rolls/unrolls loop b as requested by preroll and unroll and returns
+ the new function. The original function is unmodified. */
+cuc_func *preunroll_loop (cuc_func *func, int b, int preroll, int unroll, char *bb_filename);
+
+/* Clean memory and data dependencies */
+void clean_deps (cuc_func *func);
+
+/* Schedule memory accesses; otype is one of the MO_* constants:
+ MO_NONE (0), MO_WEAK (1), MO_STRONG (2), MO_EXACT (3) */
+int schedule_memory (cuc_func *func, int otype);
+
+/* Generates verilog file out of insn dataflow */
+void output_verilog (cuc_func *func, char *filename, char *funcname);
+
+/* Recalculates bb[].cnt values, based on generated profile file */
+void recalc_cnts (cuc_func *f, char *bb_filename);
+
+/* Calculate timings */
+void analyse_timings (cuc_func *func, cuc_timings *timings);
+
+/* Calculates facts, that are determined by conditionals */
+void insert_conditional_facts (cuc_func *func);
+
+/* Width optimization -- detect maximum values */
+void detect_max_values (cuc_func *f);
+
+/* Inserts n nops before insn 'ref' */
+void insert_insns (cuc_func *f, int ref, int n);
+
+/* Checks for some anomalies with references */
+void cuc_check(cuc_func *f);
+
+/* Adds memory dependencies based on ordering type */
+void add_memory_dep(cuc_func *f, int otype);
+
+/* Prints out instructions */
+void print_cuc_insns(char *s, int verbose);
+
+/* Build basic blocks */
+void build_bb(cuc_func *f);
+
+/* Latch outputs in loops */
+void add_latches(cuc_func *f);
+
+void generate_main(int nfuncs, cuc_func **f, char *filename);
+
+void add_dep(dep_list **list, int dep);
+
+void dispose_list(dep_list **list);
+
+void main_cuc(char *filename);
+
+void add_data_dep(cuc_func *f);
+#endif /* __DATAF_H__ */
cuc.h
Property changes :
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: Makefile.am
===================================================================
--- Makefile.am (nonexistent)
+++ Makefile.am (revision 1765)
@@ -0,0 +1,25 @@
+# Makefile -- Makefile for cpu architecture independent simulation
+# Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+#
+# This file is part of OpenRISC 1000 Architectural Simulator.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+noinst_LIBRARIES = libcuc.a
+
+libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \
+ verilog.c timings.c insn.c insn.h adv.c
+
Index: insn.h
===================================================================
--- insn.h (nonexistent)
+++ insn.h (revision 1765)
@@ -0,0 +1,111 @@
+/* insn.h -- OpenRISC Custom Unit Compiler, internal instruction definitions
+ * Copyright (C) 2002 Marko Mlinar, markom@opencores.org
+ *
+ * This file is part of OpenRISC 1000 Architectural Simulator.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _DF_INSN_
+#define _DF_INSN_
+
+#include "cuc.h"
+
+/* Instruction types -- consecutive indexes, also used to size the 'known' table */
+#define II_ADD 0
+#define II_SUB 1
+#define II_AND 2
+#define II_OR 3
+#define II_XOR 4
+#define II_MUL 5
+#define II_SRL 6
+#define II_SLL 7
+#define II_SRA 8
+#define II_LB 9
+#define II_LH 10
+#define II_LW 11
+#define II_SB 12
+#define II_SH 13
+#define II_SW 14
+#define II_SFEQ 15
+#define II_SFNE 16
+#define II_SFLE 17
+#define II_SFLT 18
+#define II_SFGE 19
+#define II_SFGT 20
+#define II_BF 21
+#define II_LRBB 22
+#define II_CMOV 23
+#define II_REG 24
+#define II_NOP 25
+#define II_CALL 26
+#define II_LAST 26 /* highest instruction index (== II_CALL); known[] has II_LAST + 1 entries, so update this when adding a type */
+
+/* misc flags */
+#define II_MASK 0x0fff
+#define II_MEM 0x1000
+#define II_SIGNED 0x2000
+
+#define II_IS_LOAD(x) ((x) == II_LB || (x) == II_LH || (x) == II_LW) /* nonzero if x is one of the load types */
+#define II_IS_STORE(x) ((x) == II_SB || (x) == II_SH || (x) == II_SW) /* nonzero if x is one of the store types */
+#define II_MEM_WIDTH(x) (((x) == II_LB || (x) == II_SB) ? 1 :\
+ ((x) == II_LH || (x) == II_SH) ? 2 :\
+ ((x) == II_LW || (x) == II_SW) ? 4 : -1) /* access width 1, 2 or 4 (presumably bytes -- TODO confirm); -1 if x is not a load/store type */
+
+/* List of known instructions and their rtl representation; one entry per II_* index */
+typedef struct {
+ char *name; /* instruction name */
+ int comutative; /* (sic) presumably nonzero when operands may be swapped -- TODO confirm */
+ char *rtl; /* rtl representation of the instruction */
+} cuc_known_insn;
+
+extern const cuc_known_insn known[II_LAST + 1];
+
+/* Timing table -- same indexes as known table (units are not visible here) */
+typedef struct {
+ double delay; /* delay of the instruction */
+ double size; /* size of the instruction */
+ double delayi; /* presumably delay of the immediate-operand variant -- TODO confirm */
+ double sizei; /* presumably size of the immediate-operand variant -- TODO confirm */
+} cuc_timing_table;
+
+/* Conversion links */
+typedef struct {
+ const char *from;
+ const int to;
+} cuc_conv;
+
+/* Returns size of instruction type 'index'; 'imm' presumably selects the immediate variant -- TODO confirm */
+double ii_size (int index, int imm);
+
+/* Returns instruction time */
+double insn_time (cuc_insn *ii);
+
+/* Returns instruction size */
+double insn_size (cuc_insn *ii);
+
+/* Finds the known instruction with the given index and attaches it to insn */
+void change_insn_type (cuc_insn *i, int index);
+
+/* Returns instruction name */
+const char *cuc_insn_name (cuc_insn *ii);
+
+/* Loads in the specified timings table */
+void load_timing_table (char *filename);
+
+/* Displays shared instructions */
+void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared);
+
+#endif /* _DF_INSN_ */
+
Index: .
===================================================================
--- . (nonexistent)
+++ . (revision 1765)
.
Property changes :
Added: svn:ignore
## -0,0 +1,2 ##
+Makefile
+.deps