OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /or1k/tags/nog_patch_52/or1ksim/cuc
    from Rev 1429 to Rev 1765
    Reverse comparison

Rev 1429 → Rev 1765

/Makefile.in
0,0 → 1,346
# Makefile.in generated by automake 1.6.3 from Makefile.am.
# @configure_input@
 
# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
# Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Makefile -- Makefile for cpu architecture independent simulation
# Copyright (C) 2002 Marko Mlinar, markom@opencores.org
#
# This file is part of OpenRISC 1000 Architectural Simulator.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Shell used for recipes; the @...@ placeholders in this file are
# substituted when config.status generates Makefile from Makefile.in.
SHELL = @SHELL@
 
# Source tree locations and installation prefixes.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
prefix = @prefix@
exec_prefix = @exec_prefix@
 
# Installation directories.
bindir = @bindir@
sbindir = @sbindir@
libexecdir = @libexecdir@
datadir = @datadir@
sysconfdir = @sysconfdir@
sharedstatedir = @sharedstatedir@
localstatedir = @localstatedir@
libdir = @libdir@
infodir = @infodir@
mandir = @mandir@
includedir = @includedir@
oldincludedir = /usr/include
# Package-specific subdirectories of the data, library and include dirs.
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
# This directory sits one level below the top of the build tree.
top_builddir = ..
 
# Autotools programs used by the regeneration rules.
ACLOCAL = @ACLOCAL@
AUTOCONF = @AUTOCONF@
AUTOMAKE = @AUTOMAKE@
AUTOHEADER = @AUTOHEADER@
 
# "cd" helper that sets CDPATH first (used by the GTAGS rule).
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
# Install programs; the install_sh_* variants are built on $(install_sh).
INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_DATA = @INSTALL_DATA@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_HEADER = $(INSTALL_DATA)
transform = @program_transform_name@
# Install/uninstall phase markers; all are the shell no-op ":".
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
# Build, host and target system identification.
build_alias = @build_alias@
build_triplet = @build@
host_alias = @host_alias@
host_triplet = @host@
target_alias = @target_alias@
target_triplet = @target@
 
# Toolchain programs, flags and package settings substituted by configure.
EXEEXT = @EXEEXT@
OBJEXT = @OBJEXT@
PATH_SEPARATOR = @PATH_SEPARATOR@
AMTAR = @AMTAR@
AR = @AR@
ARFLAGS = @ARFLAGS@
AWK = @AWK@
BUILD_DIR = @BUILD_DIR@
CC = @CC@
CFLAGS = @CFLAGS@
CPU_ARCH = @CPU_ARCH@
DEPDIR = @DEPDIR@
INCLUDES = @INCLUDES@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LOCAL_CFLAGS = @LOCAL_CFLAGS@
LOCAL_DEFS = @LOCAL_DEFS@
MAKE_SHELL = @MAKE_SHELL@
PACKAGE = @PACKAGE@
RANLIB = @RANLIB@
STRIP = @STRIP@
SUMVERSION = @SUMVERSION@
TERMCAP_LIB = @TERMCAP_LIB@
VERSION = @VERSION@
am__include = @am__include@
am__quote = @am__quote@
host = @host@
host_cpu = @host_cpu@
host_os = @host_os@
install_sh = @install_sh@
 
# The only product of this directory is the static library libcuc.a.
# The install-exec-am and install-data-am rules in this file are empty,
# so the library is built but not installed from here.
noinst_LIBRARIES = libcuc.a
 
# Sources (and headers) that make up libcuc.a.
libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \
verilog.c timings.c insn.c insn.h adv.c
 
subdir = cuc
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
LIBRARIES = $(noinst_LIBRARIES)
 
# Archiver command and the object list for libcuc.a (one object per .c file).
libcuc_a_AR = $(AR) cru
libcuc_a_LIBADD =
am_libcuc_a_OBJECTS = cuc.$(OBJEXT) load.$(OBJEXT) bb.$(OBJEXT) \
memory.$(OBJEXT) verilog.$(OBJEXT) timings.$(OBJEXT) \
insn.$(OBJEXT) adv.$(OBJEXT)
libcuc_a_OBJECTS = $(am_libcuc_a_OBJECTS)
 
# Preprocessor/linker settings and the compile and link command lines.
DEFS = @DEFS@
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
CPPFLAGS = @CPPFLAGS@
LDFLAGS = @LDFLAGS@
LIBS = @LIBS@
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
# Per-object dependency files; only defined when AMDEP_TRUE is enabled.
@AMDEP_TRUE@DEP_FILES = ./$(DEPDIR)/adv.Po ./$(DEPDIR)/bb.Po \
@AMDEP_TRUE@ ./$(DEPDIR)/cuc.Po ./$(DEPDIR)/insn.Po \
@AMDEP_TRUE@ ./$(DEPDIR)/load.Po ./$(DEPDIR)/memory.Po \
@AMDEP_TRUE@ ./$(DEPDIR)/timings.Po ./$(DEPDIR)/verilog.Po
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
# Files that go into a distribution.
DIST_SOURCES = $(libcuc_a_SOURCES)
DIST_COMMON = Makefile.am Makefile.in
SOURCES = $(libcuc_a_SOURCES)
 
# Default goal.
all: all-am
 
# Clear the built-in suffix list, then enable only the suffixes used here.
.SUFFIXES:
.SUFFIXES: .c .o .obj
# Regenerate Makefile.in with automake when Makefile.am or configure.in change.
$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu cuc/Makefile
# Regenerate Makefile from Makefile.in by re-running config.status.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
 
# Remove the built library.
clean-noinstLIBRARIES:
-test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES)
# Rebuild the archive from scratch: delete it, archive the objects, run ranlib.
libcuc.a: $(libcuc_a_OBJECTS) $(libcuc_a_DEPENDENCIES)
-rm -f libcuc.a
$(libcuc_a_AR) libcuc.a $(libcuc_a_OBJECTS) $(libcuc_a_LIBADD)
$(RANLIB) libcuc.a
 
# Remove object files and core dumps.
mostlyclean-compile:
-rm -f *.$(OBJEXT) core *.core
 
distclean-compile:
-rm -f *.tab.c
 
# Include the per-object dependency files when AMDEP_TRUE is enabled.
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adv.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bb.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cuc.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/insn.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/load.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timings.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/verilog.Po@am__quote@
 
# Remove the dependency-tracking directory.
distclean-depend:
-rm -rf ./$(DEPDIR)
 
# Compile a .c file into a .o object; the source is taken from $(srcdir)
# when it is not present in the current directory.
.c.o:
@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
$(COMPILE) -c `test -f '$<' || echo '$(srcdir)/'`$<
 
# Compile a .c file into a .obj object; the source path is converted
# with cygpath -w before it is passed to the compiler.
.c.obj:
@AMDEP_TRUE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@
@AMDEP_TRUE@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
$(COMPILE) -c `cygpath -w $<`
CCDEPMODE = @CCDEPMODE@
# Nothing to uninstall for info files in this directory.
uninstall-info-am:
 
ETAGS = etags
ETAGSFLAGS =
 
tags: TAGS
 
# Build an ID database with mkid from the de-duplicated list of source files.
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
mkid -fID $$unique
 
# Build an etags TAGS file from the de-duplicated list of source files;
# etags is skipped when there is nothing to index.
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
test -z "$(ETAGS_ARGS)$$tags$$unique" \
|| $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique
 
# Run gtags from the top source directory, passing the absolute path of
# the top build directory.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
 
# Remove all generated tag databases.
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
# Everything that is copied into a distribution.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 
top_distdir = ..
distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
 
# Copy each distributed file (or directory) into $(distdir), looking in the
# current directory first and in $(srcdir) otherwise; any needed
# subdirectory is created with mkinstalldirs, and a failed copy aborts.
distdir: $(DISTFILES)
@list='$(DISTFILES)'; for file in $$list; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
if test "$$dir" != "$$file" && test "$$dir" != "."; then \
dir="/$$dir"; \
$(mkinstalldirs) "$(distdir)$$dir"; \
else \
dir=''; \
fi; \
if test -d $$d/$$file; then \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
# Standard targets.  Many of them have empty recipes in this directory.
check-am: all-am
check: check-am
# Building this directory means an up-to-date Makefile plus the library.
all-am: Makefile $(LIBRARIES)
 
installdirs:
 
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
# Re-run "install" with the stripping install program; STRIPPROG is passed
# through INSTALL_PROGRAM_ENV only when $(STRIP) is non-empty.
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
 
clean-generic:
 
# Remove the generated Makefile and any configure-cleaned files.
distclean-generic:
-rm -f Makefile $(CONFIG_CLEAN_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
# The clean levels nest: mostlyclean < clean < distclean < maintainer-clean.
clean: clean-am
 
clean-am: clean-generic clean-noinstLIBRARIES mostlyclean-am
 
distclean: distclean-am
 
distclean-am: clean-am distclean-compile distclean-depend \
distclean-generic distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-exec-am:
 
install-info: install-info-am
 
install-man:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
 
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic
 
uninstall-am: uninstall-info-am
 
# Targets that name actions rather than files.
.PHONY: GTAGS all all-am check check-am clean clean-generic \
clean-noinstLIBRARIES distclean distclean-compile \
distclean-depend distclean-generic distclean-tags distdir dvi \
dvi-am info info-am install install-am install-data \
install-data-am install-exec install-exec-am install-info \
install-info-am install-man install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic tags uninstall uninstall-am \
uninstall-info-am
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/cuc.c
0,0 → 1,887
/* cuc.c -- OpenRISC Custom Unit Compiler
* Copyright (C) 2002 Marko Mlinar, markom@opencores.org
*
* This file is part of OpenRISC 1000 Architectural Simulator.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 
/* Main file, including code optimization and command prompt */
 
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
 
#include "config.h"
 
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
 
#include "port.h"
#include "arch.h"
#include "abstract.h"
#include "sim-config.h"
#include "cuc.h"
#include "insn.h"
#include "profiler.h"
#include "opcode/or32.h"
#include "parse.h"
#include "debug.h"
 
/* Log file stream; main_cuc opens it as "<prefix>.log" */
FILE *flog;
/* Debug verbosity; the "debug" prompt command clamps it to 0..9 and higher
   values enable more of the intermediate dumps */
int cuc_debug = 0;
 
/* Last used registers by software convention, one entry per register */
/* Note that r11 is a caller saved register, so we are allowed to destroy it.
   Due to the CUC architecture we must always return something, even garbage
   (so that the caller knows we are finished when we send the acknowledge).
   If r11 was not used (trivial register assignment) it is removed later,
   but if a value was assigned to it, it must not be removed; that is why
   caller_saved[11] = 0 */
const int caller_saved[MAX_REGS] = {
0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 0, 0, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1,
1, 1};
 
/* Returns floor (log2 (x)), i.e. the index of the highest set bit of x.
   By convention 0 is returned for x == 0 (not by the book, but practical). */
/* Call this log2_int, because there is a library function named log2 */
int log2_int (unsigned long x)
{
  int c = 0;

  /* x is unsigned, so no "x >= 0" check is needed: it could never fail */
  if (!x) return 0;
  while (x != 1) {
    x >>= 1;
    c++;
  }
  return c;
}
 
/* Does all known instruction optimizations.
   The passes below are repeated until one complete round reports no change;
   afterwards set_io () is called on the function.  When cuc_debug is
   non-zero, cuc_check () is run between passes, and higher debug levels dump
   the basic blocks after individual passes. */
void cuc_optimize (cuc_func *func)
{
int modified = 0;
int first = 1;
log ("Optimizing.\n");
do {
/* Any pass that reports a change sets this and forces another round */
modified = 0;
clean_deps (func);
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_CLEAN_DEPS");
if (optimize_cmovs (func)) {
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_CMOVS");
modified = 1;
}
if (cuc_debug) cuc_check (func);
if (optimize_tree (func)) {
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_TREE1");
modified = 1;
}
if (remove_nops (func)) {
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
modified = 1;
}
if (cuc_debug) cuc_check (func);
if (remove_dead (func)) {
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
modified = 1;
}
if (cuc_debug) cuc_check (func);
if (cse (func)) {
log ("Common subexpression elimination.\n");
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
modified = 1;
}
/* Conditional facts are inserted only once, during the first round;
   this step does not set modified */
if (first) {
insert_conditional_facts (func);
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_COND_FACT");
if (cuc_debug) cuc_check (func);
first = 0;
}
if (optimize_bb (func)) {
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
modified = 1;
}
if (cuc_debug) cuc_check (func);
if (remove_nops (func)) {
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
modified = 1;
}
if (remove_dead_bb (func)) {
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
modified = 1;
}
if (remove_trivial_regs (func)) {
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
modified = 1;
}
if (remove_nops (func)) {
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
modified = 1;
}
/* The two dependency passes run every round but do not set modified;
   only schedule_memory () below can request another round */
add_memory_dep (func, func->memory_order);
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
add_data_dep (func);
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
if (schedule_memory (func, func->memory_order)) {
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
modified = 1;
}
} while (modified);
set_io (func);
/* Maximum value detection is currently compiled out */
#if 0
detect_max_values (func);
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_MAX_VALUES");
#endif
}
 
/* Pre/unrolls basic block b of f (preroll i times, unroll j times),
   optimizes the function returned by preunroll_loop () and measures it.
   The measured timings are stored in *timings together with b, i and j,
   and timings is returned.  The temporary function is freed before
   returning; f itself is only passed on to preunroll_loop (). */
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
{
  cuc_func *unrolled;

  cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
  log ("BB%i unroll %i times preroll %i times\n", b, j, i);

  unrolled = preunroll_loop (f, b, i, j, bb_filename);
  if (cuc_debug >= 2)
    print_cuc_bb (unrolled, "AFTER_PREUNROLL");
  cuc_optimize (unrolled);
  analyse_timings (unrolled, timings);

  cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
            timings->new_time, unrolled->orig_time, timings->size);
  log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
       timings->new_time, unrolled->orig_time, timings->size);

  free_func (unrolled);

  /* Remember which option these timings belong to */
  timings->nshared = 0;
  timings->preroll = i;
  timings->unroll = j;
  timings->b = b;
  return timings;
}
 
/* Ordering function for timings: compares new_time only and yields
   -1, 0 or 1 (used as the qsort comparator in analyse_function) */
int tim_comp (cuc_timings *a, cuc_timings *b)
{
  if (a->new_time > b->new_time) return 1;
  return (a->new_time < b->new_time) ? -1 : 0;
}
 
/* Analyses function; done when cuc command is entered in (sim) prompt.

   Loads "<module_name>.bin", detects and builds basic blocks, optimizes the
   function and measures the base timings.  Then, for every basic block that
   jumps to itself (a simple loop), different preroll/unroll factors are
   tried and the resulting options are stored in the block's tim[] array.

   Returns a newly allocated function (a duplicate of the optimized one) with
   the timings filled in, or NULL if memory could not be allocated or the
   binary could not be loaded. */
cuc_func *analyse_function (char *module_name, long orig_time,
                            unsigned long start_addr, unsigned long end_addr,
                            int memory_order, int num_runs)
{
  cuc_timings timings;
  cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
  cuc_func *saved;
  int b, i, j;
  char tmp1[256];
  char tmp2[256];

  /* Callers already cope with NULL (it is also returned on load failure) */
  if (!func) return NULL;

  func->orig_time = orig_time;
  func->start_addr = start_addr;
  func->end_addr = end_addr;
  func->memory_order = memory_order;
  func->nfdeps = 0;
  func->fdeps = NULL;
  func->num_runs = num_runs;

  /* snprintf: a long module name must not overflow the fixed buffers */
  snprintf (tmp1, sizeof (tmp1), "%s.bin", module_name);
  cucdebug (2, "Loading %s.bin\n", module_name);
  if (cuc_load (tmp1)) {
    free (func);
    return NULL;
  }

  log ("Detecting basic blocks\n");
  detect_bb (func);
  if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);

  snprintf (tmp2, sizeof (tmp2), "%s.bin.bb", module_name);
  generate_bb_seq (func, config.sim.mprof_fn, tmp2);
  log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0], runtime.cuc.mdelay[2]);
  log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1], runtime.cuc.mdelay[3]);
  build_bb (func);
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
  reg_dep (func);

  log ("Detecting dependencies\n");
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
  cuc_optimize (func);
#if 0
  csm (func);
#endif
  /* Keep the call outside assert (), so it is not dropped with NDEBUG */
  saved = dup_func (func);
  assert (saved);

  timings.preroll = timings.unroll = 1;
  timings.nshared = 0;

  add_latches (func);
  if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
  analyse_timings (func, &timings);

  free_func (func);
  log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
       timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
  saved->timings = timings;
#if 1
  /* detect and unroll simple loops */
  for (b = 0; b < saved->num_bb; b++) {
    /* Worst case of the search below: the base option, plus for each of
       the MAX_UNROLL outer rounds up to MAX_PREROLL preroll attempts and
       one unroll attempt */
    cuc_timings t[1 + MAX_UNROLL * (MAX_PREROLL + 1)];
    const int max_t = (int) (sizeof (t) / sizeof (t[0]));
    cuc_timings *ut;
    cuc_timings *cut = &t[0];
    int nt = 1;
    double csize;
    saved->bb[b].selected_tim = -1;

    /* Is it a loop? */
    if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
    log ("Found loop at BB%x. Trying to unroll.\n", b);
    t[0] = timings;
    t[0].b = b;
    t[0].preroll = 1;
    t[0].unroll = 1;
    t[0].nshared = 0;
    snprintf (tmp1, sizeof (tmp1), "%s.bin.bb", module_name);
    i = 1;
    do {
      cuc_timings *pt;
      cuc_timings *cpt = cut;
      j = 1;

      /* Increase preroll while it keeps improving the time */
      do {
        pt = cpt;
        cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
      } while (j <= MAX_PREROLL && pt->new_time > cpt->new_time);
      /* Increase unroll while it keeps improving the time */
      i++;
      ut = cut;
      cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
    } while (i <= MAX_UNROLL && ut->new_time > cut->new_time);

    /* Sort the timings */
#if 0
    if (cuc_debug >= 3)
      for (i = 0; i < nt; i++) PRINTF ("%i:%i,%i: %icyc\n",
                    t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
#endif

    qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);

    /* Delete timings, that have worst time and bigger size than other */
    j = 1;
    csize = t[0].size;
    for (i = 1; i < nt; i++)
      if (t[i].size < csize) t[j++] = t[i];
    nt = j;
    cucdebug (1, "Available options\n");
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
    /* Add results from CSM */
    j = nt;
    for (i = 0; i < saved->bb[b].ntim; i++) {
      int i1;
      for (i1 = 0; i1 < nt; i1++) {
        /* Check capacity before writing, never after */
        if (j >= max_t) goto full;
        t[j] = t[i1];
        t[j].size += saved->bb[b].tim[i].size - timings.size;
        t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
        t[j].nshared = saved->bb[b].tim[i].nshared;
        t[j].shared = saved->bb[b].tim[i].shared;
        j++;
      }
    }
full:
    nt = j;

    cucdebug (1, "Available options:\n");
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);

    /* Sort again with new timings added */
    qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);

    /* Delete timings, that have worst time and bigger size than other */
    j = 1;
    csize = t[0].size;
    for (i = 1; i < nt; i++)
      if (t[i].size < csize) t[j++] = t[i];
    nt = j;

    cucdebug (1, "Available options:\n");
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);

    if (saved->bb[b].ntim) free (saved->bb[b].tim);
    saved->bb[b].ntim = nt;
    /* Allocate outside assert (), so it is not dropped with NDEBUG */
    saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt);
    assert (saved->bb[b].tim);

    /* Copy options in reverse order -- smallest first */
    for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];

    log ("Available options:\n");
    for (i = 0; i < saved->bb[b].ntim; i++) {
      log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
           saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
           saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
    }
  }
#endif
  return saved;
}
 
/* Utility option formatting functions */
/* Option letters; a timing option index selects one character */
static const char *option_char = "?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Writes the textual name of option f_opt into s and returns s.
   If bb_no >= 0, s is overwritten with the decimal block number followed by
   the option letter (e.g. "3a").  If bb_no < 0, the option letter is
   appended to the NUL-terminated string already in s.
   f_opt must be a valid index into option_char; s must be large enough. */
/*static */char *gen_option (char *s, int bb_no, int f_opt)
{
  size_t len;

  /* Reject negative indices and the terminating NUL of option_char */
  assert (f_opt >= 0 && (size_t) f_opt < strlen (option_char));

  if (bb_no >= 0) len = (size_t) sprintf (s, "%i", bb_no);
  else len = strlen (s);

  /* Append by hand: sprintf (s, "%s%c", s, ...) would copy s onto itself,
     which is undefined behaviour */
  s[len] = option_char[f_opt];
  s[len + 1] = '\0';
  return s;
}
 
/* Prints the name of option f_opt of basic block bb_no (as produced by
   gen_option), right-aligned in a field of at least three characters */
/*static */void print_option (int bb_no, int f_opt)
{
  char opt[10];

  PRINTF ("%3s", gen_option (opt, bb_no, f_opt));
}
 
/* Formats the selected options of all basic blocks of f into s as a
   comma-separated list (empty string if nothing is selected) and returns s.
   s must be large enough to hold the whole list. */
static char *format_func_options (char *s, cuc_func *f)
{
  int b, first = 1;
  char *end = s;  /* position where the next option is appended */

  *s = '\0';
  for (b = 0; b < f->num_bb; b++)
    if (f->bb[b].selected_tim >= 0) {
      char tmp[10];
      /* Write at the end of the string; passing s as both destination and
         "%s" argument of sprintf would be undefined behaviour */
      end += sprintf (end, "%s%s", first ? "" : ",",
                      gen_option (tmp, b, f->bb[b].selected_tim));
      first = 0;
    }
  return s;
}
 
/* Prints the table of available options of function f (profiler entry
   func_no): first the base option, then for each basic block every further
   option with its time and size relative to the base option */
static void options_cmd (int func_no, cuc_func *f)
{
  char stripped[30];
  int bb_idx;

  PRINTF ("-----------------------------------------------------------------------------\n");
  PRINTF ("|%-28s|pre/unrolled|shared| time | gates |old_time|\n",
          strstrip (stripped, prof_func[func_no].name, 28));
  PRINTF ("| BASE |%4i / %4i | %4i |%8i|%8.f|%8i|\n", 1, 1, 0,
          f->timings.new_time, f->timings.size, f->orig_time);

  for (bb_idx = 0; bb_idx < f->num_bb; bb_idx++) {
    int opt = 1;  /* option 0 is the base option, printed above */

    while (opt < f->bb[bb_idx].ntim) {
      int dtime = f->bb[bb_idx].tim[opt].new_time - f->timings.new_time;
      double dsize = f->bb[bb_idx].tim[opt].size - f->timings.size;

      PRINTF ("| ");
      print_option (bb_idx, opt);
      PRINTF (" |%4i / %4i | %4i |%+8i|%+8.f| |\n",
              f->bb[bb_idx].tim[opt].preroll, f->bb[bb_idx].tim[opt].unroll,
              f->bb[bb_idx].tim[opt].nshared, dtime, dsize);
      opt++;
    }
  }
}
 
/* Generates a function, based on specified parameters.
   rf is the analysed reference function holding the selected options,
   name is the function name and cut_filename the output file prefix.
   Works on a duplicate of rf, writes the verilog output to the file named
   "<cut_filename><name>" and returns the duplicate. */
cuc_func *generate_function (cuc_func *rf, char *name, char *cut_filename)
{
  int b;
  char tmp[256];
  cuc_timings tt;
  cuc_func *f;

  /* Keep the call outside assert (), so it is not dropped with NDEBUG */
  f = dup_func (rf);
  assert (f);

  if (cuc_debug >= 2) print_cuc_bb (f, "BEFORE_GENERATE");
  log ("Generating function %s.\n", name);
  PRINTF ("Generating function %s.\n", name);

  format_func_options (tmp, rf);
  if (strlen (tmp)) PRINTF ("Applying options: %s\n", tmp);
  else PRINTF ("Using basic options.\n");

  /* Generate function as specified by options */
  for (b = 0; b < f->num_bb; b++) {
    cuc_timings *st;
    if (rf->bb[b].selected_tim < 0) continue;
    st = &rf->bb[b].tim[rf->bb[b].selected_tim];
    /* snprintf: a long name must not overflow tmp */
    snprintf (tmp, sizeof (tmp), "%s.bin.bb", name);
    preunroll_bb (&tmp[0], f, &tt, b, st->preroll, st->unroll);
    if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_PREUNROLL");
  }
  for (b = 0; b < f->num_bb; b++) {
    cuc_timings *st;
    if (rf->bb[b].selected_tim < 0) continue;
    st = &rf->bb[b].tim[rf->bb[b].selected_tim];
    if (!st->nshared) continue;
    /* Options with shared instructions are not supported here:
       the csm_gen (f, rf, st->nshared, st->shared) call is disabled */
    assert (0);
  }
  add_latches (f);
  if (cuc_debug >= 1) print_cuc_bb (f, "AFTER_LATCHES");
  analyse_timings (f, &tt);
  snprintf (tmp, sizeof (tmp), "%s%s", cut_filename, name);
  output_verilog (f, tmp, name);
  return f;
}
 
/* Calculates required time, based on selected options: the base time plus,
   for every block with a selected option, that option's time difference
   to the base time */
int calc_cycles (cuc_func *f)
{
  int total = f->timings.new_time;
  int b;

  for (b = 0; b < f->num_bb; b++) {
    if (f->bb[b].selected_tim < 0) continue;
    assert (f->bb[b].selected_tim < f->bb[b].ntim);
    total += f->bb[b].tim[f->bb[b].selected_tim].new_time - f->timings.new_time;
  }
  return total;
}
 
/* Calculates required size, based on selected options: the base size plus,
   for every block with a selected option, that option's size difference
   to the base size */
double calc_size (cuc_func *f)
{
  double total = f->timings.size;
  int b;

  for (b = 0; b < f->num_bb; b++) {
    if (f->bb[b].selected_tim < 0) continue;
    assert (f->bb[b].selected_tim < f->bb[b].ntim);
    total += f->bb[b].tim[f->bb[b].selected_tim].size - f->timings.size;
  }
  return total;
}
 
/* Dumps specified function to file (hex).
   Starting at start_addr, every 32-bit word is decoded and written to
   out_fn as one hex line.  Dumping stops after the instruction that follows
   the first "l.jr" (presumably its delay slot).
   Returns the address of the last word written. */
unsigned long extract_function (char *out_fn, unsigned long start_addr)
{
  FILE *fo;
  unsigned long a = start_addr;
  int x = 0;  /* 0 until l.jr is seen, then counts instructions from it */

  /* Keep fopen outside assert (), so it is not dropped with NDEBUG */
  fo = fopen (out_fn, "wt+");
  assert (fo);

  do {
    unsigned long d = evalsim_mem32 (a);
    int index = insn_decode (d);
    assert (index >= 0);
    if (x) x++;
    if (strcmp (insn_name (index), "l.jr") == 0) x = 1;
    a += 4;
    fprintf (fo, "%08lx\n", d);
  } while (x < 2);

  fclose (fo);
  return a - 4;
}
 
/* Analysed functions, indexed like prof_func; NULL if not translatable */
static cuc_func *func[MAX_FUNCS];
/* Non-zero if the function is selected for translation */
static int func_v[MAX_FUNCS];

/* Detects function dependencies and removes functions that cannot be
   translated: functions calling an unknown function, directly recursive
   functions and functions taking part in (or depending on) a call cycle.
   For every remaining function, fdeps/nfdeps list the distinct functions
   it calls.  Whenever a function is removed the whole analysis restarts. */
static void set_func_deps ()
{
  int f, b, i, j;
restart:
  for (f = 0; f < prof_nfuncs - 1; f++) if (func[f]) {
    int fused[MAX_FUNCS] = {0};
    int c = 0;  /* number of distinct callees; must start at zero */

    for (b = 0; b < func[f]->num_bb; b++)
      for (i = 0; i < func[f]->bb[b].ninsn; i++) {
        cuc_insn *ii = &func[f]->bb[b].insn[i];
        if (ii->index == II_CALL) {
          assert (ii->opt[0] == OPT_CONST);
          /* Find the callee by its start address */
          for (j = 0; j < prof_nfuncs - 1; j++)
            if (func[j] && func[j]->start_addr == ii->op[0]) break;
          if (j >= prof_nfuncs - 1) {
            log ("%s is calling unknown function, address %08lx\n",
                 prof_func[f].name, ii->op[0]);
            debug (1, "%s is calling unknown function, address %08lx\n",
                   prof_func[f].name, ii->op[0]);
            free_func (func[f]);
            func[f] = NULL;
            goto restart;
          } else if (f == j) {
            log ("%s is recursive, ignoring\n", prof_func[f].name);
            debug (1, "%s is recursive, ignoring\n", prof_func[f].name);
            free_func (func[f]);
            func[f] = NULL;
            goto restart;
          } else fused[j]++;
        }
      }
    for (i = 0; i < MAX_FUNCS; i++) if (fused[i]) c++;
    if (func[f]->nfdeps) free (func[f]->fdeps);
    func[f]->nfdeps = c;
    if (c) {
      func[f]->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * c);
      assert (func[f]->fdeps);
    } else {
      /* No callees: avoid a zero-size allocation that is never freed */
      func[f]->fdeps = NULL;
    }
    for (i = 0, j = 0; i < MAX_FUNCS; i++)
      if (fused[i]) func[f]->fdeps[j++] = func[i];
  }

  /* Detect loops */
  {
    int change;
    for (f = 0; f < MAX_FUNCS; f++) if (func[f]) func[f]->tmp = 0;
    /* Repeatedly mark functions whose callees are all marked already */
    do {
      change = 0;
      for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
        int o = 1;
        for (i = 0; i < func[f]->nfdeps; i++)
          if (!func[f]->fdeps[i]->tmp) {o = 0; break;}
        if (o) {
          func[f]->tmp = 1;
          change = 1;
        }
      }
    } while (change);
    /* Whatever is still unmarked belongs to or depends on a call cycle */
    change = 0;
    for (f = 0; f < MAX_FUNCS; f++) if (func[f] && !func[f]->tmp) {
      free_func (func[f]);
      func[f] = NULL;
      change = 1;
    }
    if (change) goto restart;
  }
}
 
void main_cuc (char *filename)
{
int i, j;
char tmp1[256];
char filename_cut[256];
#if 0 /* Select prefix, based on binary program name */
for (i = 0; i < sizeof (filename_cut); i++) {
if (isalpha(filename[i])) filename_cut[i] = filename[i];
else {
filename_cut[i] = '\0';
break;
}
}
#else
strcpy (filename_cut, "cu");
#endif
 
PRINTF ("Entering OpenRISC Custom Unit Compiler command prompt\n");
PRINTF ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
sprintf (tmp1, "%s.log", filename_cut);
PRINTF ("Analyzing. (log file \"%s\").\n", tmp1);
assert (flog = fopen (tmp1, "wt+"));
 
/* Loads in the specified timings table */
PRINTF ("Using timings from \"%s\" at %s\n",config.cuc.timings_fn,
generate_time_pretty (tmp1, config.sim.clkcycle_ps));
load_timing_table (config.cuc.timings_fn);
runtime.cuc.cycle_duration = 1000. * config.sim.clkcycle_ps;
PRINTF ("Multicycle logic %s, bursts %s, %s memory order.\n",
config.cuc.no_multicycle ? "OFF" : "ON", config.cuc.enable_bursts ? "ON" : "OFF",
config.cuc.memory_order == MO_NONE ? "no" : config.cuc.memory_order == MO_WEAK ? "weak" :
config.cuc.memory_order == MO_STRONG ? "strong" : "exact");
 
prof_set (1, 0);
assert (prof_acquire (config.sim.prof_fn) == 0);
if (config.cuc.calling_convention)
PRINTF ("Assuming OpenRISC standard calling convention.\n");
 
/* Try all functions except "total" */
for (i = 0; i < prof_nfuncs - 1; i++) {
long orig_time;
unsigned long start_addr, end_addr;
orig_time = prof_func[i].cum_cycles;
start_addr = prof_func[i].addr;
/* Extract the function from the binary */
sprintf (tmp1, "%s.bin", prof_func[i].name);
end_addr = extract_function (tmp1, start_addr);
log ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name, start_addr,
end_addr);
PRINTF ("Testing function %s (%08lx - %08lx)\n", prof_func[i].name,
start_addr, end_addr);
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr,
end_addr, config.cuc.memory_order, prof_func[i].calls);
func_v[i] = 0;
}
set_func_deps ();
while (1) {
char *s;
wait_command:
PRINTF ("(cuc) ");
fflush (stdout);
wait_command_empty:
s = fgets(tmp1, sizeof tmp1, stdin);
usleep (100);
if (!s) goto wait_command_empty;
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
*s = '\0';
 
/* quit command */
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
/* Delete temporary files */
for (i = 0; i < prof_nfuncs - 1; i++) {
sprintf (tmp1, "%s.bin", prof_func[i].name);
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
sprintf (tmp1, "%s.bin.bb", prof_func[i].name);
log ("Deleting temporary file %s %s\n", tmp1, remove (tmp1) ? "FAILED" : "OK");
}
break;
/* profile command */
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
int ntime = 0;
int size = 0;
PRINTF ("-----------------------------------------------------------------------------\n");
PRINTF ("|function name |calls|avg cycles |old%%| max. f. | impr. f.| options |\n");
PRINTF ("|--------------------+-----+------------+----+----------|---------+---------|\n");
for (j = 0; j < prof_nfuncs; j++) {
int bestcyc = 0, besti = 0;
char tmp[100];
for (i = 0; i < prof_nfuncs; i++)
if (prof_func[i].cum_cycles > bestcyc) {
bestcyc = prof_func[i].cum_cycles;
besti = i;
}
i = besti;
PRINTF ("|%-20s|%5li|%12.1f|%3.0f%%| ",
strstrip (tmp, prof_func[i].name, 20), prof_func[i].calls,
((double)prof_func[i].cum_cycles / prof_func[i].calls),
(100. * prof_func[i].cum_cycles / prof_cycles));
if (func[i]) {
double f = 1.0;
if (func_v[i]) {
int nt = calc_cycles (func[i]);
int s = calc_size (func[i]);
f = 1. * func[i]->orig_time / nt;
ntime += nt;
size += s;
} else ntime += prof_func[i].cum_cycles;
PRINTF ("%8.1f |%8.1f | %-8s|\n", 1.f * prof_func[i].cum_cycles
/ func[i]->timings.new_time, f, format_func_options (tmp, func[i]));
} else {
PRINTF (" N/A | N/A | N/A |\n");
ntime += prof_func[i].cum_cycles;
}
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
}
for (i = 0; i < prof_nfuncs; i++)
prof_func[i].cum_cycles = -prof_func[i].cum_cycles;
PRINTF ("-----------------------------------------------------------------------------\n");
PRINTF ("Total %i cycles (was %i), total added gates = %i. Speed factor %.1f\n",
ntime, prof_cycles, size, 1. * prof_cycles / ntime);
/* debug command */
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
sscanf (tmp1, "%*s %i", &cuc_debug);
if (cuc_debug < 0) cuc_debug = 0;
if (cuc_debug > 9) cuc_debug = 9;
 
/* generate command */
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
/* check for function dependencies */
for (i = 0; i < prof_nfuncs; i++)
if (func[i]) func[i]->tmp = func_v[i];
for (i = 0; i < prof_nfuncs; i++) if (func[i])
for (j = 0; j < func[i]->nfdeps; j++)
if (!func[i]->fdeps[j] || !func[i]->fdeps[j]->tmp) {
PRINTF ("Function %s must be selected for translation (required by %s)\n",
prof_func[j].name, prof_func[i].name);
goto wait_command;
}
for (i = 0; i < prof_nfuncs; i++)
if (func[i] && func_v[i]) generate_function (func[i], prof_func[i].name, filename_cut);
generate_main (prof_nfuncs, func, filename_cut);
 
/* list command */
} else if (strcmp (tmp1, "l") == 0 || strcmp (tmp1, "list") == 0) {
/* check for function dependencies */
for (i = 0; i < prof_nfuncs; i++)
if (func_v[i]) {
PRINTF ("%s\n", prof_func[j].name);
}
 
/* selectall command */
} else if (strcmp (tmp1, "sa") == 0 || strcmp (tmp1, "selectall") == 0) {
int f;
for (f = 0; f < prof_nfuncs; f++) if (func[f]) {
func_v[f] = 1;
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
}
/* select command */
} else if (strncmp (tmp1, "s", 1) == 0 || strncmp (tmp1, "select", 6) == 0) {
char tmp[50], ch;
int p, o, b, f;
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
if (p < 1) PRINTF ("Invalid parameters.\n");
else {
/* Check if we have valid option */
for (f = 0; f < prof_nfuncs; f++)
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
if (f < prof_nfuncs) {
if (p == 1) {
if (func[f]) {
func_v[f] = 1;
PRINTF ("Function %s selected for translation.\n", prof_func[f].name);
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
} else {
if (!func_v[f])
PRINTF ("Function %s not yet selected for translation.\n", prof_func[f].name);
if (p < 3) goto invalid_option;
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
if (!option_char[o]) goto invalid_option;
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
/* select an option */
func[f]->bb[b].selected_tim = o;
if (func[f]->bb[b].tim[o].nshared) {
PRINTF ("Option has shared instructions: ");
print_shared (func[f], func[f]->bb[b].tim[o].shared, func[f]->bb[b].tim[o].nshared);
PRINTF ("\n");
}
goto wait_command;
invalid_option:
PRINTF ("Invalid option.\n");
}
} else PRINTF ("Invalid function.\n");
}
 
/* unselect command */
} else if (strncmp (tmp1, "u", 1) == 0 || strncmp (tmp1, "unselect", 8) == 0) {
char tmp[50], ch;
int p, o, b, f;
p = sscanf (tmp1, "%*s %s %i%c", tmp, &b, &ch);
if (p < 1) PRINTF ("Invalid parameters.\n");
else {
/* Check if we have valid option */
for (f = 0; f < prof_nfuncs; f++)
if (strcmp (prof_func[f].name, tmp) == 0 && func[f]) break;
if (f < prof_nfuncs) {
if (p == 1) {
if (func[f]) {
func_v[f] = 0;
PRINTF ("Function %s unselected for translation.\n", prof_func[f].name);
} else PRINTF ("Function %s not suitable for translation.\n", prof_func[f].name);
} else {
if (p < 3) goto invalid_option;
for (o = 0; option_char[o] != '\0' && option_char[o] != ch; o++);
if (!option_char[o]) goto invalid_option;
if (b < 0 || b >= func[f]->num_bb) goto invalid_option;
if (o < 0 || o >= func[f]->bb[b].ntim) goto invalid_option;
/* unselect the option: mark this basic block as having no selected timing */
func[f]->bb[b].selected_tim = -1;
}
} else PRINTF ("Invalid function.\n");
}
/* options command */
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
int any = 0;
PRINTF ("Available options:\n");
for (i = 0; i < prof_nfuncs; i++)
if (func[i]) {
options_cmd (i, func[i]);
any = 1;
}
if (any) PRINTF ("-----------------------------------------------------------------------------\n");
else PRINTF ("Sorry. No available options.\n");
 
/* Ignore empty string */
} else if (strcmp (tmp1, "") == 0) {
 
/* help command */
} else {
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
PRINTF ("Unknown command.\n");
PRINTF ("OpenRISC Custom Unit Compiler command prompt\n");
PRINTF ("Available commands:\n");
PRINTF (" h | help displays this help\n");
PRINTF (" q | quit returns to or1ksim prompt\n");
PRINTF (" p | profile displays function profiling\n");
PRINTF (" d | debug # sets debug level (0-9)\n");
PRINTF (" o | options displays available options\n");
PRINTF (" s | select func [option] selects an option/function\n");
PRINTF (" u | unselect func [option] unselects an option/function\n");
PRINTF (" g | generate generates verilog file\n");
PRINTF (" l | list displays selected functions\n");
}
}
 
/* Dispose memory */
for (i = 0; i < prof_nfuncs -1; i++)
if (func[i]) free_func (func[i]);
 
fclose (flog);
}
 
/*----------------------------------------------------[ CUC Configuration ]---*/
/* Configuration callback registered for the "calling_convention"
   parameter of the "cuc" section.  Copies the integer value VAL into
   config.cuc.calling_convention.  DAT is not used.  */
void cuc_calling_convention(union param_val val, void *dat)
{
  (void) dat;   /* callback signature requires it; nothing to do with it here */

  config.cuc.calling_convention = val.int_val;
}
 
/* Configuration callback registered for the "enable_bursts" parameter
   of the "cuc" section.  Copies the integer value VAL into
   config.cuc.enable_bursts.  DAT is not used.  */
void cuc_enable_bursts(union param_val val, void *dat)
{
  (void) dat;   /* callback signature requires it; nothing to do with it here */

  config.cuc.enable_bursts = val.int_val;
}
 
/* Configuration callback registered for the "no_multicycle" parameter
   of the "cuc" section.  Copies the integer value VAL into
   config.cuc.no_multicycle.  DAT is not used.  */
void cuc_no_multicycle(union param_val val, void *dat)
{
  (void) dat;   /* callback signature requires it; nothing to do with it here */

  config.cuc.no_multicycle = val.int_val;
}
 
/* Configuration callback registered for the "memory_order" parameter of
   the "cuc" section.

   VAL.str_val is compared against the words "none", "weak", "strong" and
   "exact"; on a match config.cuc.memory_order is set to MO_NONE, MO_WEAK,
   MO_STRONG or MO_EXACT respectively.  Any other word is reported through
   CONFIG_ERROR and this function makes no assignment.  DAT is not used.  */
void cuc_memory_order(union param_val val, void *dat)
{
  (void) dat;

  if (strcmp (val.str_val, "none") == 0)
    config.cuc.memory_order = MO_NONE;
  else if (strcmp (val.str_val, "weak") == 0)
    config.cuc.memory_order = MO_WEAK;
  else if (strcmp (val.str_val, "strong") == 0)
    config.cuc.memory_order = MO_STRONG;
  else if (strcmp (val.str_val, "exact") == 0)
    config.cuc.memory_order = MO_EXACT;
  else {
    char tmp[200];

    /* The offending word comes straight from the configuration file and
       may be arbitrarily long, so the message is formatted with an
       explicit bound instead of sprintf; an overlong word is truncated
       rather than overrunning tmp.  */
    snprintf (tmp, sizeof (tmp), "invalid memory order '%s'.\n", val.str_val);
    CONFIG_ERROR(tmp);
  }
}
 
/* Configuration callback registered for the "timings_fn" parameter of
   the "cuc" section.  Copies the string VAL.str_val into
   config.cuc.timings_fn.  DAT is not used.  */
void cuc_timings_fn(union param_val val, void *dat)
{
  (void) dat;   /* callback signature requires it; nothing to do with it here */

  /* NOTE(review): strcpy does not bound the copy; the capacity of
     config.cuc.timings_fn is declared elsewhere -- confirm that the
     configuration parser cannot hand over a longer string.  */
  strcpy (config.cuc.timings_fn, val.str_val);
}
 
/* Registers the "cuc" configuration section together with the callbacks
   that handle each of its parameters.  */
void reg_cuc_sec(void)
{
  struct config_section *cuc_sec;

  cuc_sec = reg_config_sec("cuc", NULL, NULL);

  /* Parameters registered as paramt_int.  */
  reg_config_param(cuc_sec, "calling_convention", paramt_int, cuc_calling_convention);
  reg_config_param(cuc_sec, "enable_bursts", paramt_int, cuc_enable_bursts);
  reg_config_param(cuc_sec, "no_multicycle", paramt_int, cuc_no_multicycle);

  /* Memory ordering is given as a word (none/weak/strong/exact).  */
  reg_config_param(cuc_sec, "memory_order", paramt_word, cuc_memory_order);

  /* Timings file name is given as a string.  */
  reg_config_param(cuc_sec, "timings_fn", paramt_str, cuc_timings_fn);
}
cuc.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: load.c =================================================================== --- load.c (nonexistent) +++ load.c (revision 1765) @@ -0,0 +1,536 @@ +/* load.c -- OpenRISC Custom Unit Compiler, instruction loading and converting + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "opcode/or32.h" +#include "insn.h" + +static const cuc_conv conv[] = { +{"l.add", II_ADD}, {"l.addi", II_ADD}, +{"l.movhi", II_OR}, +{"l.sub", II_SUB}, {"l.subi", II_SUB}, +{"l.and", II_AND}, {"l.andi", II_AND}, +{"l.xor", II_XOR}, {"l.xori", II_XOR}, +{"l.or", II_OR }, {"l.ori", II_OR}, +{"l.mul", II_MUL}, {"l.muli", II_MUL}, + +{"l.sra", II_SRA}, {"l.srai", II_SRA}, +{"l.srl", II_SRL}, {"l.srli", II_SRL}, +{"l.sll", II_SLL}, {"l.slli", II_SLL}, + +{"l.lbz",II_LB | II_MEM}, {"l.lbs", II_LB | II_MEM | II_SIGNED}, +{"l.lhz",II_LH | II_MEM}, {"l.lhs", II_LH | II_MEM | II_SIGNED}, +{"l.lwz",II_LW | II_MEM}, {"l.lws", II_LW | II_MEM | II_SIGNED}, +{"l.sb", II_SB | II_MEM}, {"l.sh", II_SH | II_MEM}, {"l.sw", II_SW | II_MEM}, +{"l.sfeq", II_SFEQ }, {"l.sfeqi", II_SFEQ}, +{"l.sfne", II_SFNE }, {"l.sfnei", II_SFNE}, +{"l.sflts", II_SFLT | II_SIGNED}, {"l.sfltis", II_SFLT | II_SIGNED}, +{"l.sfltu", II_SFLT}, {"l.sfltiu", II_SFLT}, +{"l.sfgts", II_SFGT | II_SIGNED}, {"l.sfgtis", II_SFGT | II_SIGNED}, +{"l.sfgtu", II_SFGT}, {"l.sfgtiu", II_SFGT}, +{"l.sfges", II_SFGE | II_SIGNED}, {"l.sfgeis", II_SFGE | II_SIGNED}, +{"l.sfgeu", II_SFGE}, {"l.sfgeiu", II_SFGE}, +{"l.sfles", II_SFLE | II_SIGNED}, {"l.sfleis", II_SFLE | II_SIGNED}, +{"l.sfleu", II_SFLE}, {"l.sfleiu", II_SFLE}, +{"l.j", II_BF }, +{"l.bf", II_BF }, +{"l.jal", II_CALL }, +{"l.nop", II_NOP } +}; + +/* Instructions from function */ +cuc_insn insn[MAX_INSNS]; +int num_insn; +int reloc[MAX_INSNS]; + +/* Prints out instructions */ +void print_cuc_insns (char *s, int verbose) +{ + PRINTF ("****************** %s ******************\n", s); + print_insns (0, insn, num_insn,verbose); + PRINTF ("\n\n"); +} + +void xchg_insn (int i, int j) +{ + cuc_insn t; + t = insn[i]; + insn[i] = insn[j]; + 
insn[j] = t; +} + +/* Negates conditional instruction */ +void negate_conditional (cuc_insn *ii) +{ + assert (ii->type & IT_COND); + + if (ii->index == II_SFEQ) change_insn_type (ii, II_SFNE); + else if (ii->index == II_SFNE) change_insn_type (ii, II_SFEQ); + else if (ii->index == II_SFLT) change_insn_type (ii, II_SFGE); + else if (ii->index == II_SFGT) change_insn_type (ii, II_SFLE); + else if (ii->index == II_SFLE) change_insn_type (ii, II_SFGT); + else if (ii->index == II_SFGE) change_insn_type (ii, II_SFLT); + else assert (0); +} + +/* Remove delay slots */ +void remove_dslots () +{ + int i; + int in_delay = 0; + for (i = 0; i < num_insn; i++) { + if (in_delay) insn[i].type |= IT_INDELAY; + in_delay = 0; + if (insn[i].type & IT_BRANCH) in_delay = 1; + if (insn[i].type & IT_INDELAY) { + cuc_insn *ii; + cuc_insn *bi; + assert (i >= 2); + ii = &insn[i - 2]; + bi = &insn[i - 1]; + /* delay slot should not be a branch target! */ + assert ((insn[i].type & IT_BBSTART) == 0); + assert ((bi->type & IT_INDELAY) == 0); + insn[i].type &= ~IT_INDELAY; /* no more in delay slot */ + + /* Get the value we need before the actual jump */ + if (bi->opt[1] & OPT_REGISTER && bi->op[1] >= 0) { + int r = bi->op[1]; + assert (ii->index == II_NOP); + change_insn_type (ii, II_ADD); + ii->type = IT_COND; + ii->dep = NULL; + ii->op[0] = r; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = r; ii->opt[1] = OPT_REGISTER; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + bi->op[1] = i - 2; bi->opt[1] = OPT_REF; + } + xchg_insn (i, i - 1); + } + } + assert (in_delay == 0); +} + +/* Convert local variables (uses stack frame -- r1) to internal values */ +void detect_locals () +{ + int stack[CUC_MAX_STACK]; + int i, can_remove_stack = 1; + int real_stack_size = 0; + + for (i = 0; i < CUC_MAX_STACK; i++) stack[i] = -1; + + for (i = 0; i < num_insn; i++) { + /* sw off (r1),rx */ + if (insn[i].index == II_SW + && (insn[i].opt[0] & OPT_CONST) + && insn[i].op[1] == 1 && 
(insn[i].opt[1] & OPT_REGISTER)) { + + if (insn[i].op[0] < CUC_MAX_STACK/* && insn[i].op[1] >= 4*/) { /* Convert to normal move */ + stack[insn[i].op[0]] = i; + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + insn[i].op[0] = -1; insn[i].opt[0] = OPT_REGISTER | OPT_DEST; + insn[i].op[1] = insn[i].op[2]; insn[i].opt[1] = insn[i].opt[2]; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* lw rx,off (r1) */ + } else if (insn[i].index == II_LW + && (insn[i].opt[1] & OPT_CONST) + && insn[i].op[2] == 1 && (insn[i].opt[2] & OPT_REGISTER)) { + + if (insn[i].op[1] < CUC_MAX_STACK && stack[insn[i].op[1]] >= 0) { /* Convert to normal move */ + insn[i].type &= IT_INDELAY | IT_BBSTART; + change_insn_type (&insn[i], II_ADD); + insn[i].op[1] = stack[insn[i].op[1]]; insn[i].opt[1] = OPT_REF; + insn[i].op[2] = 0; insn[i].opt[2] = OPT_CONST; + } else can_remove_stack = 0; + /* Check for defined stack size */ + } else if (insn[i].index == II_ADD && !real_stack_size + && (insn[i].opt[0] & OPT_REGISTER) && insn[i].op[0] == 1 + && (insn[i].opt[1] & OPT_REGISTER) && insn[i].op[1] == 1 + && (insn[i].opt[2] & OPT_CONST)) { + real_stack_size = -insn[i].op[2]; + } + } + //assert (can_remove_stack); /* TODO */ +} + +/* Disassemble one instruction from insn index and generate parameters */ +const char *build_insn (unsigned long data, cuc_insn *insn) +{ + const char *name; + char *s; + extern char *disassembled; + int index = insn_decode (data); + struct or32_opcode const *opcode; + int i, argc = 0; + + insn->insn = data; + insn->index = -1; + insn->type = 0; + name = insn_name (index); + insn->index = index; + disassemble_index (data, index); + strcpy (insn->disasm, disassembled); + insn->dep = NULL; + for (i = 0; i < MAX_OPERANDS; i++) insn->opt[i] = OPT_NONE; + + if (index < 0) { + fprintf (stderr, "Invalid opcode 0x%08lx!\n", data); + exit (1); + } + opcode = &or32_opcodes[index]; + + for (s = opcode->args; *s != '\0'; ++s) { 
+ switch (*s) { + case '\0': return name; + case 'r': + insn->opt[argc] = OPT_REGISTER | (argc ? 0 : OPT_DEST); + insn->op[argc++] = or32_extract(*++s, opcode->encoding, data); + break; + + default: + if (strchr (opcode->encoding, *s)) { + unsigned long imm = or32_extract (*s, opcode->encoding, data); + imm = extend_imm(imm, *s); + insn->opt[argc] = OPT_CONST; + insn->op[argc++] = imm; + } + } + } + return name; +} + +/* inserts nop before branch */ +void expand_branch () +{ + int i, j, num_bra = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_BRANCH) num_bra++; + + d = num_insn + 2 * num_bra; + assert (d < MAX_INSNS); + + /* Add nop before branch */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_BRANCH) { + insn[--d] = insn[i]; // for delay slot (later) + if (insn[d].opt[1] & OPT_REGISTER) { + assert (insn[d].op[1] == FLAG_REG); + insn[d].op[1] = i; insn[d].opt[1] = OPT_REF; + } + insn[--d] = insn[i]; // for branch + change_insn_type (&insn[d], II_NOP); + insn[--d] = insn[i]; // save flag & negation of conditional, if required + change_insn_type (&insn[d], II_CMOV); + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].type & IT_FLAG1 ? 0 : 1; insn[d].opt[1] = OPT_CONST; + insn[d].op[2] = insn[d].type & IT_FLAG1 ? 
1 : 0; insn[d].opt[2] = OPT_CONST; + insn[d].op[3] = FLAG_REG; insn[d].opt[3] = OPT_REGISTER; + insn[d].type = IT_COND; + if (insn[d].type) + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += 2 * num_bra; + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* expands immediate memory instructions to two */ +void expand_memory () +{ + int i, j, num_mem = 0, d; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY) num_mem++; + + d = num_insn + num_mem; + assert (d < MAX_INSNS); + + /* Split memory commands */ + for (i = num_insn - 1; i >= 0; i--) if (insn[i].type & IT_MEMORY) { + insn[--d] = insn[i]; + insn[--d] = insn[i]; + reloc[i] = d; + switch (insn[d].index) { + case II_SW: + case II_SH: + case II_SB: + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* sw rx,(t($-1)) */ + insn[d + 1].op[0] = insn[i].op[2]; insn[d + 1].opt[0] = insn[d + 1].opt[2]; + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + insn[d].op[2] = insn[d].op[0]; insn[d].opt[2] = insn[d].opt[0]; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].opt[3] = OPT_NONE; + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + break; + case II_LW: + case II_LH: + case II_LB: + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; /* add rd, ra, rb */ + insn[d].type &= IT_INDELAY | IT_BBSTART; + insn[d].type |= IT_MEMADD; + change_insn_type (&insn[d], II_ADD); + insn[d + 1].op[1] = d; insn[d + 1].opt[1] = OPT_REF; /* lw (t($-1)),rx */ + insn[d + 1].opt[2] = OPT_NONE; + insn[d + 1].opt[3] = OPT_NONE; + insn[d + 1].type &= ~IT_BBSTART; + break; + default: fprintf (stderr, "%4i, %4i: %s\n", i, d, cuc_insn_name (&insn[d])); + assert (0); + } + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_mem; 
+ for (i = 0; i < num_insn; i++) if (!(insn[i].type & IT_MEMORY)) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* expands signed comparisons to three instructions */ +void expand_signed () +{ + int i, j, num_sig = 0, d; + for (i = 0; i < num_insn; i++) + if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) num_sig++; + + d = num_insn + num_sig * 2; + assert (d < MAX_INSNS); + + /* Split signed instructions */ + for (i = num_insn - 1; i >= 0; i--) + /* We will expand signed memory later */ + if (insn[i].type & IT_SIGNED && !(insn[i].type & IT_MEMORY)) { + insn[--d] = insn[i]; + insn[d].op[1] = d - 2; insn[d].opt[1] = OPT_REF; + insn[d].op[2] = d - 1; insn[d].opt[2] = OPT_REF; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[2]; insn[d].opt[1] = insn[d].opt[2]; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = 0; + insn[d].op[0] = -1; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = insn[d].op[1]; insn[d].opt[1] = insn[d].opt[1]; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_sig * 2; + for (i = 0; i < num_insn; i++) if (insn[i].type & IT_MEMORY || !(insn[i].type & IT_SIGNED)) { + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; + } else insn[i].type &= ~IT_SIGNED; +} + +/* expands calls to 7 instructions */ +void expand_calls () +{ + int i, j, num_call = 0, d; + for (i = 0; i < num_insn; i++) + if (insn[i].index == II_CALL) num_call++; + + d = num_insn + num_call * 6; /* 6 parameters */ + assert 
(d < MAX_INSNS); + + /* Split call instructions */ + for (i = num_insn - 1; i >= 0; i--) + /* We will expand signed memory later */ + if (insn[i].index == II_CALL) { + insn[--d] = insn[i]; + insn[d].op[0] = insn[d].op[1]; insn[d].opt[0] = OPT_CONST; + insn[d].opt[1] = OPT_NONE; + insn[d].type |= IT_VOLATILE; + + for (j = 0; j < 6; j++) { + insn[--d] = insn[i]; + change_insn_type (&insn[d], II_ADD); + insn[d].type = IT_VOLATILE; + insn[d].op[0] = 3 + j; insn[d].opt[0] = OPT_REGISTER | OPT_DEST; + insn[d].op[1] = 3 + j; insn[d].opt[1] = OPT_REGISTER; + insn[d].op[2] = 0x80000000; insn[d].opt[2] = OPT_CONST; + insn[d].opt[3] = OPT_NONE; + } + + reloc[i] = d; + } else { + insn[--d] = insn[i]; + reloc[i] = d; + } + num_insn += num_call * 6; + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_REF || insn[i].opt[j] & OPT_JUMP) + insn[i].op[j] = reloc[insn[i].op[j]]; +} + +/* Loads function from file into global array insn. + Function returns nonzero if function cannot be converted. 
*/ +int cuc_load (char *in_fn) +{ + int i, j; + FILE *fi; + int func_return = 0; + num_insn = 0; + + log ("Loading filename %s\n", in_fn); + if ((fi = fopen (in_fn, "rt")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", in_fn); + exit (1); + } + /* Read in the function and decode the instructions */ + for (i = 0;; i++) { + unsigned long data; + const char *name; + + if (fscanf (fi, "%08lx\n", &data) != 1) break; + + /* build params */ + name = build_insn (data, &insn[i]); + if (func_return) func_return++; + //PRINTF ("%s\n", name); + + if (or32_opcodes[insn[i].index].flags & OR32_IF_DELAY) { + int f; + if (strcmp (name, "l.bnf") == 0) f = 1; + else if (strcmp (name, "l.bf") == 0) f = 0; + else if (strcmp (name, "l.j") == 0) { + f = -1; + } else if (strcmp (name, "l.jr") == 0 && func_return == 0) { + func_return = 1; + change_insn_type (&insn[i], II_NOP); + continue; + } else { + cucdebug (1, "Instruction #%i: \"%s\" not supported.\n", i, name); + log ("Instruction #%i: \"%s\" not supported.\n", i, name); + return 1; + } + if (f < 0) { /* l.j */ + /* repair params */ + change_insn_type (&insn[i], II_BF); + insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP; + insn[i].op[1] = 1; insn[i].opt[1] = OPT_CONST; + insn[i].type |= IT_BRANCH | IT_VOLATILE; + } else { + change_insn_type (&insn[i], II_BF); + insn[i].op[0] = i + insn[i].op[0]; insn[i].opt[0] = OPT_JUMP; + insn[i].op[1] = FLAG_REG; insn[i].opt[1] = OPT_REGISTER; + insn[i].type |= IT_BRANCH | IT_VOLATILE; + if (f) insn[i].type |= IT_FLAG1; + } + } else { + insn[i].index = -1; + for (j = 0; j < sizeof (conv) / sizeof (cuc_conv); j++) + if (strcmp (conv[j].from, name) == 0) { + if (conv[j].to & II_SIGNED) insn[i].type |= IT_SIGNED; + if (conv[j].to & II_MEM) insn[i].type |= IT_MEMORY | IT_VOLATILE; + change_insn_type (&insn[i], conv[j].to & II_MASK); + break; + } + if (strcmp (name, "l.movhi") == 0) { + insn[i].op[1] <<= 16; + insn[i].op[2] = 0; + insn[i].opt[2] = OPT_CONST; + } + if (insn[i].index == 
II_SFEQ || insn[i].index == II_SFNE + || insn[i].index == II_SFLE || insn[i].index == II_SFGT + || insn[i].index == II_SFGE || insn[i].index == II_SFLT) { + /* repair params */ + insn[i].op[2] = insn[i].op[1]; insn[i].opt[2] = insn[i].opt[1] & ~OPT_DEST; + insn[i].op[1] = insn[i].op[0]; insn[i].opt[1] = insn[i].opt[0] & ~OPT_DEST; + insn[i].op[0] = FLAG_REG; insn[i].opt[0] = OPT_DEST | OPT_REGISTER; + insn[i].opt[3] = OPT_NONE; + insn[i].type |= IT_COND; + } + if (insn[i].index < 0 || insn[i].index == II_NOP && insn[i].op[0] != 0) { + cucdebug (1, "Instruction #%i: \"%s\" not supported (2).\n", i, name); + log ("Instruction #%i: \"%s\" not supported (2).\n", i, name); + return 1; + } + } + } + num_insn = i; + fclose (fi); + if (func_return != 2) { + cucdebug (1, "Unsupported function structure.\n"); + log ("Unsupported function structure.\n"); + return 1; + } + + log ("Number of instructions loaded = %i\n", num_insn); + if (cuc_debug >= 3) print_cuc_insns ("INITIAL", 1); + + log ("Converting.\n"); + expand_branch (); + if (cuc_debug >= 6) print_cuc_insns ("AFTER_EXP_BRANCH", 0); + + remove_dslots (); + if (cuc_debug >= 6) print_cuc_insns ("NO_DELAY_SLOTS", 0); + + if (config.cuc.calling_convention) { + detect_locals (); + if (cuc_debug >= 7) print_cuc_insns ("AFTER_LOCALS", 0); + } + expand_memory (); + if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_MEM", 0); + + expand_signed (); + if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_SIG", 0); + + expand_calls (); + if (cuc_debug >= 3) print_cuc_insns ("AFTER_EXP_CALLS", 0); + + return 0; +}
load.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: bb.c =================================================================== --- bb.c (nonexistent) +++ bb.c (revision 1765) @@ -0,0 +1,1508 @@ +/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "sim-config.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "support/profile.h" + +/* prints out bb string */ +void print_bb_num (int num) +{ + if (num < 0) PRINTF ("*"); + else if (num == BBID_END) PRINTF ("END"); + else if (num == BBID_START) PRINTF ("START"); + else PRINTF ("%2x", num); +} + +/* Print out basic blocks */ +void print_cuc_bb (cuc_func *f, char *s) +{ + int i; + PRINTF ("------- %s -------\n", s); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].insn) PRINTF ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt); + else PRINTF ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last); + PRINTF (" type %02lx tmp %i ", f->bb[i].type, f->bb[i].tmp); + PRINTF ("next "); print_bb_num (f->bb[i].next[0]); + PRINTF (" "); print_bb_num (f->bb[i].next[1]); + PRINTF (" prev "); print_bb_num (f->bb[i].prev[0]); + PRINTF (" "); print_bb_num (f->bb[i].prev[1]); + PRINTF ("\n"); + + if (f->bb[i].insn) print_insns (i, f->bb[i].insn, f->bb[i].ninsn, 0); + } + if (f->nmsched) { + PRINTF ("\nmsched: "); + for (i = 0; i < f->nmsched; i++) + PRINTF ("%x ", f->msched[i]); + PRINTF ("\n\n\n"); + } else PRINTF ("\n"); + fflush (stdout); +} + +/* Copies src basic block into destination */ +cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src) +{ + int i, j; + dep_list *d; + assert (dest != src); + *dest = *src; + assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn)); + for (i = 0; i < src->ninsn; i++) { + d = src->insn[i].dep; + dest->insn[i] = src->insn[i]; + dest->insn[i].dep = NULL; + while (d) { + add_dep (&dest->insn[i].dep, d->ref); + d = d->next; + } + } + + d = src->mdep; + dest->mdep = NULL; + while (d) { + add_dep (&dest->mdep, d->ref); + d = d->next; + } + if (src->ntim) { + assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim)); + for (i = 0; i < src->ntim; i++) { + 
dest->tim[i] = src->tim[i]; + if (src->tim[i].nshared) { + assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared)); + for (j = 0; j < src->tim[i].nshared; j++) + dest->tim[i].shared[j] = src->tim[i].shared[j]; + } + } + } +} + +/* Duplicates function */ +cuc_func *dup_func (cuc_func *f) +{ + cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func)); + int b, i; + for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]); + n->num_bb = f->num_bb; + assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb)); + for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b]; + n->num_init_bb = f->num_init_bb; + for (i = 0; i < MAX_REGS; i++) { + n->saved_regs[i] = f->saved_regs[i]; + n->lur[i] = f->lur[i]; + n->used_regs[i] = f->used_regs[i]; + } + n->start_addr = f->start_addr; + n->end_addr = f->end_addr; + n->orig_time = f->orig_time; + n->nmsched = f->nmsched; + n->num_runs = f->num_runs; + for (i = 0; i < f->nmsched; i++) { + n->msched[i] = f->msched[i]; + n->mtype[i] = f->mtype[i]; + } + n->nfdeps = f->nfdeps; + if (f->nfdeps) { + f->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps); + for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i]; + } + return n; +} + +/* Releases memory allocated by function */ +void free_func (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + dispose_list (&f->bb[b].insn[i].dep); + if (f->bb[b].insn) free (f->bb[b].insn); + for (i = 0; i < f->bb[b].ntim; i++) + if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared) + free (f->bb[b].tim[i].shared); + if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim); + } + free (f); +} + +/* Recalculates last_used_reg */ +void recalc_last_used_reg (cuc_func *f, int b) +{ + int i; + cuc_bb *bb = &f->bb[b]; + + /* rebuild last used reg array */ + if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0; + else bb->last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS 
- 1; i++) bb->last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < bb->ninsn; i++) { + int k; + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST) + if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)bb->insn[i].op[k] >= 0) { + bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i); + } + } +} + +/* Set the BB limits */ +void detect_bb (cuc_func *f) +{ + int i, j, end_bb = 0, eb = 0; + + /* Mark block starts/ends */ + for (i = 0; i < num_insn; i++) { + if (end_bb) insn[i].type |= IT_BBSTART; + end_bb = 0; + if (insn[i].type & IT_BRANCH) { + int jt = insn[i].op[0]; + insn[i].type |= IT_BBEND; + end_bb = 1; + if (jt < 0 || jt >= num_insn) { + fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm); + exit (1); + } + if (jt > 0) insn[jt - 1].type |= IT_BBEND; + insn[jt].type |= IT_BBSTART; + } + } + + /* Initialize bb array */ + insn[0].type |= IT_BBSTART; + insn[num_insn - 1].type |= IT_BBEND; + f->num_bb = 0; + for (i = 0; i < num_insn; i++) { + if (insn[i].type & IT_BBSTART) { + f->bb[f->num_bb].first = i; + f->bb[f->num_bb].cnt = 0; + } + /* Determine repetitions of a loop */ + if (insn[i].type & IT_BBEND) { + f->bb[f->num_bb].type = 0; + f->bb[f->num_bb].last = i; + f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1; + f->bb[f->num_bb].tmp = 0; + f->bb[f->num_bb].ntim = 0; + f->num_bb++; + assert (f->num_bb < MAX_BB); + } + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT"); + + /* Build forward connections between BBs */ + for (i = 0; i < f->num_bb; i++) + if (insn[f->bb[i].last].type & IT_BRANCH) { + int j; + assert (insn[f->bb[i].last].index == II_BF); + /* Find block this instruction jumps to */ + for (j = 0; j < f->num_bb; j++) + if (f->bb[j].first == insn[f->bb[i].last].op[0]) break; + assert (j < f->num_bb); + + /* Convert the jump address to BB link */ + insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB; + + /* Make 
a link */ + f->bb[i].next[0] = j; + if (++f->bb[j].tmp > 2) eb++; + f->bb[i].next[1] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */ + } else { + f->bb[i].next[0] = i + 1; + if (++f->bb[i + 1].tmp > 2) eb++; + } + + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT"); + + /* Build backward connections, but first insert artificial blocks + * to handle more than 2 connections */ + cucdebug (6, "artificial %i %i\n", f->num_bb, eb); + end_bb = f->num_bb + eb; + for (i = f->num_bb - 1; i >= 0; i--) { + j = f->bb[i].tmp; + if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp; + f->bb[--end_bb] = f->bb[i]; + reloc[i] = end_bb; + while (j-- > 2) { + f->bb[--end_bb].first = f->bb[i].first; + f->bb[end_bb].last = -1; + f->bb[end_bb].next[0] = -1; + f->bb[end_bb].next[1] = -1; + f->bb[end_bb].tmp = 0; + f->bb[end_bb].cnt = f->bb[i].cnt; + f->bb[end_bb].ntim = 0; + } + } + f->num_bb += eb; + + /* relocate jump instructions */ + for (i = 0; i < num_insn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (insn[i].opt[j] & OPT_BB) + insn[i].op[j] = reloc[insn[i].op[j]]; + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc"); + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 0) { + int t = reloc[f->bb[i].next[0]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[0] = t; + } + if (f->bb[i].next[1] >= 0) { + int t = reloc[f->bb[i].next[1]]; + if (f->bb[t].tmp < 0) { + f->bb[t].tmp = -f->bb[t].tmp; + t -= f->bb[t].tmp - 2; + } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2; + f->bb[i].next[1] = t; + } + /* artificial blocks do not have relocations, hardcode them */ + if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1; + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT"); + + /* Uncoditional branched do not continue to next block */ + for (i = 0; i < f->num_bb; i++) { + 
cuc_insn *ii; + if (f->bb[i].last < 0) continue; + ii = &insn[f->bb[i].last]; + /* Unconditional branch? */ + if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) { + change_insn_type (ii, II_NOP); +#if 0 + if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1]; +#endif + f->bb[i].next[1] = -1; + } + } + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP"); + + /* Add backward connections */ + for (i = 0; i < f->num_bb; i++) + f->bb[i].prev[0] = f->bb[i].prev[1] = -1; + + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].next[0] >= 0) { + int t = f->bb[i].next[0]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + if (f->bb[i].next[1] >= 0) { + int t = f->bb[i].next[1]; + if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i; + else { + assert (f->bb[t].prev[1] < 0); + f->bb[t].prev[1] = i; + } + } + } + /* Add START marker */ + assert (f->bb[0].prev[0] < 0); + f->bb[0].prev[0] = BBID_START; + + /* Add END marker */ + assert (f->bb[f->num_bb - 1].next[0] < 0); + assert (f->bb[f->num_bb - 1].next[1] < 0); + f->bb[f->num_bb - 1].next[0] = BBID_END; + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV"); +} + +/* We do a quick check if there are some anomalies with references */ +void cuc_check (cuc_func *f) +{ + int i, j, k; + cucdebug (1, "cuc_check\n"); + for (i = 0; i < f->num_bb; i++) { + if (!f->bb[i].insn && f->bb[i].ninsn) goto err; + for (j = 0; j < f->bb[i].ninsn; j++) { + cuc_insn *ii = &f->bb[i].insn[j]; + if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND && ii->opt[0] & OPT_DEST) { + k = 0; + assert (ii->opt[k] & OPT_REGISTER); + if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) { + cucdebug (1, "Invalid dest conditional type opt%x op%lx\n", ii->opt[0], ii->op[0]); + goto err; + } + } + for (k = 0; k < MAX_OPERANDS; k++) { + if (ii->opt[k] & OPT_REF) { + int t = ii->op[k]; + if (REF_BB(t) >= f->num_bb || REF_I (t) >= 
f->bb[REF_BB(t)].ninsn + || (ii->index == II_CMOV || ii->index == II_ADD) && ( + (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3 + || !(f->INSN(t).type & IT_COND) && k == 3)) { + cucdebug (1, "Conditional misused\n"); + goto err; + } + } + if (k && ii->opt[k] & OPT_DEST) { + cucdebug (1, "Destination only allowed for op0!\n"); + goto err; + } + } + } + } + return; +err: + cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k); + print_cuc_bb (f, "ANOMALY"); + cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k); + exit (1); +} + +/* Build basic blocks */ +void build_bb (cuc_func *f) +{ + int i, j, k; + for (i = 0; i < f->num_bb; i++) { + if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1; + else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1; + assert (f->bb[i].ninsn >= MAX_REGS - 1); + f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn); + assert (f->bb[i].insn); + f->bb[i].nmemory = 0; + f->bb[i].unrolled = 1; + + /* Save space for conditional moves, exclude r0, place lrbb instead */ + change_insn_type (&f->bb[i].insn[0], II_LRBB); + strcpy (f->bb[i].insn[0].disasm, "lrbb"); + f->bb[i].insn[0].type = IT_UNUSED | IT_COND; + f->bb[i].insn[0].dep = NULL; + f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST; + f->bb[i].insn[0].opt[1] = OPT_LRBB; + f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE; + for (j = 1; j < MAX_REGS - 1; j++) { + change_insn_type (&f->bb[i].insn[j], II_CMOV); + strcpy (f->bb[i].insn[j].disasm, "cmov"); + f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? 
IT_COND : 0; + f->bb[i].insn[j].dep = NULL; + f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER; + f->bb[i].insn[j].opt[0] |= OPT_DEST; + f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j; + f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER; + } + + /* Relocate instructions */ + for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) { + f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)]; + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int b1; + for (b1 = 0; b1 < i; b1++) + if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k] + && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break; + assert (b1 < f->num_bb); + f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1); + } + if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++; + } + } + cuc_check (f); +} + +/* Does simplification on blocks A, B, C: + A->B->C, A->C to just A->B->C */ +static void simplify_bb (cuc_func *f, int pred, int s1, int s2, int neg) +{ + cuc_insn *last; + int i; + if (cuc_debug >= 3) print_cuc_bb (f, "BEFORE_SIMPLIFY"); + cucdebug (3, "simplify %x->%x->%x (%i)\n", pred, s1, s2, neg); + assert (s2 != pred); /* Shouldn't occur => stupid */ + f->bb[pred].next[1] = -1; + f->bb[pred].next[0] = s1; + + if (f->bb[s2].prev[0] == pred) { + f->bb[s2].prev[0] = f->bb[s2].prev[1]; + f->bb[s2].prev[1] = -1; + } else if (f->bb[s2].prev[1] == pred) { + f->bb[s2].prev[1] = -1; + } else assert (0); + + last = &f->bb[pred].insn[f->bb[pred].ninsn - 1]; + assert (last->type & IT_BRANCH); + for (i = 0; i < f->bb[s2].ninsn; i++) { + cuc_insn *ii= &f->bb[s2].insn[i]; + if (ii->index == II_LRBB) { + change_insn_type (ii, II_CMOV); + ii->type = IT_COND; + ii->op[1] = neg ? 0 : 1; ii->opt[1] = OPT_CONST; + ii->op[2] = neg ? 
1 : 0; ii->opt[2] = OPT_CONST; + ii->op[3] = last->op[1]; ii->opt[3] = last->opt[1]; + } + } + change_insn_type (last, II_NOP); + if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_SIMPLIFY"); +} + +/* type == 0; keep predecessor condition + * type == 1; keep successor condition + * type == 2; join loop unrolled blocks */ +static void join_bb (cuc_func *f, int pred, int succ, int type) +{ + int i, j, k, n1, n2, ninsn, add_cond = 0; + unsigned long cond_op, cond_opt; + cuc_insn *insn; + + if (cuc_debug) cuc_check (f); + cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type); + cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn); + if (cuc_debug >= 3) fflush (stdout); + + n1 = f->bb[pred].ninsn; + n2 = f->bb[succ].ninsn; + if (n1 <= 0 + || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1; + if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1; + if (type == 2) add_cond = 1; + + //assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */ + + ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? 
MAX_REGS : 0); + + insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn)); + for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i]; + /* when type == 0, we move the last (jump) instruction to the end */ + if (type == 0 || type == 2) { + /* Move first branch instruction to the end */ + assert (insn[n1 - 1].type & IT_BRANCH); + insn[ninsn - 1] = insn[n1 - 1]; + cond_op = insn[n1 - 1].op[1]; + cond_opt = insn[n1 - 1].opt[1]; + + /* Remove old branch */ + change_insn_type (&insn[n1 - 1], II_NOP); + } + /* Copy second block */ + for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i]; + + /* and when type == 2, we may need to add sfor instruction, to quit when either is true */ + if (type == 2) { + /* Move second branch instruction to the end */ + if (insn[n1 + n2 - 1].type & IT_BRANCH) { + insn[ninsn - 1] = insn[n1 + n2 - 1]; + + /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */ + insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF; + + /* Remove old one */ + change_insn_type (&insn[n1 + n2 - 1], II_NOP); + } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */ + } + +#if 1 + /* LRBB at start of succ BB is not valid anymore */ + if (n1 > 0 && insn[n1].index == II_LRBB) { + if (type == 1) { + /* We have two possibilities, how this could have happened: + 1. we just moved second predecessor of succ to pred, + pred now having two predecessors => everything is ok + 2. 
we just moved second predecessor of succ to pred, + now, having just one predecessor => LRBB is not needed anymore */ + if (f->bb[pred].prev[1] < 0) { /* handle second option */ + change_insn_type (&insn[n1], II_ADD); + insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST; + insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST; + insn[n1].opt[3] = OPT_NONE; + } + } else { + assert (0); /* not tested yet */ + change_insn_type (&insn[n1], II_NOP); + for (i = n1; i < ninsn; i++) + if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) { + assert (insn[i].opt[3] == OPT_REF); + insn[i].op[3] = cond_op; + insn[i].opt[3] = cond_opt; + if (f->bb[pred].next[0] != succ) { + unsigned long t; /* negate conditional -- exchange */ + assert (f->bb[pred].next[1] == succ); + t = insn[i].op[1]; + insn[i].op[1] = insn[i].op[2]; + insn[i].op[2] = t; + t = insn[i].opt[1]; + insn[i].opt[1] = insn[i].opt[2]; + insn[i].opt[2] = t; + } + } + } + } +#endif + + for (i = 0; i < ninsn; i++) reloc[i] = -1; + + /* Add conditional instructions if required */ + if (add_cond) { + recalc_last_used_reg (f, pred); + recalc_last_used_reg (f, succ); + + /* r0 -- add nop for it */ + change_insn_type (&insn[n1 + n2], II_NOP); + for (i = 1; i < MAX_REGS; i++) { + cuc_insn *ii = &insn[n1 + n2 + i]; + int a = f->bb[pred].last_used_reg[i]; + int b = f->bb[succ].last_used_reg[i]; + + /* We have deleted first branch instruction, now we must setup FLAG_REG, + to point to conditional */ + if (i == FLAG_REG) { + change_insn_type (ii, II_CMOV); + ii->type = i == FLAG_REG || i == LRBB_REG ? 
IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = cond_op; ii->opt[1] = cond_opt; + if (b >= 0) { + ii->op[2] = b; ii->opt[2] = OPT_REF; + } else { + ii->op[2] = cond_op; ii->opt[2] = cond_opt; + } + ii->op[3] = cond_op; ii->opt[3] = cond_opt; + reloc[REF_I(a)] = REF (pred, n1 + n2 + i); + } else if (b < 0) change_insn_type (ii, II_NOP); + else if (a < 0) { + change_insn_type (ii, II_ADD); + ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = b; ii->opt[1] = OPT_REF; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + } else if (b >= 0) { + change_insn_type (ii, II_CMOV); + ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0; + ii->dep = NULL; + ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = a; ii->opt[1] = OPT_REF; + ii->op[2] = b; ii->opt[2] = OPT_REF; + ii->op[3] = cond_op; ii->opt[3] = cond_opt; + reloc[REF_I(a)] = REF (pred, n1 + n2 + i); + } + sprintf (ii->disasm, "cmov (join BB)"); + } + } + + if (cuc_debug) cuc_check (f); + i = 0; + switch (type) { + case 0: + assert (f->bb[pred].next[0] >= 0); + if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0]; + if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0]; + break; + case 1: + assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END); + f->bb[pred].next[0] = f->bb[succ].next[0]; + f->bb[pred].next[1] = f->bb[succ].next[1]; + break; + case 2: + assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END); + f->bb[pred].next[0] = f->bb[succ].next[0]; + f->bb[pred].next[1] = f->bb[succ].next[1]; + break; + } + if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1]; + if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1; + + if (type == 0) assert (f->bb[succ].next[1] < 0); + + /* We just did something stupid -- we joined two predecessors 
into one; + succ may need the information from which block we came. We will repair + this by converting LRBB to CMOV */ + for (j = 0; j < 2; j++) { + int nb = f->bb[pred].next[j]; + int t; + + /* check just valid connections */ + if (nb < 0 || nb == BBID_END) continue; + + /* check type */ + if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1; + else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0; + else continue; + + /* check all LRBB instructions. */ + for (i = 0; i < f->bb[nb].ninsn; i++) + if (f->bb[nb].insn[i].index == II_LRBB) { + cuc_insn *lrbb =&f->bb[nb].insn[i]; + change_insn_type (lrbb, II_CMOV); + lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST; + lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST; + lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt; + lrbb->type |= IT_COND; + } + } + + f->bb[succ].type = BB_DEAD; + //PRINTF (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type); + /* remove branch instruction, if there is only one successor */ + if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) { + assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */ + change_insn_type (&insn[ninsn - 1], II_NOP); + } + + /* Set max count */ + if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt; + f->bb[pred].ninsn = ninsn; + f->bb[succ].ninsn = 0; + free (f->bb[pred].insn); f->bb[pred].insn = NULL; + free (f->bb[succ].insn); f->bb[succ].insn = NULL; + f->bb[pred].insn = insn; + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred; + if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred; + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if (f->bb[i].insn[j].opt[k] & OPT_REF) { + /* Check if we are referencing successor BB -> relocate to second 
part of + the new block */ + if (REF_BB (f->bb[i].insn[j].op[k]) == succ) { + int t = f->bb[i].insn[j].op[k]; + int ndest = REF (pred, REF_I (t) + n1); + //PRINTF ("%x: %x %x\n", REF(i, j), t, ndest); + + /* We've found a reference to succ. block, being removed, relocate */ + f->bb[i].insn[j].op[k] = ndest; + } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) { + if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) { + f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])]; + } + } + } + } + + if (cuc_debug) cuc_check (f); + if (cuc_debug >= 3) print_cuc_bb (f, "join"); +} + +/* Optimize basic blocks */ +int optimize_bb (cuc_func *f) +{ + int modified = 0; + int i, j; +remove_lrbb: + /* we can remove lrbb instructions from blocks with just one predecessor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */ + for (j = 0; j < f->bb[i].ninsn; j++) + if (f->bb[i].insn[j].index == II_LRBB) { + cuc_insn *t; + cucdebug (4, "-lrbb %x.%x\n", i, j); + + /* Change to add LRBB, 0, 0 */ + change_insn_type (&f->bb[i].insn[j], II_ADD); + f->bb[i].insn[j].type &= ~IT_VOLATILE; + f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST; + f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */ + f->bb[i].insn[j].opt[3] = OPT_NONE; + modified = 1; + if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) { + t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1]; + + /* If the predecessor still has a conditional jump instruction, we must be careful. + If next[0] == next[1] join them. 
Now we will link lrbb and correct the situation */ + if (t->type & IT_BRANCH) { /* We must set a reference to branch result */ + f->bb[i].insn[j].opt[1] = t->opt[1]; + f->bb[i].insn[j].op[1] = t->op[1]; + /* sometimes branch is not needed anymore */ + if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP); + } + } + } + } + } + + /* Ordering of joining types is cruical -- we should concat all directly connected BBs + together first, so when we do a type != 1 joining, we can remove LRBB, directly by + looking at number of its predeccessors */ + + /* Type 1 joining + 1. link between pred & succ + 2. no other pred's successors + 3. no other succ's predecessors, except if pred has max one */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + int p = f->bb[i].prev[0]; + if (p < 0 || p == BBID_START) continue; + /* one successor and max sum of 3 predecessors */ + if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0 + && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) { + /* First we will move all predecessors from succ to pred, and then we will do + real type 1 joining */ + if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) { + int p1 = f->bb[i].prev[1]; + /* joining is surely not worth another extra memory access */ + if (f->bb[p].nmemory) continue; + if (f->bb[p].prev[0] >= 0) { + assert (f->bb[p].prev[1] < 0); + f->bb[p].prev[1] = p1; + } else f->bb[p].prev[0] = p1; + if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p; + else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p; + else assert (0); + f->bb[i].prev[1] = -1; + } + assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */ + join_bb (f, p, i, 1); + modified = 1; + goto remove_lrbb; + } + } + + /* Type 0 joining + 1. link between pred & succ + 2. no memory accesses in succ + 3. optional pred's second successors + 4. max. 
one succ's successors */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START + && f->bb[i].prev[1] < 0 /* one predecessor */ + && f->bb[i].next[1] < 0 /* max. one successor */ + && f->bb[i].nmemory == 0) { /* and no memory acceses */ + join_bb (f, f->bb[i].prev[0], i, 0); + modified = 1; + goto remove_lrbb; + } + + /* Type 2 joining + 1. link between pred & succ + 2. succ has exactly one predeccessor + 3. pred & succ share common successor + 4. optional succ's second successor */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */ + int p = f->bb[i].prev[0]; + if (p == BBID_START) continue; +#if 0 /* not yet supported */ + if (f->bb[p].next[0] == i + && (f->bb[i].next[1] == f->bb[p].next[1] + || f->bb[i].next[1] == f->bb[p].next[0])) { + join_bb (f, p, i, 2); + goto remove_lrbb; + } +#endif + if (f->bb[p].next[1] == i + && (f->bb[p].next[0] == f->bb[i].next[1] + || f->bb[p].next[0] == f->bb[i].next[0])) { + join_bb (f, p, i, 2); + modified = 1; + goto remove_lrbb; + } + } + + /* BB simplify: + 1. a block has exactly 2 successors A and B + 2. A has exactly one successor -- B + 3. A has no memory accesses + to: + flow always goes though A, LRBB is replaced by current block conditional + */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) + if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END + && f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) { + int a = f->bb[i].next[0]; + int b = f->bb[i].next[1]; + int neg = 0; + /* Exchange? 
*/ + if (f->bb[b].next[0] == a && f->bb[b].next[1] < 0) { + int t = a; + a = b; + b = t; + neg = 1; + } + /* Do the simplification if possible */ + if (f->bb[a].next[0] == b && f->bb[a].next[1] < 0 + && f->bb[a].nmemory == 0) { + simplify_bb (f, i, a, b, neg); + modified = 1; + goto remove_lrbb; + } + } + + return modified; +} + +/* Removes BBs marked as dead */ +int remove_dead_bb (cuc_func *f) +{ + int i, j, k, d = 0; + + for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) { + if (f->bb[i].insn) free (f->bb[i].insn); + f->bb[i].insn = NULL; + reloc[i] = -1; + } else { + reloc[i] = d; + f->bb[d++] = f->bb[i]; + } + if (f->num_bb == d) return 0; + f->num_bb = d; + + /* relocate initial blocks */ + for (i = 0; i < f->num_init_bb; i++) + f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]]; + + /* repair references */ + for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) { + cucdebug (5, "%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]); + fflush (stdout); + if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START) + assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0); + if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) + assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0); + if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END) + assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0); + if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) + assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0); + if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1; + if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1; + + for (j = 0; j < f->bb[i].ninsn; j++) + for (k = 0; k < MAX_OPERANDS; k++) + if ((f->bb[i].insn[j].opt[k] & OPT_BB) && + ((signed)f->bb[i].insn[j].op[k] >= 0)) { + if (f->bb[i].insn[j].op[k] != BBID_END) + assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0); + } else if (f->bb[i].insn[j].opt[k] & OPT_REF) { + int t 
= f->bb[i].insn[j].op[k]; + assert (reloc[REF_BB(t)] >= 0); + f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t)); + } + } + return 1; +} + +/* Recursive calculation of dependencies */ +static int reg_dep_rec (cuc_func *f, int cur) +{ + int i, j; + cuc_insn *insn = f->bb[cur].insn; + + //PRINTF ("\n %i", cur); + /* Spread only, do not loop */ + if (f->bb[cur].tmp) return; + f->bb[cur].tmp = 1; + //PRINTF ("! "); + + for (i = 0; i < f->bb[cur].ninsn; i++) { + /* Check for destination operand(s) */ + for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST) + if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) { + //PRINTF ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i)); + assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */ + f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i); + } + } + + if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END) + reg_dep_rec (f, f->bb[cur].next[0]); + if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END) + reg_dep_rec (f, f->bb[cur].next[1]); +} + +/* Detect register dependencies */ +void reg_dep (cuc_func *f) +{ + int i, b, c; + + /* Set dead blocks */ + for (b = 0; b < f->num_bb; b++) { + f->bb[b].tmp = 0; + for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1; + } + + /* Start with first block and set dependecies of all reachable blocks */ + /* At the same time set last_used_regs */ + reg_dep_rec (f, 0); + + for (i = 0; i < f->num_bb; i++) + if (f->bb[i].tmp) f->bb[i].tmp = 0; + else f->bb[i].type |= BB_DEAD; + + /* Detect loops; mark BBs where loops must be broken */ + for (c = 0; c < f->num_bb; c++) { + int min = 3, minb; + + /* search though all non-visited for minimum number of unvisited predecessors */ + for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) { + int tmp = 0; + if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START + && !f->bb[f->bb[b].prev[0]].tmp) tmp++; + if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != 
BBID_START + && !f->bb[f->bb[b].prev[1]].tmp) tmp++; + if (tmp < min) { + minb = b; + min = tmp; + if (tmp == 0) break; /* We already have the best one */ + } + } + b = minb; + f->bb[b].tmp = 1; /* Mark visited */ + cucdebug (3, "minb %i min %i\n", minb, min); + if (min) { /* We just broke the loop */ + f->bb[b].type |= BB_INLOOP; + } + } + + /* Set real predecessors in cmov instructions to previous blocks */ + for (b = 0; b < f->num_bb; b++) + for (i = 1; i < MAX_REGS - 1; i++) { + int pa, pb; + assert (f->bb[b].insn[i].index == II_CMOV); + assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST); + assert (f->bb[b].insn[i].op[0] == i); + if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1; + else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i]; + if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1; + else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i]; + + /* We do some very simple optimizations right away to make things more readable */ + if (pa < 0 && pb < 0) { + /* Was not used at all */ + change_insn_type (&f->bb[b].insn[i], II_ADD); + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pa < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else if (pb < 0) { + change_insn_type (&f->bb[b].insn[i], II_ADD); + assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST; + f->bb[b].insn[i].opt[3] = OPT_NONE; + } else { + int t = REF (b, 0); /* lrbb should be first instruction */ + assert (f->INSN(t).index == II_LRBB); + + f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF; + assert 
(f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST)); + + f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF; + assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST)); + + /* Update op[3] -- flag register */ + assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER); + assert (f->bb[b].insn[i].op[3] == LRBB_REG); + assert (t >= 0); + f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[3] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + } + } + + /* assign register references */ + for (b = 0; b < f->num_bb; b++) { + /* rebuild last used reg array */ + f->bb[b].last_used_reg[0] = -1; + if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0; + else f->bb[b].last_used_reg[LRBB_REG] = -1; + + for (i = 1; i < MAX_REGS - 1; i++) + f->bb[b].last_used_reg[i] = -1; + + /* Create references */ + for (i = 0; i < f->bb[b].ninsn; i++) { + int k; + /* Check for source operands first */ + for (k = 0; k < MAX_OPERANDS; k++) { + if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) { + if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) { + int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]]; + + if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */ + f->bb[b].insn[i].opt[k] = OPT_CONST; + f->bb[b].insn[i].op[k] = 0; + } else if (t >= 0) { + f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */ + f->bb[b].insn[i].op[k] = t; + assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST)); + //f->INSN(t).op[0] = -1; + } + } else if (f->bb[b].insn[i].opt[k] & OPT_REF) { + //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */ + f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED; + } + } + } + + /* Now check for destination operand(s) */ + for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST) + if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER + && (int)f->bb[b].insn[i].op[k] >= 0) { + assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never 
be dest */ + f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i); + } + } + } + + /* Remove all unused lrbb */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP); + + /* SSAs with final register value are marked as outputs */ + assert (f->bb[f->num_bb - 1].next[0] == BBID_END); + for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) { + int t = f->bb[f->num_bb - 1].last_used_reg[i]; + /* Mark them volatile, so optimizer does not remove them */ + if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT; + } + { + int t = f->bb[f->num_bb - 1].last_used_reg[i]; + /* Mark them volatile, so optimizer does not remove them */ + if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT; + } +} + +/* split the BB, based on the group numbers in .tmp */ +void expand_bb (cuc_func *f, int b) +{ + int n = f->num_bb; + int mg = 0; + int b1, i, j; + + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp; + + /* Create copies */ + for (b1 = 1; b1 <= mg; b1++) { + assert (f->num_bb < MAX_BB); + cpy_bb (&f->bb[f->num_bb], &f->bb[b]); + f->num_bb++; + } + + /* Relocate */ + for (b1 = 0; b1 < f->num_bb; b1++) + for (i = 0; i < f->bb[b1].ninsn; i++) { + dep_list *d = f->bb[b1].insn[i].dep; + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b1].insn[i].opt[j] & OPT_REF) { + int t = f->bb[b1].insn[i].op[j]; + if (REF_BB(t) == b && f->INSN(t).tmp != 0) + f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t)); + } + while (d) { + if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0) + d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref)); + d = d->next; + } + } + + /* Delete unused instructions */ + for (j = 0; j <= mg; j++) { + if (j == 0) b1 = b; + else b1 = n + j - 1; + for (i = 0; i < f->bb[b1].ninsn; i++) { + if (f->bb[b1].insn[i].tmp != j) + change_insn_type (&f->bb[b1].insn[i], II_NOP); + 
f->bb[b1].insn[i].tmp = 0; + } + if (j < mg) { + f->bb[b1].next[0] = n + j; + f->bb[b1].next[1] = -1; + f->bb[n + j].prev[0] = b1; + f->bb[n + j].prev[1] = -1; + } else { + i = f->bb[b1].next[0]; + f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1; + f->bb[n + j].prev[1] = -1; + if (i >= 0 && i != BBID_END) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + i = f->bb[b1].next[1]; + if (i >= 0 && i != BBID_END) { + if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1; + if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1; + } + } + } +} + +/* Scans sequence of BBs and set bb[].cnt */ +void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename) +{ + FILE *fi, *fo; + struct mprofentry_struct *buf; + const int bufsize = 256; + unsigned long *bb_start; + unsigned long *bb_end; + int b, i, r; + int curbb, prevbb = -1; + unsigned long addr = -1; + unsigned long prevaddr = -1; + int mssum = 0; + int mlsum = 0; + int mscnt = 0; + int mlcnt = 0; + int reopened = 0; + + /* Use already opened stream? 
*/ + if (runtime.sim.fmprof) { + fi = runtime.sim.fmprof; + reopened = 1; + rewind (fi); + } else assert (fi = fopen (mp_filename, "rb")); + assert (fo = fopen (bb_filename, "wb+")); + + assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb)); + for (b = 0; b < f->num_bb; b++) { + bb_start[b] = f->start_addr + f->bb[b].first * 4; + bb_end[b] = f->start_addr + f->bb[b].last * 4; + //PRINTF ("%i %x %x\n", b, bb_start[b], bb_end[b]); + f->bb[0].cnt = 0; + } + + buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize); + assert (buf); + + //PRINTF ("BBSEQ:\n"); + do { + r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi); + //PRINTF ("r%i : ", r); + for (i = 0; i < r; i++) { + if (buf[i].type & MPROF_FETCH) { + //PRINTF ("%x, ", buf[i].addr); + if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) { + assert (buf[i].type & MPROF_32); + prevaddr = addr; + addr = buf[i].addr; + for (b = 0; b < f->num_bb; b++) + if (bb_start[b] <= addr && addr <= bb_end[b]) break; + assert (b < f->num_bb); + curbb = b; + if (prevaddr + 4 != addr) prevbb = -1; + } else curbb = -1; + +#warning TODO: do not count interrupts + if (curbb != prevbb && curbb >= 0) { + fwrite (&curbb, sizeof (unsigned long), 1, fo); + //PRINTF (" [%i] ", curbb); + f->bb[curbb].cnt++; + prevbb = curbb; + } + } else { + if (verify_memoryarea(buf[i].addr)) { + if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw; + else mlcnt++, mlsum += cur_area->delayr; + } + } + } + //PRINTF ("\n"); + } while (r == bufsize); + //PRINTF ("\n"); + + runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt; + runtime.cuc.mdelay[1] = (1. 
* mssum) / mscnt; + runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1; + f->num_runs = f->bb[0].cnt; + if (!reopened) fclose (fi); + fclose (fo); + free (buf); + free (bb_end); + free (bb_start); + + /* Initialize basic block relocations */ + f->num_init_bb = f->num_bb; + //PRINTF ("num_init_bb = %i\n", f->num_init_bb); + assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb)); + for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b; +} + +/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */ +void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll) +{ + FILE *fi; + const int bufsize = 256; + int i, r; + int *buf; + int cnt = 0; + int times = preroll - 1 + unroll; + + assert (fi = fopen (bb_filename, "rb")); + for (i = 0; i < times; i++) counts[i] = 0; + assert (buf = (int *) malloc (sizeof (int) * bufsize)); + + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + /* count consecutive acesses */ + if (f->init_bb_reloc[buf[i]] == b) { + counts[cnt]++; + if (++cnt >= times) cnt = preroll - 1; + } else cnt = 0; + } + } while (r == bufsize); + + log ("Counts %i,%i :", preroll, unroll); + for (i = 0; i < times; i++) log ("%x ", counts[i]); + log ("\n"); + + fclose (fi); + free (buf); +} + +/* relocate all accesses inside of BB b to back/fwd */ +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} + +/* Preroll if type == 1 or unroll if type == 0 loop in BB b `ntimes' times and return + new function. Original function is unmodified. 
*/ +static cuc_func *roll_loop (cuc_func *f, int b, int ntimes, int type) +{ + int b1, t, i, prevb, prevart_b; + cuc_func *n = dup_func (f); + cuc_bb *ob = &f->bb[b]; + cuc_insn *ii; + + assert (ntimes > 1); + cucdebug (3, "roll type = %i, BB%i x %i (num_bb %i)\n", type, b, ntimes, n->num_bb); + ntimes--; + assert (n->num_bb + ntimes * 2 < MAX_BB); + + prevb = b; + prevart_b = b; + + /* point to first artificial block */ + if (n->bb[b].next[0] != b) { + n->bb[b].next[0] = n->num_bb + 1; + } else if (n->bb[b].next[1] != b) { + n->bb[b].next[1] = n->num_bb + 1; + } + + /* Duplicate the BB */ + for (t = 0; t < ntimes; t++) { + cuc_bb *pb = &n->bb[prevart_b]; + /* Add new block and set links */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~BB_INLOOP; + print_cuc_bb (n, "prerollA"); + + printf ("prevb %i b1 %i prevart %i\n", prevb, b1, prevart_b); + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) { + n->bb[prevb].next[0] = b1; + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[1] = b1 + 1; + } else if (n->bb[prevb].next[1] == b) { + if (pb->next[0] < 0) pb->next[0] = b1 + 1; + else pb->next[1] = b1 + 1; + n->bb[b1].next[0] = b1 + 1; + n->bb[prevb].next[1] = b1; + } else assert (0); + + /* Set predecessor */ + n->bb[b1].prev[0] = prevb; + n->bb[b1].prev[1] = -1; + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b1], b, b1, prevb); + + /* add artificial block, just to join accesses */ + b1 = n->num_bb++; + cpy_bb (&n->bb[b1], ob); + n->bb[b1].cnt = 0; + + for (i = 0; i < ob->ninsn - 1; i++) { + ii = &n->bb[b1].insn[i]; + if (ob->insn[i].opt[0] & OPT_DEST) { + change_insn_type (ii, II_CMOV); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF; + ii->op[2] = REF (b1 - 1, i); ii->opt[2] 
= OPT_REF; + + /* Take left one, if we should have finished the first iteration*/ + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + assert (pb->insn[pb->ninsn - 1].type & IT_COND); + ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF; + } + ii->dep = NULL; + ii->type = ob->insn[i].type & IT_COND; + } else { + change_insn_type (ii, II_NOP); + } + } + + /* Add conditional or instruction at the end, prioritizing flags */ + ii = &n->bb[b1].insn[ob->ninsn - 1]; + change_insn_type (ii, II_CMOV); + ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST; + if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[1] = pb->insn[pb->ninsn - 1].op[1]; + ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[1] = REF (prevart_b, pb->ninsn - 1); + ii->opt[1] = OPT_REF; + } + if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) { + ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1]; + ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1]; + } else { + ii->op[2] = REF (b1 - 1, pb->ninsn - 1); + ii->opt[2] = OPT_REF; + } + /* {z = x || y;} is same as {z = x ? x : y;} */ + ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1]; + ii->type = IT_COND; + + /* Only one should be in loop, so we remove any INLOOP flags from duplicates */ + n->bb[b1].type &= ~BB_INLOOP; + n->bb[b1].prev[0] = prevart_b; + n->bb[b1].prev[1] = b1 - 1; + n->bb[b1].next[0] = -1; + n->bb[b1].next[1] = -1; + + prevb = b1 - 1; + prevart_b = b1; + print_cuc_bb (n, "prerollB"); + } + + print_cuc_bb (n, "preroll0"); + n->bb[prevart_b].next[0] = ob->next[0] == b ? 
ob->next[1] : ob->next[0]; + + print_cuc_bb (n, "preroll1"); + /* repair BB after loop, to point back to latest artificial BB */ + b1 = n->bb[prevart_b].next[0]; + if (b1 >= 0 && b1 != BBID_END) { + if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b; + else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b; + else assert (0); + } + + if (type) { + /* Relink to itself */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb; + else assert (0); + n->bb[prevb].prev[1] = prevb; + + /* Set predecessor */ + if (n->bb[b].prev[0] == b) { + n->bb[b].prev[0] = n->bb[b].prev[1]; + n->bb[b].prev[1] = -1; + } else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = -1; + else assert (0); + } else { + /* Relink back to start of the loop */ + /* Set predecessor's successor */ + if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b; + else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b; + else assert (0); + + /* Set predecessor */ + if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb; + else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb; + else assert (0); + } + + print_cuc_bb (n, "preroll2"); + + /* Relocate backward references to current instance and forward references + to previous one */ + relocate_bb (&n->bb[b], b, b, prevb); + + /* Relocate all other blocks to point to latest prevart_b */ + for (i = 0; i < f->num_bb; i++) + if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b); + + return n; +} + +/* Unroll loop b unroll times and return new function. Original + function is unmodified. 
*/ +cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename) +{ + int b1, i; + cuc_func *n, *t; + int *counts; + + if (preroll > 1) { + t = roll_loop (f, b, preroll, 1); + b1 = t->num_bb - 2; + if (unroll > 1) { + //print_cuc_bb (t, "preunroll1"); + n = roll_loop (t, b1, unroll, 0); + free_func (t); + } else n = t; + } else { + b1 = b; + if (unroll > 1) n = roll_loop (f, b1, unroll, 0); + else return dup_func (f); + } + + /* Assign new counts to functions */ + assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll))); + count_bb_seq (n, b, bb_filename, counts, preroll, unroll); + for (i = 0; i < preroll - 1 + unroll; i++) { + if (i == 0) b1 = b; + else b1 = f->num_bb + (i - 1) * 2; + n->bb[b1].cnt = counts[i]; + } + + //print_cuc_bb (n, "preunroll"); + free (counts); + return n; +} +
bb.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: insn.c =================================================================== --- insn.c (nonexistent) +++ insn.c (revision 1765) @@ -0,0 +1,1424 @@ +/* insn.c -- OpenRISC Custom Unit Compiler, instruction support + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + +/* Table of known instructions. Watch out for indexes I_*! 
*/ +const cuc_known_insn known[II_LAST + 1] = { +{"add", 1, "assign \1 = \2 + \3;"}, +{"sub", 0, "assign \1 = \2 - \3;"}, +{"and", 1, "assign \1 = \2 & \3;"}, +{"or", 1, "assign \1 = \2 | \3;"}, +{"xor", 1, "assign \1 = \2 ^ \3;"}, +{"mul", 1, "assign \1 = \2 * \3;"}, + +{"srl", 0, "assign \1 = \2 >> \3;"}, +{"sll", 0, "assign \1 = \2 << \3;"}, +{"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\ + | \2 >> \3;"}, + +{"lb", 0, "always @(posedge clk)"}, +{"lh", 0, "always @(posedge clk)"}, +{"lw", 0, "always @(posedge clk)"}, +{"sb", 0, "/* mem8[\2] = \1 */"}, +{"sh", 0, "/* mem16[\2] = \1 */"}, +{"sw", 0, "/* mem32[\2] = \1 */"}, + +{"sfeq", 1, "assign \1 = \2 == \3;"}, +{"sfne", 1, "assign \1 = \2 != \3;"}, +{"sfle", 0, "assign \1 = \2 <= \3;"}, +{"sflt", 0, "assign \1 = \2 < \3;"}, +{"sfge", 0, "assign \1 = \2 >= \3;"}, +{"sfgt", 0, "assign \1 = \2 > \3;"}, +{"bf", 0, ""}, + +{"lrbb", 0,"always @(posedge clk or posedge rst)"}, +{"cmov", 0,"assign \1 = \4 ? \2 : \3;"}, +{"reg", 0, "always @(posedge clk)"}, + +{"nop", 1, ""}, +{"call", 0, "/* function call */"}}; + +/* Find known instruction and attach them to insn */ +void change_insn_type (cuc_insn *i, int index) +{ + int j; + assert (index >= 0 && index <= II_LAST); + i->index = index; + if (i->index == II_NOP) { + for (j = 0; j < MAX_OPERANDS; j++) i->opt[j] = OPT_NONE; + i->type = 0; + i->dep = NULL; + i->disasm[0] = '\0'; + } +} + +/* Returns instruction name */ +const char *cuc_insn_name (cuc_insn *ii) { + if (ii->index < 0 || ii->index > II_LAST) return "???"; + else return known[ii->index].name; +} + +/* Prints out instructions */ +void print_insns (int bb, cuc_insn *insn, int ninsn, int verbose) +{ + int i, j; + for (i = 0; i < ninsn; i++) { + char tmp[10]; + dep_list *l = insn[i].dep; + sprintf (tmp, "[%x_%x]", bb, i); + PRINTF ("%-8s%c %-4s ", tmp, insn[i].index >= 0 ? 
':' : '?', cuc_insn_name (&insn[i])); + if (verbose) { + PRINTF ("%-20s insn = %08lx, index = %i, type = %04x ", + insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type); + } else PRINTF ("type = %04x ", insn[i].type); + for (j = 0; j < MAX_OPERANDS; j++) { + if (insn[i].opt[j] & OPT_DEST) PRINTF ("*"); + switch (insn[i].opt[j] & ~OPT_DEST) { + case OPT_NONE: + break; + case OPT_CONST: + if (insn[i].type & IT_COND && (insn[i].index == II_CMOV + || insn[i].index == II_ADD)) + PRINTF ("%lx, ", insn[i].op[j]); + else + PRINTF ("0x%08lx, ", insn[i].op[j]); + break; + case OPT_JUMP: + PRINTF ("J%lx, ", insn[i].op[j]); + break; + case OPT_REGISTER: + PRINTF ("r%li, ", insn[i].op[j]); + break; + case OPT_REF: + PRINTF ("[%lx_%lx], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); + break; + case OPT_BB: + PRINTF ("BB "); + print_bb_num (insn[i].op[j]); + PRINTF (", "); + break; + case OPT_LRBB: + PRINTF ("LRBB, "); + break; + default: + fprintf (stderr, "Invalid operand type %s(%x_%x) = %x\n", + cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]); + assert (0); + } + } + if (l) { + PRINTF ("\n\tdep:"); + while (l) { + PRINTF (" [%lx_%lx],", REF_BB (l->ref), REF_I (l->ref)); + l = l->next; + } + } + PRINTF ("\n"); + } +} + +void add_dep (dep_list **list, int dep) +{ + dep_list *ndep; + dep_list **tmp = list; + + while (*tmp) { + if ((*tmp)->ref == dep) return; /* already there */ + tmp = &((*tmp)->next); + } + ndep = (dep_list *)malloc (sizeof (dep_list)); + ndep->ref = dep; + ndep->next = NULL; + *tmp = ndep; +} + +void dispose_list (dep_list **list) +{ + while (*list) { + dep_list *tmp = *list; + *list = tmp->next; + free (tmp); + } +} + +void add_data_dep (cuc_func *f) +{ + int b, i, j; + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) { + fflush (stdout); + if (insn[i].opt[j] & OPT_REF) { + /* Copy list from predecessor */ + dep_list *l = f->INSN(insn[i].op[j]).dep; + while 
(l) { + add_dep (&insn[i].dep, l->ref); + l = l->next; + } + /* add predecessor */ + add_dep (&insn[i].dep, insn[i].op[j]); + } + } + } +} + +/* Inserts n nops before insn 'ref' */ +void insert_insns (cuc_func *f, int ref, int n) +{ + int b1, i, j; + int b = REF_BB(ref); + int ins = REF_I(ref); + + assert (b < f->num_bb); + assert (ins <= f->bb[b].ninsn); + assert (f->bb[b].ninsn + n < MAX_INSNS); + if (cuc_debug >= 8) print_cuc_bb (f, "PREINSERT"); + f->bb[b].insn = (cuc_insn *) realloc (f->bb[b].insn, + (f->bb[b].ninsn + n) * sizeof (cuc_insn)); + + /* Set up relocations */ + for (i = 0; i < f->bb[b].ninsn; i++) + if (i < ins) reloc[i] = i; + else reloc[i] = i + n; + + /* Move instructions, based on relocations */ + for (i = f->bb[b].ninsn - 1; i >= 0; i--) f->bb[b].insn[reloc[i]] = f->bb[b].insn[i]; + for (i = 0; i < n; i++) change_insn_type (&f->bb[b].insn[ins + i], II_NOP); + + f->bb[b].ninsn += n; + for (b1 = 0; b1 < f->num_bb; b1++) { + dep_list *d = f->bb[b1].mdep; + while (d) { + if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins) + d->ref = REF (b, REF_I (d->ref) + n); + d = d->next; + } + for (i = 0; i < f->bb[b1].ninsn; i++) { + d = f->bb[b1].insn[i].dep; + while (d) { + if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins) + d->ref = REF (b, REF_I (d->ref) + n); + d = d->next; + } + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b1].insn[i].opt[j] & OPT_REF && REF_BB (f->bb[b1].insn[i].op[j]) == b + && REF_I (f->bb[b1].insn[i].op[j]) >= ins) + f->bb[b1].insn[i].op[j] = REF (b, REF_I (f->bb[b1].insn[i].op[j]) + n); + } + } + for (i = 0; i < f->nmsched; i++) + if (REF_BB(f->msched[i]) == b) f->msched[i] = REF (b, reloc[REF_I (f->msched[i])]); + if (cuc_debug >= 8) print_cuc_bb (f, "POSTINSERT"); + cuc_check (f); +} + +/* returns nonzero, if instruction was simplified */ +int apply_edge_condition (cuc_insn *ii) +{ + unsigned int c = ii->op[2]; + + switch (ii->index) { + case II_AND: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + 
ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_OR: + if (ii->opt[2] & OPT_CONST && c == 0x0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0xffffffff; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SUB: + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_MUL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 1) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else + if (ii->opt[2] & OPT_CONST && c == 0xffffffff) { + change_insn_type (ii, II_SUB); + ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1]; + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + return 1; + } else break; + case II_SRL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SLL: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; 
+ } else if (ii->opt[2] & OPT_CONST && c >= 32) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SRA: + if (ii->opt[2] & OPT_CONST && c == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = c; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFEQ: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] == ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFNE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] != ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else break; + case II_SFLE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] <= ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_SFEQ); + } else break; + case II_SFLT: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] < ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + } break; + case II_SFGE: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] >= ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_ADD); + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + } else break; + case II_SFGT: + if (ii->opt[1] & OPT_CONST && ii->opt[2] & 
OPT_CONST) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[1] > ii->op[2]; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) { + change_insn_type (ii, II_SFNE); + } else break; + case II_CMOV: + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->opt[3] & OPT_CONST) { + change_insn_type (ii, II_ADD); + if (ii->op[3]) { + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + } else { + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + } + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->type & IT_COND) { + if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) { + if (ii->op[1] && !ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = ii->op[3]; ii->opt[1] = ii->opt[3]; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (ii->op[1] && ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + if (!ii->op[1] && !ii->op[2]) { + change_insn_type (ii, II_ADD); + ii->op[1] = 0; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + return 1; + } + } + if (ii->op[1] == ii->op[3] && ii->opt[1] == ii->opt[3]) { + ii->op[1] = 1; ii->opt[1] = OPT_CONST; + return 1; + } + if (ii->op[2] == ii->op[3] && ii->opt[2] == ii->opt[3]) { + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + return 1; + } + } + break; + } + return 0; +} + +/* First primary input */ +static unsigned long tmp_op, tmp_opt; + +/* Recursive function that searches for primary inputs; + returns 0 if cmov can be replaced by add */ +static int cmov_needed (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + int j; + + cucdebug (4, " %x", ref); + /* mark visited, if already marked, we have a loop, ignore */ + if (ii->tmp) return 0; + 
ii->tmp = 1; + + /* handle normal movs separately */ + if (ii->index == II_ADD && !(ii->type & IT_VOLATILE) + && ii->opt[2] == OPT_CONST && ii->op[2] == 0) { + if (ii->opt[1] == OPT_REF) { + if (cmov_needed (f, ii->op[1])) { + ii->tmp = 0; + return 1; + } + } else { + if (tmp_opt == OPT_NONE) { + tmp_op = ii->op[1]; + tmp_opt = ii->opt[1]; + } else if (tmp_opt != ii->opt[1] || tmp_op != ii->op[1]) { + ii->tmp = 0; + return 1; + } + } + ii->tmp = 0; + return 0; + } + + /* Is this instruction CMOV? no => add to primary inputs */ + if ((ii->index != II_CMOV) || (ii->type & IT_VOLATILE)) { + if (tmp_opt == OPT_NONE) { + tmp_op = ref; + tmp_opt = OPT_REF; + ii->tmp = 0; + return 0; + } else if (tmp_opt != OPT_REF || tmp_op != ref) { + ii->tmp = 0; + return 1; + } else { + ii->tmp = 0; + return 0; + } + } + + for (j = 1; j < 3; j++) { + cucdebug (4, "(%x:%i)", ref, j); + if (ii->opt[j] == OPT_REF) { + if (cmov_needed (f, ii->op[j])) { + ii->tmp = 0; + return 1; + } + } else { + if (tmp_opt == OPT_NONE) { + tmp_op = ii->op[j]; + tmp_opt = ii->opt[j]; + } else if (tmp_opt != ii->opt[j] || tmp_op != ii->op[j]) { + ii->tmp = 0; + return 1; + } + } + } + + ii->tmp = 0; + return 0; +} + +/* Search and optimize complex cmov assignments */ +int optimize_cmovs (cuc_func *f) +{ + int modified = 0; + int b, i; + + /* Mark all instructions unvisited */ + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) + for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].tmp = 0; + + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + if (ii->index == II_CMOV && !(ii->type & IT_VOLATILE)) { + tmp_opt = OPT_NONE; + cucdebug (4, "\n"); + if (!cmov_needed (f, REF(b, i))) { + assert (tmp_opt != OPT_NONE); + change_insn_type (ii, II_ADD); + ii->op[1] = tmp_op; ii->opt[1] = tmp_opt; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + modified = 1; + } + } + } + } + return 
modified; +} + +/* returns number of instructions, using instruction ref */ +static int insn_uses (cuc_func *f, int ref) +{ + int b, i, j; + int cnt = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == ref) cnt++; + return cnt; +} + +/* handles some common CMOV, CMOV-CMOV cases; + returns nonzero if anything optimized */ +static int optimize_cmov_more (cuc_func *f, int ref) +{ + int t = 0; + cuc_insn *ii = &f->INSN(ref); + assert (ii->index == II_CMOV); + + /* In case of x = cmov x, y; or x = cmov y, x; we have + asynchroneous loop -> remove it */ + if ((ii->opt[1] & OPT_REF) && ii->op[1] == ref) t = 1; + if ((ii->opt[2] & OPT_REF) && ii->op[2] == ref) t = 2; + if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) t = 2; + if (t) { + change_insn_type (ii, II_ADD); + cucdebug (2, "%8x:cmov %i\n", ref, t); + ii->opt[t] = OPT_CONST; + ii->op[t] = 0; + ii->opt[3] = OPT_NONE; + return 1; + } + if (!(ii->type & IT_COND)) { + for (t = 1; t <= 2; t++) { + /* cmov L, X, Y, C1 + cmov Z, L, Y, C2 + can be replaced with simpler: + cmov L, C1, C2, C2 + cmov Z, X, Y, L */ + if (ii->opt[t] == OPT_REF && f->INSN(ii->op[t]).index == II_CMOV) { + int r = ii->op[t]; + unsigned long x, xt, y, yt; + cuc_insn *prev = &f->INSN(r); + cuc_check (f); + cucdebug (3, "%x-%x\n", ref, r); + assert (!(prev->type & IT_COND)); + if (prev->op[3 - t] != ii->op[3 - t] || prev->opt[3 - t] != ii->opt[3 - t] + || insn_uses (f, r) > 1) continue; + cucdebug (3, "%x-%x cmov more\n", ref, r); + prev->type |= IT_COND; + x = prev->op[t]; xt = prev->opt[t]; + y = prev->op[3 - t]; yt = prev->opt[3 - t]; + prev->op[t] = ii->op[3]; prev->opt[t] = ii->opt[3]; /* C2 */ + ii->op[3] = r; ii->opt[3] = OPT_REF; /* L */ + prev->op[3 - t] = prev->op[3]; prev->opt[3 - t] = prev->opt[3]; /* C1 */ + prev->op[3] = prev->op[t]; prev->opt[3] = prev->opt[t]; /* C2 */ + ii->op[t] = x; ii->opt[t] = 
xt; /* X */ + ii->op[3 - t] = y; ii->opt[3 - t] = yt; /* Y */ + prev->op[0] = -1; prev->opt[0] = OPT_REGISTER | OPT_DEST; + cuc_check (f); + return 1; + } + } + } + + if (ii->opt[3] & OPT_REF) { + cuc_insn *prev = &f->INSN(ii->op[3]); + assert (prev->type & IT_COND); + if (prev->index == II_CMOV) { + /* negated conditional: + cmov x, 0, 1, y + cmov z, a, b, x + is replaced by + cmov z, b, a, y */ + if (prev->opt[1] & OPT_CONST && prev->opt[2] & OPT_CONST + && !prev->op[1] && prev->op[2]) { + unsigned long t; + t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t; + t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t; + ii->op[3] = prev->op[3]; ii->opt[3] = prev->opt[3]; + } + } else if (prev->index == II_ADD) { + /* add x, y, 0 + cmov z, a, b, x + is replaced by + cmov z, a, b, y */ + if (prev->opt[2] & OPT_CONST && prev->op[2] == 0) { + ii->op[3] = prev->op[1]; ii->opt[3] = prev->opt[1]; + return 1; + } + } + } + + /* Check if both choices can be pushed through */ + if (ii->opt[1] & OPT_REF && ii->opt[2] & OPT_REF + /* Usually doesn't make sense to move conditionals though => more area */ + && !(ii->type & IT_COND)) { + cuc_insn *a, *b; + a = &f->INSN(ii->op[1]); + b = &f->INSN(ii->op[2]); + if (a->index == b->index && !(a->type & IT_VOLATILE) && !(b->type & IT_VOLATILE)) { + int diff = -1; + int i; + for (i = 0; i < MAX_OPERANDS; i++) + if (a->opt[i] != b->opt[i] || !(a->op[i] == b->op[i] || a->opt[i] & OPT_REGISTER)) { + if (diff == -1) diff = i; else diff = -2; + } + /* If diff == -1, it will be eliminated by CSE */ + if (diff >= 0) { + cuc_insn tmp, cmov; + int ref2 = REF (REF_BB (ref), REF_I (ref) + 1); + insert_insns (f, ref, 1); + a = &f->INSN(f->INSN(ref2).op[1]); + b = &f->INSN(f->INSN(ref2).op[2]); + cucdebug (4, "ref = %x %lx %lx\n", ref, f->INSN(ref2).op[1], + f->INSN(ref2).op[2]); + if (cuc_debug >= 7) { + print_cuc_bb (f, "AAA"); + cuc_check (f); + } + tmp = *a; + cmov = f->INSN(ref2); + tmp.op[diff] = ref; tmp.opt[diff] = OPT_REF; + cmov.op[0] = 
-1; cmov.opt[0] = OPT_REGISTER | OPT_DEST; + cmov.op[1] = a->op[diff]; cmov.opt[1] = a->opt[diff]; + cmov.op[2] = b->op[diff]; cmov.opt[2] = b->opt[diff]; + change_insn_type (&cmov, II_CMOV); + cmov.type &= ~IT_COND; + cucdebug (4, "ref2 = %x %lx %lx\n", ref2, cmov.op[1], cmov.op[2]); + if (cmov.opt[1] & OPT_REF && cmov.opt[2] & OPT_REF + && f->INSN(cmov.op[1]).type & IT_COND) { + assert (f->INSN(cmov.op[2]).type & IT_COND); + cmov.type |= IT_COND; + } + f->INSN(ref) = cmov; + f->INSN(ref2) = tmp; + if (cuc_debug >= 6) print_cuc_bb (f, "BBB"); + cuc_check (f); + return 1; + } + } + } + return 0; +} + +/* Optimizes dataflow tree */ +int optimize_tree (cuc_func *f) +{ + int b, i, j; + int modified; + int gmodified = 0; + + do { + modified = 0; + if (cuc_debug) cuc_check (f); + for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) { + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + /* We tend to have the third parameter const if instruction is cumutative */ + if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)) { + int cond = ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE; + if (known[ii->index].comutative || cond) { + unsigned long t = ii->opt[1]; + ii->opt[1] = ii->opt[2]; + ii->opt[2] = t; + t = ii->op[1]; + ii->op[1] = ii->op[2]; + ii->op[2] = t; + modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i)); + if (cond) { + if (ii->index == II_SFEQ) ii->index = II_SFNE; + else if (ii->index == II_SFNE) ii->index = II_SFEQ; + else if (ii->index == II_SFLE) ii->index = II_SFGT; + else if (ii->index == II_SFLT) ii->index = II_SFGE; + else if (ii->index == II_SFGE) ii->index = II_SFLT; + else if (ii->index == II_SFGT) ii->index = II_SFLE; + else assert (0); + } + } + } + + /* Try to do the promotion */ + /* We have two consecutive expressions, containing constants, + * if previous is a simple expression we can handle it simply: */ + for 
(j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_REF) { + cuc_insn *t = &f->INSN(ii->op[j]); + if (f->INSN(ii->op[j]).index == II_ADD + && f->INSN(ii->op[j]).opt[2] & OPT_CONST + && f->INSN(ii->op[j]).op[2] == 0 + && !(ii->type & IT_MEMORY && t->type & IT_MEMADD)) { + /* do not promote through add-mem, and branches */ + modified = 1; + cucdebug (2, "%8x:promote%i %8lx %8lx\n", REF (b, i), j, ii->op[j], t->op[1]); + ii->op[j] = t->op[1]; + ii->opt[j] = t->opt[1]; + } + } + + /* handle some CMOV cases more deeply */ + if (ii->index == II_CMOV && optimize_cmov_more (f, REF (b, i))) { + modified = 1; + continue; + } + + /* Do nothing to volatile instructions */ + if (ii->type & IT_VOLATILE) continue; + + /* Check whether we can simplify the instruction */ + if (apply_edge_condition (ii)) { + modified = 1; + continue; + } + /* We cannot do anything more if at least one is not constant */ + if (!(ii->opt[2] & OPT_CONST)) continue; + + if (ii->opt[1] & OPT_CONST) { /* We have constant expression */ + unsigned long value; + int ok = 1; + /* Was constant expression already? 
*/ + if (ii->index == II_ADD && !ii->op[2]) continue; + + if (ii->index == II_ADD) value = ii->op[1] + ii->op[2]; + else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2]; + else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2]; + else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2]; + else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2]; + else if (ii->index == II_OR) value = ii->op[1] | ii->op[2]; + else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2]; + else if (ii->index == II_AND) value = ii->op[1] & ii->op[2]; + else ok = 0; + if (ok) { + change_insn_type (ii, II_ADD); + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = value; ii->opt[1] = OPT_CONST; + ii->op[2] = 0; ii->opt[2] = OPT_CONST; + modified = 1; cucdebug (2, "%8x:const\n", REF (b, i)); + } + } else if (ii->opt[1] & OPT_REF) { + cuc_insn *prev = &f->INSN(ii->op[1]); + /* Is this just a move? */ + if (ii->index == II_ADD + && !(ii->type & IT_MEMADD) && ii->op[2] == 0) { + int b1, i1, j1; + cucdebug (2, "%8x:link %8lx: ", REF(b, i), ii->op[1]); + if (!(prev->type & (IT_OUTPUT | IT_VOLATILE))) { + assert (ii->opt[0] & OPT_DEST); + prev->op[0] = ii->op[0]; prev->opt[0] = ii->opt[0]; + prev->type |= ii->type & IT_OUTPUT; + for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD)) + for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) + for (j1 = 0; j1 < MAX_OPERANDS; j1++) + if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF) + && f->bb[b1].insn[i1].op[j1] == REF(b, i)) { + cucdebug (2, "%x ", REF (b1, i1)); + f->bb[b1].insn[i1].op[j1] = ii->op[1]; + } + cucdebug (2, "\n"); + change_insn_type (ii, II_NOP); + } + } else if (prev->opt[2] & OPT_CONST) { + /* Handle some common cases */ + /* add - add joining */ + if (ii->index == II_ADD && prev->index == II_ADD) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i)); + } else /* add - sub joining */ + if (ii->index == II_ADD && 
prev->index == II_SUB) { + change_insn_type (&insn[i], II_SUB); + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i)); + } else /* sub - add joining */ + if (ii->index == II_SUB && prev->index == II_ADD) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i)); + } else /* add - sfxx joining */ + if (prev->index == II_ADD && ( + ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE)) { + if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] -= prev->op[2]; + modified = 1; cucdebug (2, "%8x: add-sfxx\n", REF(b, i)); + } + } else /* sub - sfxx joining */ + if (prev->index == II_SUB && ( + ii->index == II_SFEQ || ii->index == II_SFNE + || ii->index == II_SFLT || ii->index == II_SFLE + || ii->index == II_SFGT || ii->index == II_SFGE)) { + if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) { + ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1]; + ii->op[2] += prev->op[2]; + modified = 1; cucdebug (2, "%8x: sub-sfxx\n", REF(b, i)); + } + } + } + } + } + } + if (modified) gmodified = 1; + } while (modified); + return gmodified; +} + +/* Remove nop instructions */ +int remove_nops (cuc_func *f) +{ + int b; + int modified = 0; + for (b = 0; b < f->num_bb; b++) { + int c, d = 0, i, j; + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].index != II_NOP) { + reloc [i] = d; + insn[d++] = insn[i]; + } else { + reloc[i] = d; /* For jumps only */ + } + if (f->bb[b].ninsn != d) modified = 1; + f->bb[b].ninsn = d; + + /* Relocate references from all basic blocks */ + for (c = 0; c < f->num_bb; c++) + for (i = 0; i < f->bb[c].ninsn; i++) { + dep_list *d = f->bb[c].insn[i].dep; + for (j = 0; j < MAX_OPERANDS; j++) + if 
((f->bb[c].insn[i].opt[j] & OPT_REF) + && REF_BB(f->bb[c].insn[i].op[j]) == b) + f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]); + + while (d) { + if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]); + d = d->next; + } + } + } + return modified; +} + +static void unmark_tree (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + cucdebug (5, "%x ", ref); + if (ii->type & IT_UNUSED) { + int j; + ii->type &= ~IT_UNUSED; + for (j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_REF) unmark_tree (f, ii->op[j]); + } +} + +/* Remove unused assignments */ +int remove_dead (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + f->bb[b].insn[i].type |= IT_UNUSED; + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + cuc_insn *ii = &f->bb[b].insn[i]; + if (ii->type & IT_VOLATILE || ii->type & IT_OUTPUT + || II_IS_LOAD (ii->index) && (f->memory_order == MO_NONE || f->memory_order == MO_WEAK) + || II_IS_STORE (ii->index)) { + unmark_tree (f, REF (b, i)); + cucdebug (5, "\n"); + } + } + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_UNUSED) { + change_insn_type (&f->bb[b].insn[i], II_NOP); + } + + return remove_nops (f); +} + +/* Removes trivial register assignments */ +int remove_trivial_regs (cuc_func *f) +{ + int b, i; + for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = caller_saved[i]; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) { + if (insn[i].index == II_ADD + && insn[i].opt[0] & OPT_REGISTER + && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1] + && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) { + if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1; + change_insn_type (&insn[i], II_NOP); + } + } + } + if (cuc_debug >= 2) { + PRINTF ("saved regs "); + for (i = 0; i < MAX_REGS; i++) PRINTF ("%i:%i ", 
i, f->saved_regs[i]); + PRINTF ("\n"); + } + return remove_nops (f); +} + +/* Determine inputs and outputs */ +void set_io (cuc_func *f) +{ + int b, i, j; + /* Determine register usage */ + for (i = 0; i < MAX_REGS; i++) { + f->lur[i] = -1; + f->used_regs[i] = 0; + } + if (cuc_debug > 5) print_cuc_bb (f, "SET_IO"); + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0) { + if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i); + else f->used_regs[f->bb[b].insn[i].op[j]] = 1; + } + } +} + +/* relocate all accesses inside of BB b to back/fwd */ +static void relocate_bb (cuc_bb *bb, int b, int back, int fwd) +{ + int i, j; + for (i = 0; i < bb->ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (bb->insn[i].opt[j] & OPT_REF + && REF_BB (bb->insn[i].op[j]) == b) { + int t = REF_I (bb->insn[i].op[j]); + if (t < i) bb->insn[i].op[j] = REF (back, t); + else bb->insn[i].op[j] = REF (fwd, t); + } +} + +/* Latch outputs in loops */ +void add_latches (cuc_func *f) +{ + int b, i, j; + + //print_cuc_bb (f, "ADD_LATCHES a"); + /* Cuts the tree and marks registers */ + mark_cut (f); + + /* Split BBs with more than one group */ + for (b = 0; b < f->num_bb; b++) expand_bb (f, b); + remove_nops (f); + //print_cuc_bb (f, "ADD_LATCHES 0"); + + /* Convert accesses in BB_INLOOP type block to latched */ + for (b = 0; b < f->num_bb; b++) { + int j; + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) { + int t = f->bb[b].insn[i].op[j]; + /* If we are pointing to a INLOOP block from outside, or forward + (= previous loop iteration) we must register that data */ + if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle) + && !(f->INSN(t).type & (IT_BRANCH | IT_COND)) + && (REF_BB(t) != b || REF_I(t) >= i)) { + f->INSN(t).type |= IT_LATCHED; + } + } + } + 
//print_cuc_bb (f, "ADD_LATCHES 1"); + + /* Add latches at the end of blocks as needed */ + for (b = 0; b < f->num_bb; b++) { + int nreg = 0; + cuc_insn *insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_LATCHED) nreg++; + if (nreg) { + insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg)); + j = 0; + for (i = 0; i < f->bb[b].ninsn; i++) { + insn[i] = f->bb[b].insn[i]; + if (insn[i].type & IT_LATCHED) { + cuc_insn *ii = &insn[f->bb[b].ninsn + j++]; + change_insn_type (ii, II_REG); + ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER; + ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF; + ii->opt[2] = ii->opt[3] = OPT_NONE; + ii->dep = NULL; + ii->type = IT_VOLATILE; + sprintf (ii->disasm, "reg %i_%i", b, i); + } + } + f->bb[b].ninsn += nreg; + free (f->bb[b].insn); + f->bb[b].insn = insn; + } + } + //print_cuc_bb (f, "ADD_LATCHES 2"); + + /* Repair references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + /* If destination instruction is latched, use register instead */ + if (f->bb[b].insn[i].opt[j] == OPT_REF + && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) { + int b1, i1; + b1 = REF_BB (f->bb[b].insn[i].op[j]); + //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]); + if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) { + for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) { + assert (f->bb[b1].insn[i1].index == II_REG); + if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) { + f->bb[b].insn[i].op[j] = REF (b1, i1); + break; + } + } + } + } +} + +/* CSE -- common subexpression elimination */ +int cse (cuc_func *f) +{ + int modified = 0; + int b, i, j, b1, i1, b2, i2; + for (b1 = 0; b1 < f->num_bb; b1++) + for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) if (f->bb[b1].insn[i1].index != II_NOP + && f->bb[b1].insn[i1].index != II_LRBB && !(f->bb[b1].insn[i1].type & IT_MEMORY) + && !(f->bb[b1].insn[i1].type & IT_MEMADD)) + for (b2 = 0; b2 < 
f->num_bb; b2++) + for (i2 = 0; i2 < f->bb[b2].ninsn; i2++) + if (f->bb[b2].insn[i2].index != II_NOP && f->bb[b2].insn[i2].index != II_LRBB + && !(f->bb[b2].insn[i2].type & IT_MEMORY) && !(f->bb[b2].insn[i2].type & IT_MEMADD) + && (b1 != b2 || i2 > i1)) { + cuc_insn *ii1 = &f->bb[b1].insn[i1]; + cuc_insn *ii2 = &f->bb[b2].insn[i2]; + int ok = 1; + + /* Do we have an exact match? */ + if (ii1->index != ii2->index) continue; + if (ii2->type & IT_VOLATILE) continue; + + /* Check all operands also */ + for (j = 0; j < MAX_OPERANDS; j++) { + if (ii1->opt[j] != ii2->opt[j]) { + ok = 0; + break; + } + if (ii1->opt[j] & OPT_DEST) continue; + if (ii1->opt[j] != OPT_NONE && ii1->op[j] != ii2->op[j]) { + ok = 0; + break; + } + } + + if (ok) { + /* remove duplicated instruction and relink the references */ + cucdebug (3, "%x - %x are same\n", REF(b1, i1), REF(b2, i2)); + change_insn_type (ii2, II_NOP); + modified = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF + && f->bb[b].insn[i].op[j] == REF (b2, i2)) + f->bb[b].insn[i].op[j] = REF (b1, i1); + } + } + return modified; +} + +static int count_cmovs (cuc_insn *ii, int match) +{ + int c = 0, j; + if (match & 2) { + for (j = 0; j < MAX_OPERANDS; j++) + if (ii->opt[j] & OPT_DEST) c++; + } + if (match & 1) { + for (j = 0; j < MAX_OPERANDS; j++) + if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] & OPT_REF) c++; + } else { + for (j = 0; j < MAX_OPERANDS; j++) + if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] != OPT_NONE) c++; + } + return c; +} + +static void search_csm (int iter, cuc_func *f, cuc_shared_list *list); +static cuc_shared_list *main_list; +static int *iteration; + +/* CSM -- common subexpression matching -- resource sharing + We try to match tree of instruction inside a BB with as many + matches as possible. 
All possibilities are collected and + options, making situation worse are removed */ +void csm (cuc_func *f) +{ + int b, i, j; + int cnt; + cuc_shared_list *list; + cuc_timings timings; + + analyse_timings (f, &timings); + main_list = NULL; + for (b = 0; b < f->num_bb; b++) { + assert (iteration = (int *)malloc (sizeof (int) * f->bb[b].ninsn)); + for (i = 0; i < f->bb[b].ninsn; i++) { + int cnt = 0, cntc = 0; + double size = 0., sizec = 0.; + int j2 = 0; + for (j = 0; j < f->bb[b].ninsn; j++) + if (f->bb[b].insn[i].index == f->bb[b].insn[j].index) { + int ok = 1; + for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[j].opt[j2] & OPT_REF)) + if (f->bb[b].insn[j].opt[j2] != f->bb[b].insn[i].opt[j2] + || f->bb[b].insn[j].op[j2] != f->bb[b].insn[i].opt[j2]) { + ok = 0; + break; + } + if (ok) { + cntc++; + sizec = sizec + insn_size (&f->bb[b].insn[j]); + } else { + cnt++; + size = size + insn_size (&f->bb[b].insn[j]); + } + iteration[j] = 0; + } else iteration[j] = -1; + if (cntc > 1) { + assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); + list->next = main_list; + list->from = NULL; + list->ref = REF (b, i); + list->cnt = cnt; + list->cmatch = 1; + list->cmovs = count_cmovs (&f->bb[b].insn[i], 3); + list->osize = sizec; + list->size = ii_size (f->bb[b].insn[i].index, 1); + main_list = list; + search_csm (0, f, list); + } + if (cnt > 1) { + assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); + list->next = main_list; + list->from = NULL; + list->ref = REF (b, i); + list->cnt = cnt + cntc; + list->cmatch = 0; + list->cmovs = count_cmovs (&f->bb[b].insn[i], 2); + list->osize = size + sizec; + list->size = ii_size (f->bb[b].insn[i].index, 0); + main_list = list; + search_csm (0, f, list); + } + } + free (iteration); + } + + for (list = main_list; list; list = list->next) list->dead = 0; + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + + /* Now we will 
check the real size of the 'improvements'; if the size + actually increases, we abandom the option */ + for (list = main_list; list; list = list->next) + if (list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size >= list->osize) list->dead = 1; + + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + + /* Count number of instructions grouped */ + for (list = main_list; list; list = list->next) { + cuc_shared_list *l = list; + int c = 0; + while (l) { + c++; + if (f->INSN(l->ref).type & (IT_VOLATILE | IT_MEMORY | IT_MEMADD)) list->dead = 1; + l = l->from; + } + list->ninsn = c; + } + + cnt = 0; + for (list = main_list; list; list = list->next) + if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); + +#if 1 + /* We can get a lot of options here, so we will delete duplicates */ + for (list = main_list; list; list = list->next) if (!list->dead) { + cuc_shared_list *l; + for (l = list->next; l; l = l->next) if (!l->dead) { + int ok = 1; + cuc_shared_list *t1 = list; + cuc_shared_list *t2 = l; + while (ok && t1 && t2) { + if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) { + /* If other operands are matching, we must check for them also */ + if (t1->cmatch) { + int j; + for (j = 0; j < MAX_OPERANDS; j++) + if (!(f->INSN(t1->ref).opt[j] & OPT_REF) || !(f->INSN(t2->ref).opt[j] & OPT_REF) + || f->INSN(t1->ref).opt[j] != f->INSN(t2->ref).opt[j] + || f->INSN(t1->ref).op[j] != f->INSN(t2->ref).op[j]) { + ok = 0; + break; + } + } + + /* This option is duplicate, remove */ + if (ok) t1->dead = 1; + } + t1 = t1->from; + t2 = t2->from; + } + } + } + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead) cnt++; + cucdebug (1, "noptions = %i\n", cnt); +#endif + /* Print out */ + for (list = main_list; list; list = list->next) if (!list->dead) { + cuc_shared_list *l = list; + cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @", + cuc_insn_name 
(&f->INSN(list->ref)), list->cnt, list->ninsn, + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs); + while (l) { + cucdebug (1, "%c%x,", l->cmatch ? '.' : '!', l->ref); + l = l->from; + } + cucdebug (1, "\n"); + } + + /* Calculate estimated timings */ + for (b = 0; b < f->num_bb; b++) { + cnt = 0; + for (list = main_list; list; list = list->next) + if (!list->dead && REF_BB(list->ref) == b) cnt++; + + f->bb[b].ntim = cnt; + if (!cnt) { + f->bb[b].tim = NULL; + continue; + } + assert (f->bb[b].tim = (cuc_timings *)malloc (sizeof (cuc_timings) * cnt)); + + cnt = 0; + for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) { + cuc_shared_list *l = list; + f->bb[b].tim[cnt].b = b; + f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1; + f->bb[b].tim[cnt].nshared = list->ninsn; + assert (f->bb[b].tim[cnt].shared = (cuc_shared_item *) + malloc (sizeof(cuc_shared_item) * list->ninsn)); + for (i = 0; i < list->ninsn; i++, l = l->from) { + f->bb[b].tim[cnt].shared[i].ref = l->ref; + f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch; + } + f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1); + f->bb[b].tim[cnt].size = timings.size + + list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize; + cnt++; + } + } +} + +/* Recursive function for searching through instruction graph */ +static void search_csm (int iter, cuc_func *f, cuc_shared_list *list) +{ + int b, i, j, i1; + cuc_shared_list *l; + b = REF_BB(list->ref); + i = REF_I(list->ref); + + for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) { + int t = f->bb[b].insn[i].op[j]; + int cnt = 0, cntc = 0; + double size = 0., sizec = 0.; + + /* Mark neighbours */ + for (i1 = 0; i1 < f->bb[b].ninsn; i1++) { + if (iteration[i1] == iter && f->bb[b].insn[i1].opt[j] & OPT_REF) { + int t2 = f->bb[b].insn[i1].op[j]; + if (f->INSN(t).index == f->INSN(t2).index && f->INSN(t2).opt[j] & 
OPT_REF) { + int j2; + int ok = 1; + iteration[REF_I(t2)] = iter + 1; + for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[i1].opt[j2] & OPT_REF)) + if (f->bb[b].insn[i1].opt[j2] != f->bb[b].insn[i].opt[j2] + || f->bb[b].insn[i1].op[j2] != f->bb[b].insn[i].opt[j2]) { + ok = 0; + break; + } + if (ok) { + cntc++; + sizec = sizec + insn_size (&f->bb[b].insn[i1]); + } else { + cnt++; + size = size + insn_size (&f->bb[b].insn[i1]); + } + } + } + } + + if (cntc > 1) { + assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); + l->next = main_list; + main_list = l; + l->from = list; + l->ref = t; + l->cnt = cnt; + l->cmatch = 1; + l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 1) - 1; + l->size = list->size + ii_size (f->bb[b].insn[i].index, 1); + l->osize = sizec; + search_csm (iter + 1, f, l); + } + if (cnt > 1) { + assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list))); + l->next = main_list; + main_list = l; + l->from = list; + l->ref = t; + l->cnt = cnt + cntc; + l->cmatch = 0; + l->osize = size + sizec; + l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 0) - 1; + l->size = list->size + ii_size (f->bb[b].insn[i].index, 0); + search_csm (iter + 1, f, l); + } + + /* Unmark them back */ + for (i1 = 0; i1 < f->bb[b].ninsn; i1++) if (iteration[i1] > iter) iteration[i1] = -1; + } +} + +/* Displays shared instructions */ +void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared) +{ + int i, first = 1; + for (i = 0; i < nshared; i++) { + PRINTF ("%s%s%s", first ? "" : "-", cuc_insn_name (&rf->INSN(shared[i].ref)), + shared[i].cmatch ? "!" : ""); + first = 0; + } +} + +/* Common subexpression matching -- resource sharing, generation pass + + Situation here is much simpler than with analysis -- we know the instruction sequence + we are going to share, but we are going to do this on whole function, not just one BB. 
+ We can find sequence in reference function, as pointed from "shared" */ +void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared) +{ + int b, i, cnt = 0; +#warning some code here (2) + PRINTF ("Replacing: "); + print_shared (rf, shared, nshared); + + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + } + + PRINTF ("\nFound %i matches.\n", cnt); +} + Index: verilog.c =================================================================== --- verilog.c (nonexistent) +++ verilog.c (revision 1765) @@ -0,0 +1,1032 @@ +/* verilog.c -- OpenRISC Custom Unit Compiler, verilog generator + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "profiler.h" +#include "sim-config.h" + +/* Shortcut */ +#define GEN(x...) 
fprintf (fo, x) + +/* Find index of load/store/call */ +int find_lsc_index (cuc_func *f, int ref) +{ + int c = 0; + int i; + int load; + + if (f->INSN(ref).index == II_CALL) { + for (i = 0; i < f->nmsched; i++) { + if (f->msched[i] == ref) break; + if (f->mtype[i] & MT_CALL) c++; + } + } else { + load = II_IS_LOAD (f->INSN(ref).index); + for (i = 0; i < f->nmsched; i++) { + if (f->msched[i] == ref) break; + if (load && (f->mtype[i] & MT_LOAD) + || !load && (f->mtype[i] & MT_STORE)) c++; + } + } + return c; +} + +/* Print out dependencies as verilog expression */ +void print_deps (FILE *fo, cuc_func *f, int b, dep_list *t, int registered) +{ + if (t) { + int first = 0; + while (t) { + if (f->INSN(t->ref).type & IT_MEMORY) { + GEN ("%s%c_end[%i]", first ? " && " : "", + II_IS_LOAD (f->INSN(t->ref).index) ? 'l' : 's', find_lsc_index (f, t->ref)); + } else if (f->INSN(t->ref).index == II_CALL) { + GEN ("%sf_end[%i]", first ? " && " : "", find_lsc_index (f, t->ref)); + } else { + PRINTF ("print_deps: err %lx\n", t->ref); + assert (0); + } + first = 1; + t = t->next; + } + } else { + if (registered) GEN ("bb_start_r[%i]", b); + else GEN ("bb_start[%i]", b); + } +} + +char *print_op_v (cuc_func *f, char *s, int ref, int j) +{ + unsigned long op = f->INSN(ref).op[j]; + unsigned long opt = f->INSN(ref).opt[j]; + switch (opt & ~OPT_DEST) { + case OPT_NONE: assert (0); break; + case OPT_CONST: if (f->INSN(ref).type & IT_COND && (f->INSN(ref).index == II_CMOV + || f->INSN(ref).index == II_ADD)) { + assert (op == 0 || op == 1); + sprintf (s, "1'b%lx", op); + } else sprintf (s, "32'h%lx", op); + break; + case OPT_REGISTER: + if (opt & OPT_DEST) sprintf (s, "t%x_%x", REF_BB(ref), REF_I(ref)); + else sprintf (s, "r%li_%c", op, opt & OPT_DEST ? 
'o' : 'i'); + break; +#if 0 + case OPT_FREG: assert (opt & OPT_DEST); + sprintf (s, "fr%i_o", op); + break; +#endif + case OPT_REF: sprintf (s, "t%lx_%lx", REF_BB(op), REF_I(op)); break; + } + return s; +} + +/* Prints out specified instruction */ +void print_insn_v (FILE *fo, cuc_func *f, int b, int i) +{ + cuc_insn *ii = &f->bb[b].insn[i]; + char *s = known[ii->index].rtl; + char tmp[200] = ""; + + assert (s); + while (*s) { + if (*s <= MAX_OPERANDS) { + char t[30]; + sprintf (tmp, "%s%s", tmp, print_op_v (f, t, REF(b, i), *s - 1)); + } else if (*s == '\b') sprintf (tmp, "%s%i", b); + else sprintf (tmp, "%s%c", tmp, *s); + s++; + } + GEN ("%-40s /* %s */\n", tmp, ii->disasm); + if (ii->type & IT_MEMORY) { + int nls = find_lsc_index (f, REF (b, i)); + if (II_IS_LOAD (ii->index)) { + int nm; + for (nm = 0; nm < f->nmsched; nm++) if (f->msched[nm] == REF (b, i)) break; + assert (nm < f->nmsched); + + GEN (" if (l_end[%i]) t%x_%x <= #Tp ", nls, b, i); + switch (f->mtype[nm] & (MT_WIDTH | MT_SIGNED)) { + case 1: GEN ("l_dat_i & 32'hff;\n"); + break; + case 2: GEN ("l_dat_i & 32'hffff;\n"); + break; + case 4 | MT_SIGNED: + case 4: GEN ("l_dat_i;\n"); + break; + case 1 | MT_SIGNED: + GEN ("{24{l_dat_i[7]}, l_dat_i[7:0]};\n"); + break; + case 2 | MT_SIGNED: + GEN ("{16{l_dat_i[15]}, l_dat_i[15:0]};\n"); + break; + default: assert (0); + } + } + } else if (ii->index == II_LRBB) { + GEN (" if (rst) t%x_%x <= #Tp 1'b0;\n", b, i); + assert (f->bb[b].prev[0] >= 0); + if (f->bb[b].prev[0] == BBID_START) + GEN (" else if (bb_start[%i]) t%x_%x <= #Tp start_i;\n", b, b, i); + else + GEN (" else if (bb_start[%i]) t%x_%x <= #Tp bb_stb[%i];\n", b, b, i, f->bb[b].prev[0]); + } else if (ii->index == II_REG) { + assert (ii->opt[1] == OPT_REF); + GEN (" if ("); + if (f->bb[b].mdep) print_deps (fo, f, b, f->bb[b].mdep, 0); + else GEN ("bb_stb[%i]", b); + GEN (") t%x_%x <= #Tp t%lx_%lx;\n", b, i, + REF_BB (ii->op[1]), REF_I (ii->op[1])); + } +} + +/* Outputs binary number */ +static char 
*bin_str (unsigned long x, int len) +{ + static char bx[33]; + char *s = bx; + while (len > 0) *s++ = '0' + ((x >> --len) & 1); + *s = '\0'; + return bx; +} + +/* Returns index of branch instruction inside a block b */ +static int branch_index (cuc_bb *bb) +{ + int i; + for (i = bb->ninsn - 1; i >= 0; i--) + if (bb->insn[i].type & IT_BRANCH) return i; + return -1; +} + +static void print_turn_off_dep (FILE *fo, cuc_func *f, dep_list *dep) +{ + while (dep) { + assert (f->INSN(dep->ref).type & IT_MEMORY || f->INSN(dep->ref).index == II_CALL); + GEN (" %c_stb[%i] <= #Tp 1'b0;\n", f->INSN(dep->ref).index == II_CALL ? 'f' + : II_IS_LOAD (f->INSN(dep->ref).index) ? 'l' : 's', find_lsc_index (f, dep->ref)); + dep = dep->next; + } +} + +static int func_index (cuc_func *f, int ref) +{ + int i; + unsigned long addr; + assert (f->INSN(ref).index == II_CALL && f->INSN(ref).opt[0] & OPT_CONST); + addr = f->INSN(ref).op[0]; + for (i = 0; i < f->nfdeps; i++) + if (f->fdeps[i]->start_addr == addr) return i; + + assert (0); + return -1; +} + +/* Generates verilog file out of insn dataflow */ +void output_verilog (cuc_func *f, char *filename, char *funcname) +{ + FILE *fo; + int b, i, j; + int ci = 0, co = 0; + int nloads = 0, nstores = 0, ncalls = 0; + char tmp[256]; + sprintf (tmp, "%s.v", filename); + + log ("Generating verilog file \"%s\"\n", tmp); + PRINTF ("Generating verilog file \"%s\"\n", tmp); + if ((fo = fopen (tmp, "wt+")) == NULL) { + fprintf (stderr, "Cannot open '%s'\n", tmp); + exit (1); + } + + /* output header */ + GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp); + GEN (" (C) 2002 Opencores\n"); + GEN (" function \"%s\"\n", funcname); + GEN (" at %08lx - %08lx\n", f->start_addr, f->end_addr); + GEN (" num BBs %i */\n\n", f->num_bb); + + GEN ("`include \"timescale.v\"\n\n"); + GEN ("module %s (clk, rst,\n", filename); + GEN (" l_adr_o, l_dat_i, l_req_o,\n"); + GEN (" l_sel_o, l_linbrst_o, l_rdy_i,\n"); + GEN (" s_adr_o, s_dat_o, s_req_o,\n"); + GEN (" 
s_sel_o, s_linbrst_o, s_rdy_i,\n"); + + GEN ("/* inputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (f->used_regs[i]) { + GEN ("r%i_i, ", i); + ci++; + } + if (!ci) GEN ("/* NONE */"); + + GEN ("\n/* outputs */ "); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) { + GEN ("r%i_o, ", i); + co++; + } + + if (!co) GEN ("/* NONE */"); + if (f->nfdeps) { + GEN ("\n/* f. calls */, fstart_o, %sfend_i, fr11_i, ", + log2_int (f->nfdeps) > 0 ? "fid_o, " : ""); + for (i = 0; i < 6; i++) GEN ("fr%i_o, ", i + 3); + } + GEN ("\n start_i, end_o, busy_o);\n\n"); + + GEN ("parameter Tp = 1;\n\n"); + + GEN ("input clk, rst;\n"); + GEN ("input start_i;\t/* Module starts when set to 1 */ \n"); + GEN ("output end_o;\t/* Set when module finishes, cleared upon start_i == 1 */\n"); + GEN ("output busy_o;\t/* Set when module should not be interrupted */\n"); + GEN ("\n/* Bus signals */\n"); + GEN ("output l_req_o, s_req_o;\n"); + GEN ("input l_rdy_i, s_rdy_i;\n"); + GEN ("output [3:0] l_sel_o, s_sel_o;\n"); + GEN ("output [31:0] l_adr_o, s_adr_o;\n"); + GEN ("output l_linbrst_o, s_linbrst_o;\n"); + GEN ("input [31:0] l_dat_i;\n"); + GEN ("output [31:0] s_dat_o;\n\n"); + + GEN ("reg l_req_o, s_req_o;\n"); + GEN ("reg [31:0] l_adr_o, s_adr_o;\n"); + GEN ("reg [3:0] l_sel_o, s_sel_o;\n"); + GEN ("reg [31:0] s_dat_o;\n"); + GEN ("reg l_linbrst_o, s_linbrst_o;\n"); + + if (ci || co) GEN ("\n/* module ports */\n"); + if (ci) { + int first = 1; + GEN ("input [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (f->used_regs[i]) { + GEN ("%sr%i_i", first ? " " : ", ", i); + first = 0; + } + GEN (";\n"); + } + + if (co) { + int first = 1; + GEN ("output [31:0]"); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) { + GEN ("%sr%i_o", first ? 
" " : ", ", i); + first = 0; + } + GEN (";\n"); + } + + if (f->nfdeps) { + GEN ("\n/* Function calls */\n"); + GEN ("output [31:0] fr3_o"); + for (i = 1; i < 6; i++) GEN (", fr%i_o", i + 3); + GEN (";\n"); + GEN ("input [31:0] fr11_i;\n"); + if (log2_int(f->nfdeps) > 0) GEN ("output [%i:0] fid_o;\n", log2_int(f->nfdeps)); + GEN ("output fstart_o;\n"); + GEN ("input fend_i;\n"); + } + + /* Count loads & stores */ + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_STORE) nstores++; + else if (f->mtype[i] & MT_LOAD) nloads++; + else if (f->mtype[i] & MT_CALL) ncalls++; + + /* Output internal registers for loads */ + if (nloads) { + int first = 1; + int num = 0; + GEN ("\n/* internal registers for loads */\n"); + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_LOAD) { + GEN ("%st%x_%x", first ? "reg [31:0] " : ", ", + REF_BB(f->msched[i]), REF_I(f->msched[i])); + + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + } + + /* Internal register for function return value */ + if (f->nfdeps) { + GEN ("\n/* Internal register for function return value */\n"); + GEN ("reg [31:0] fr11_r;\n"); + } + + GEN ("\n/* 'zero or one' hot state machines */\n"); + if (nloads) GEN ("reg [%i:0] l_stb; /* loads */\n", nloads - 1); + if (nstores) GEN ("reg [%i:0] s_stb; /* stores */\n", nstores - 1); + GEN ("reg [%i:0] bb_stb; /* basic blocks */\n", f->num_bb - 1); + + { + int first = 2; + int num = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].type & IT_COND + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + if (first == 2) GEN ("\n/* basic block condition wires */\n"); + GEN ("%st%x_%x", first ? 
"wire " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + GEN ("\n/* forward declaration of normal wires */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (!(f->bb[b].insn[i].type & (IT_COND | IT_BRANCH)) + && f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index != II_LRBB) { + /* Exclude loads */ + if (f->bb[b].insn[i].type & IT_MEMORY && II_IS_LOAD (f->bb[b].insn[i].index)) continue; + GEN ("%st%x_%x", first ? "wire [31:0] " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + GEN ("\n/* forward declaration registers */\n"); + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index == II_REG + && f->bb[b].insn[i].index != II_LRBB) { + GEN ("%st%x_%x", first ? "reg [31:0] " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + + num = 0; + first = 1; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + if (f->bb[b].insn[i].index != II_REG + && f->bb[b].insn[i].index == II_LRBB) { + GEN ("%st%x_%x", first ? 
"reg " : ", ", b, i); + if (num >= 8) { + GEN (";\n"); + first = 1; + num = 0; + } else { + first = 0; + num++; + } + } + if (!first) GEN (";\n"); + } + + if (nloads || nstores) GEN ("\n/* dependencies */\n"); + if (nloads) GEN ("wire [%i:0] l_end = l_stb & {%i{l_rdy_i}};\n", + nloads - 1, nloads); + if (nstores) GEN ("wire [%i:0] s_end = s_stb & {%i{s_rdy_i}};\n", + nstores - 1, nstores); + if (ncalls) GEN ("wire [%i:0] f_end = f_stb & {%i{fend_i}};\n", + ncalls - 1, ncalls); + + GEN ("\n/* last dependency */\n"); + GEN ("wire end_o = "); + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < 2; i++) if (f->bb[b].next[i] == BBID_END) { + GEN ("bb_stb[%i]", b); + if (f->bb[b].mdep) { + GEN (" && "); + print_deps (fo, f, b, f->bb[b].mdep, 0); + } + /* Is branch to BBID_END conditional? */ + if (f->bb[b].next[1 - i] >= 0) { + int bidx = branch_index (&f->bb[b]); + char t[30]; + print_op_v (f, t, REF (b, bidx), 1); + GEN (" && %s%s", i ? "" : "!", t); + } + } + } + GEN (";\n"); + GEN ("wire busy_o = |bb_stb;\n"); + + + GEN ("\n/* Basic block triggers */\n"); + GEN ("wire [%2i:0] bb_start = {\n", f->num_bb - 1); + for (b = f->num_bb - 1; b >= 0; b--) { + GEN (" /* bb_start[%2i] */ ", b); + for (i = 0; i < 2; i++) if (f->bb[b].prev[i] >= 0 && f->bb[b].prev[i] != BBID_START) { + cuc_bb *prev = &f->bb[f->bb[b].prev[i]]; + int t; + if (i) GEN ("\n || "); + if (prev->mdep) { + print_deps (fo, f, f->bb[b].prev[i], prev->mdep, 0); + GEN (" && "); + } + GEN ("bb_stb[%i]", f->bb[b].prev[i]); + if (prev->next[0] >= 0 && prev->next[0] != BBID_END + && prev->next[1] >= 0 && prev->next[1] != BBID_END) { + int bi = REF (f->bb[b].prev[i], branch_index (&f->bb[f->bb[b].prev[i]])); + int ci; + assert (bi >= 0); + ci = f->INSN(bi).op[1]; + t = prev->next[0] == b; + GEN (" && "); + if (f->INSN(bi).opt[1] & OPT_REF) { + GEN ("%st%x_%x", t ? 
"" : "!", REF_BB(ci), REF_I(ci)); + } else { + cucdebug (5, "%x!%x!%x\n", bi, ci, f->INSN(bi).opt[1]); + assert (f->INSN(bi).opt[1] & OPT_CONST); + GEN ("%s%i", t ? "" : "!", ci); + } + } + } else break; + if (!i) GEN ("start_i"); + if (b == 0) GEN ("};\n"); + else GEN (",\n"); + } + + GEN ("\n/* Register the bb_start */\n"); + GEN ("reg [%2i:0] bb_start_r;\n\n", f->num_bb - 1); + GEN ("always @(posedge rst or posedge clk)\n"); + GEN ("begin\n"); + GEN (" if (rst) bb_start_r <= #Tp %i'b0;\n", f->num_bb); + GEN (" else if (end_o) bb_start_r <= #Tp %i'b0;\n", f->num_bb); + GEN (" else bb_start_r <= #Tp bb_start;\n"); + GEN ("end\n"); + + GEN ("\n/* Logic */\n"); + /* output body */ + for (b = 0; b < f->num_bb; b++) { + GEN ("\t\t/* BB%i */\n", b); + for (i = 0; i < f->bb[b].ninsn; i++) + print_insn_v (fo, f, b, i); + GEN ("\n"); + } + + if (co) { + GEN ("\n/* Outputs */\n"); + for (i = 0; i < MAX_REGS; i++) + if (f->lur[i] >= 0 && !f->saved_regs[i]) + GEN ("assign r%i_o = t%x_%x;\n", i, REF_BB(f->lur[i]), + REF_I(f->lur[i])); + } + + if (nstores) { + int cur_store; + GEN ("\n/* Memory stores */\n"); + GEN ("always @(s_stb"); + for (i = 0; i < f->nmsched; i++) + if (f->mtype[i] & MT_STORE) { + char t[30]; + unsigned long opt = f->INSN(f->msched[i]).opt[0]; + if ((opt & ~OPT_DEST) != OPT_CONST) { + GEN (" or %s", print_op_v (f, t, f->msched[i], 0)); + } + } + + cur_store = 0; + GEN (")\nbegin\n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_STORE) { + char t[30]; + GEN (" %sif (s_stb[%i]) s_dat_o = %s;\n", cur_store == 0 ? 
"" : "else ", cur_store, + print_op_v (f, t, f->msched[i], 0)); + cur_store++; + //PRINTF ("msched[%i] = %x (mtype %x) %x\n", i, f->msched[i], f->mtype[i], f->INSN(f->msched[i]).op[0]); + } + GEN (" else s_dat_o = 32'hx;\n"); + GEN ("end\n"); + } + + /* Generate load and store state machine */ +#if 0 + GEN ("\n/* Load&store state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN (" if (rst) begin\n"); + if (nloads) GEN (" l_stb <= #Tp %i'h0;\n", nloads); + if (nstores) GEN (" s_stb <= #Tp %i'h0;\n", nstores); + GEN (" end else begin\n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_LOAD || f->mtype[i] & MT_STORE) { + int cur = 0; + dep_list *dep = f->INSN(f->msched[i]).dep; + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN (" if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + print_turn_off_dep (fo, f, dep); + GEN (" %c_stb[%i] <= #Tp 1'b1;\n", f->mtype[i] & MT_LOAD ? 'l' : 's', cur++); + GEN (" end\n"); + } + GEN (" if (%c_end[%i]) %c_stb <= #Tp %i'h0;\n", c, cur - 1, c, cur); + GEN (" end\n"); +#endif + + /* Generate state generator machine */ + for (j = 0; j < 2; j++) { + char c; + char *s; + + switch (j) { + case 0: c = 'l'; s = "Load"; break; + case 1: c = 's'; s = "Store"; break; + case 2: c = 'c'; s = "Calls"; break; + } + if (j == 0 && nloads + || j == 1 && nstores + || j == 2 && ncalls) { + int cur = 0; + char t[30]; + + GEN ("\n/* %s state generator machine */\n", s); + GEN ("always @("); + for (i = 0; i < f->nmsched; i++) { + print_op_v (f, t, f->msched[i], 1); + GEN ("%s or ", t); + } + GEN ("bb_start_r"); + if (nloads) GEN (" or l_end"); + if (nstores) GEN (" or s_end"); + GEN (")\n"); + GEN ("begin\n "); + cucdebug (1, "%s\n", s); + for (i = 0; i < f->nmsched; i++) + if (j == 0 && f->mtype[i] & MT_LOAD + || j == 1 && f->mtype[i] & MT_STORE + || j == 2 && f->mtype[i] & MT_CALL) { + cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, f->msched[i], 
f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN ("if ("); + print_deps (fo, f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + GEN (" %c_req_o = 1'b1;\n", c); + GEN (" %c_sel_o[3:0] = 4'b", c); + switch (f->mtype[i] & MT_WIDTH) { + case 1: GEN ("0001 << (%s & 32'h3);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 2: GEN ("0011 << ((%s & 32'h1) << 1);\n", + print_op_v (f, t, f->msched[i], 1)); break; + case 4: GEN ("1111;\n"); break; + default: assert (0); + } + GEN (" %c_linbrst_o = 1'b%i;\n", c, + (f->mtype[i] & MT_BURST) && !(f->mtype[i] & MT_BURSTE) ? 1 : 0); + GEN (" %c_adr_o = t%lx_%lx & ~32'h3;\n", c, + REF_BB(f->INSN(f->msched[i]).op[1]), REF_I(f->INSN(f->msched[i]).op[1])); + GEN (" end else "); + } + GEN ("if (%c_end[%i]) begin\n", c, cur - 1); + GEN (" %c_req_o = 1'b0;\n", c); + GEN (" %c_sel_o[3:0] = 4'bx;\n", c); + GEN (" %c_linbrst_o = 1'b0;\n", c); + GEN (" %c_adr_o = 32'hx;\n", c); + GEN (" end else begin\n"); + GEN (" %c_req_o = 1'b0;\n", c); + GEN (" %c_sel_o[3:0] = 4'bx;\n", c); + GEN (" %c_linbrst_o = 1'b0;\n", c); + GEN (" %c_adr_o = 32'hx;\n", c); + GEN (" end\n"); + GEN ("end\n"); + } + } + + if (ncalls) { + int cur_call = 0; + GEN ("\n/* Function calls state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) begin\n"); + GEN (" f_stb <= #Tp %i'h0;\n", nstores); + for (i = 0; i < 6; i++) GEN (" fr%i_o <= #Tp 32'h0;\n", i + 3); + if (log2_int(ncalls)) GEN (" fid_o <= #Tp %i'h0;\n", log2_int (f->nfdeps)); + GEN (" fstart_o <= #Tp 1'b0;\n"); + //GEN (" f11_r <= #Tp 32'h0;\n"); + GEN (" end else begin\n"); + cucdebug (1, "calls \n"); + for (i = 0; i < f->nmsched; i++) if (f->mtype[i] & MT_CALL) { + dep_list *dep = f->INSN(f->msched[i]).dep; + cucdebug (1, "msched[%i] = %x (mtype %x)\n", i, f->msched[i], f->mtype[i]); + assert (f->INSN(f->msched[i]).opt[1] & (OPT_REF | OPT_REGISTER)); + GEN (" if ("); + print_deps (fo, 
f, REF_BB(f->msched[i]), f->INSN(f->msched[i]).dep, 1); + GEN (") begin\n"); + print_turn_off_dep (fo, f, dep); + GEN (" f_stb[%i] <= #Tp 1'b1;\n", cur_call++); + GEN (" fstart_o <= #Tp 1'b1;\n"); + if (log2_int (f->nfdeps)) + GEN (" fid_o <= #Tp %i'h%x;\n", log2_int (f->nfdeps), func_index (f, f->msched[i])); + + for (j = 0; j < 6; j++) + GEN (" fr%i_o <= #Tp t%x_%x;\n", j + 3, + REF_BB (f->msched[i]), REF_I (f->msched[i]) - 6 + i); + GEN (" end\n"); + } + GEN (" if (f_end[%i]) begin\n", ncalls - 1); + GEN (" f_stb <= #Tp %i'h0;\n", ncalls); + GEN (" f_start_o <= #Tp 1'b0;\n"); + GEN (" end\n"); + GEN (" end\n"); + GEN ("end\n"); + } + + GEN ("\n/* Basic blocks state machine */\n"); + GEN ("always @(posedge clk or posedge rst)\n"); + GEN ("begin\n"); + GEN (" if (rst) bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + GEN (" else if (end_o) begin\n"); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + for (i = 0; i < f->num_bb; i++) { + GEN (" end else if (bb_start[%i]) begin\n", i); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 1 << i); + } + GEN (" end else if (end_o) begin\n"); + GEN (" bb_stb <= #Tp %i'h%x;\n", f->num_bb, 0); + GEN (" end\n"); + GEN ("end\n"); + + /* output footer */ + GEN ("\nendmodule\n"); + + fclose (fo); +} +

/* Generates the top-level Verilog wrapper "<filename>_top.v".

   f        array of nfuncs function descriptors; NULL entries are skipped
   nfuncs   number of slots in f (and in the parallel prof_func[] table)
   filename base name; used for the output file, the module name and as the
            prefix of every instantiated per-function module

   Each non-NULL function gets a dense instance number stored in f[i]->tmp.
   The generated module contains the register write/read interface, the
   load/store Wishbone bridges with their bid selection, and one instance per
   function.  Nothing is written when no function is present.  The process is
   terminated with exit (1) when the output file cannot be opened. */
void generate_main (int nfuncs, cuc_func **f, char *filename)
{
  FILE *fo;
  int i, j, k, nrf, first;
  char tmp[256];
  int ncallees[MAX_FUNCS];
  int nl[MAX_FUNCS], ns[MAX_FUNCS];
  int maxncallees = 0;

  /* Bounded formatting: a long filename must not overrun tmp */
  snprintf (tmp, sizeof (tmp), "%s_top.v", filename);

  /* Number the present functions and collect per-function statistics.
     NOTE(review): ncallees[] is cleared in the same pass that increments it
     and callee->tmp is read before later functions are numbered, so counts
     for callees with a higher index look unreliable -- confirm intent. */
  for (i = 0, nrf = 0; i < nfuncs; i++) {
    nl[i] = ns[i] = 0;
    ncallees[i] = 0;
    if (f[i]) {
      f[i]->tmp = nrf++;
      for (j = 0; j < f[i]->nmsched; j++)
        if (f[i]->mtype[j] & MT_LOAD) nl[i]++;
        else if (f[i]->mtype[j] & MT_STORE) ns[i]++;
      for (j = 0; j < f[i]->nfdeps; j++)
        ncallees[f[i]->fdeps[j]->tmp]++;
    }
  }
  if (!nrf) return;

  for (i = 0; i < nrf; i++)
    if (maxncallees < ncallees[i]) maxncallees = ncallees[i];

  log ("Generating verilog file \"%s\"\n", tmp);
  PRINTF ("Generating verilog file \"%s\"\n", tmp);
  if ((fo = fopen (tmp, "wt+")) == NULL) {
    fprintf (stderr, "Cannot open '%s'\n", tmp);
    exit (1);
  }

  /* output header */
  GEN ("/* %s -- generated by Custom Unit Compiler\n", tmp);
  GEN (" (C) 2002 Opencores */\n\n");
  GEN ("/* Includes %i functions:", nrf);
  for (i = 0; i < nfuncs; i++) if (f[i])
    GEN ("\n%s", prof_func[i].name);
  GEN (" */\n\n");

  GEN ("`include \"timescale.v\"\n\n");
  GEN ("module %s (clk, rst,\n", filename);
  GEN (" /* Load and store master Wishbone ports */\n");
  GEN (" l_adr_o, l_dat_i, l_cyc_o, l_stb_o,\n");
  GEN (" l_sel_o, l_linbrst_o, l_rdy_i, l_we_o,\n");
  GEN (" s_adr_o, s_dat_o, s_cyc_o, s_stb_o,\n");
  GEN (" s_sel_o, s_linbrst_o, s_rdy_i, s_we_o,\n\n");
  GEN (" /* cuc interface */\n");
  GEN (" cuc_stb_i, cuc_adr_i, cuc_dat_i, cuc_dat_o, cuc_we_i, cuc_rdy_o);\n\n");

  GEN ("parameter Tp = 1;\n");
  GEN ("\n/* module ports */\n");
  GEN ("input clk, rst, cuc_stb_i, cuc_we_i;\n");
  GEN ("input l_rdy_i, s_rdy_i;\n");
  GEN ("output l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n");
  GEN ("reg l_cyc_o, l_stb_o, l_we_o, l_linbrst_o;\n");
  GEN ("output s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n");
  GEN ("reg s_cyc_o, s_stb_o, s_we_o, s_linbrst_o;\n");
  GEN ("output cuc_rdy_o; /* Not registered ! */\n");
  GEN ("output [3:0] l_sel_o, s_sel_o;\n");
  GEN ("reg [3:0] l_sel_o, s_sel_o;\n");
  GEN ("output [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n");
  GEN ("reg [31:0] l_adr_o, s_adr_o, s_dat_o, cuc_dat_o;\n");
  GEN ("input [15:0] cuc_adr_i;\n");
  GEN ("input [31:0] l_dat_i, cuc_dat_i;\n\n");

  /* One bit per function instance in each of these vectors */
  GEN ("wire [%2i:0] i_we, i_re, i_finish, i_selected, i_first_reg;\n", nrf - 1);
  GEN ("wire [%2i:0] i_bidok, i_start_bid, i_start_bidok, main_start, main_end;\n", nrf - 1);
  GEN ("wire [%2i:0] i_start, i_end, i_start_block, i_busy;\n", nrf - 1);
  GEN ("wire [%2i:0] i_l_req, i_s_req;\n", nrf - 1);
  GEN ("reg [%2i:0] i_go_bsy, main_start_r;\n", nrf - 1);

  GEN ("assign i_selected = {\n");
  for (i = 0; i < nrf; i++)
    GEN (" cuc_adr_i[15:6] == %i%s\n", i, i < nrf - 1 ? "," : "};");

  GEN ("assign i_first_reg = {\n");
  for (i = 0; i < nfuncs; i++) if (f[i]) {
    /* Lowest used register; bounded like every other used_regs[] scan here
       (the previous "<=" bound inspected one element more than the rest) */
    for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j]) break;
    GEN (" cuc_adr_i[5:0] == %i%s\n", j, f[i]->tmp < nrf - 1 ? "," : "};");
  }

  GEN ("assign i_we = {%i{cuc_stb_i && cuc_we_i}} & i_selected;\n", nrf);
  GEN ("assign i_re = {%i{cuc_stb_i && !cuc_we_i}} & i_selected;\n", nrf);

  GEN ("assign i_start = i_go_bsy & {%i{cuc_rdy_o}};\n", nrf);
  GEN ("assign i_start_bidok = {\n");
  for (i = 0; i < nrf; i++)
    GEN (" i_start_bid[%i] < %i%s\n", i, i, i < nrf - 1 ? "," : "};");
  GEN ("assign main_start = i_start & i_selected & i_first_reg & i_we;\n");
  /* The replication count was missing from the argument list (undefined
     behaviour); nrf matches the other replications emitted above.
     NOTE(review): i_end is already a vector -- confirm the replication. */
  GEN ("assign main_end = {%i{i_end}} & i_selected;\n", nrf);

  GEN ("\nalways @(posedge clk or posedge rst)\n");
  GEN ("begin\n");
  GEN (" if (rst) i_go_bsy <= #Tp %i'b0;\n", nrf);
  GEN (" else i_go_bsy <= #Tp i_we | ~i_finish & i_go_bsy;\n");
  GEN ("end\n");


  /* Function specific data */
  for (i = 0; i < nfuncs; i++) if (f[i]) {
    int ci = 0, co = 0;       /* number of input / output registers */
    int fn = f[i]->tmp;
    GEN ("\n/* Registers for function %s */\n", prof_func[i].name);
    for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j]) {
      GEN ("%s i%i_r%ii", first ? "/* inputs */\nreg [31:0]" : ",", fn, j);
      first = 0;
      ci++;
    }
    if (ci) GEN (";\n");

    for (j = 0, first = 1; j < MAX_REGS; j++)
      if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) {
        GEN ("%s i%i_r%io", first ? "/* outputs */\nwire [31:0]" : ",", fn, j);
        first = 0;
        co++;
      }
    if (co) GEN (";\n");
    GEN ("wire [31:0] i%i_l_adr, i%i_s_adr;\n", fn, fn);

    GEN ("always @(posedge clk or posedge rst)\n");
    GEN (" if (rst) main_start_r <= #Tp %i'b0;\n", nrf);
    GEN (" else main_start_r <= #Tp main_start & i_start_bidok | i_busy | ~i_end & main_start_r;\n");

    if (ci) {
      GEN ("\n/* write register access */\n");
      GEN ("always @(posedge clk or posedge rst)\n");
      GEN ("begin\n");
      GEN (" if (rst) begin\n");
      for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j])
        GEN (" i%i_r%ii <= #Tp 32'h0;\n", fn, j);
      GEN (" end else if (!i_go_bsy[%i] && i_we[%i])\n", fn, fn);
      GEN (" case (cuc_adr_i[5:0])\n");
      for (j = 0; j < MAX_REGS; j++) if (f[i]->used_regs[j])
        GEN (" %-2i: i%i_r%ii <= #Tp cuc_dat_i;\n", j, fn, j);
      GEN (" endcase\n");
      GEN ("end\n");
    }

    GEN ("\n");
  }

  /* Generate machine for reading all function registers. Register read can be
     delayed till function completion */
  {
    int co;
    GEN ("/* read register access - data */\n");
    GEN ("always @(posedge clk or posedge rst)\n");
    GEN (" if (rst) cuc_dat_o <= #Tp 32'h0;\n");
    GEN (" else if (cuc_stb_i && cuc_we_i) begin\n");
    GEN (" ");

    for (i = 0; i < nfuncs; i++) if (f[i]) {
      co = 0;
      for (j = 0; j < MAX_REGS; j++)
        if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) co++;

      GEN ("if (cuc_adr_i[15:6] == %i)", f[i]->tmp);
      if (co) {
        first = 1;
        GEN ("\n case (cuc_adr_i[5:0])\n");
        for (j = 0; j < MAX_REGS; j++)
          if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j])
            GEN (" %-2i: cuc_dat_o <= #Tp i%i_r%io;\n", j, f[i]->tmp, j);
        GEN (" endcase\n");
      } else {
        GEN (" cuc_dat_o <= #Tp 32'hx;\n");
      }
      GEN (" else ");
    }
    GEN ("cuc_dat_o <= #Tp 32'hx;\n");
    GEN (" end else cuc_dat_o <= #Tp 32'hx;\n");

    GEN ("\n/* read register access - acknowledge */\n");
    GEN ("assign cuc_rdy_o = cuc_stb_i && cuc_we_i && |(i_selected & main_end);\n");
  }

  /* Store/load Wishbone bridge; j == 0 emits the load side ('l'),
     j == 1 the store side ('s') */
  for (j = 0; j < 2; j++) {
    char t = j ? 's' : 'l';
    GEN ("\n/* %s Wishbone bridge */\n", j ? "store" : "load");
    GEN ("reg [%i:0] %cm_sel;\n", log2_int (nrf), t);
    GEN ("reg [%i:0] %cm_bid;\n", log2_int (nrf), t);
    GEN ("reg %ccyc_ip;\n\n", t);
    GEN ("always @(posedge clk)\n");
    GEN ("begin\n");
    GEN (" %c_we_o <= #Tp 1'b%i;\n", t, j);
    GEN (" %c_cyc_o <= #Tp |i_%c_req;\n", t, t);
    GEN (" %c_stb_o <= #Tp |i_%c_req;\n", t, t);
    GEN ("end\n");

    GEN ("\n/* highest bid */\n");
    GEN ("always @(");
    for (i = 0; i < nrf; i++) GEN ("%si_%c_req", i > 0 ? " or " : "", t);
    GEN (")\n");
    for (i = 0; i < nrf; i++)
      GEN (" %sif (i_%c_req) %cm_bid = %i'h%x;\n",
           i ? "else " : "", t, t, log2_int (nrf) + 1, i);

    GEN ("\n/* selected transfer */\n");
    GEN ("always @(posedge clk or posedge rst)\n");
    GEN (" if (rst) %cm_sel <= #Tp %i'h0;\n", t, log2_int (nrf) + 1);
    GEN (" else if (%c_rdy_i) %cm_sel <= #Tp %i'h0;\n", t, t, log2_int (nrf) + 1);
    GEN (" else if (!%ccyc_ip) %cm_sel <= #Tp %cm_bid;\n", t, t, t);

    GEN ("\n/* Cycle */\n");
    GEN ("\nalways @(posedge clk or posedge rst)\n");
    GEN (" if (rst) %ccyc_ip <= #Tp 1'b0;\n", t);
    GEN (" else if (%c_rdy_i) %ccyc_ip <= #Tp 1'b0;\n", t, t);
    GEN (" else %ccyc_ip <= #Tp %c_cyc_o;\n", t, t);
  }

  GEN ("\n/* Acknowledge */\n");
  for (i = 0; i < nrf; i++) {
    GEN ("wire i%i_s_rdy = ((sm_bid == %i & !scyc_ip) | sm_sel == %i) & s_rdy_i;\n", i, i, i);
    GEN ("wire i%i_l_rdy = ((lm_bid == %i & !lcyc_ip) | lm_sel == %i) & l_rdy_i;\n", i, i, i);
  }

  GEN ("\n/* data, address selects and burst enables */\n");
  for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_s_dat;\n", i);
  for (i = 0; i < nrf; i++) GEN ("wire i%i_s_linbrst, i%i_l_linbrst;\n", i, i);
  for (i = 0; i < nrf; i++) GEN ("wire [3:0] i%i_s_sel, i%i_l_sel;\n", i, i);
  for (i = 0; i < nrf; i++) GEN ("wire [31:0] i%i_l_dat = l_dat_i;\n", i);

  /* Store side multiplexers: chained ?: on sm_bid, last instance is the
     default branch */
  GEN ("\nalways @(posedge clk)\n");
  GEN ("begin\n");
  GEN (" s_dat_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n sm_bid == %i ? i%i_s_dat : ", i, i);
  GEN ("i%i_s_dat;\n", nrf - 1);
  GEN (" s_adr_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n sm_bid == %i ? i%i_s_adr : ", i, i);
  GEN ("i%i_s_adr;\n", nrf - 1);
  GEN (" s_sel_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n sm_bid == %i ? i%i_s_sel : ", i, i);
  GEN ("i%i_s_sel;\n", nrf - 1);
  GEN (" s_linbrst_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n sm_bid == %i ? i%i_s_linbrst : ", i, i);
  GEN ("i%i_s_linbrst;\n", nrf - 1);
  GEN ("end\n\n");

  /* Load side multiplexers, same scheme on lm_bid */
  GEN ("always @(posedge clk)\n");
  GEN ("begin\n");
  GEN (" l_adr_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n lm_bid == %i ? i%i_l_adr : ", i, i);
  GEN ("i%i_l_adr;\n", nrf - 1);
  GEN (" l_sel_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n lm_bid == %i ? i%i_l_sel : ", i, i);
  GEN ("i%i_l_sel;\n", nrf - 1);
  GEN (" l_linbrst_o <= #Tp ");
  for (i = 0; i < nrf - 1; i++)
    GEN ("\n lm_bid == %i ? i%i_l_linbrst : ", i, i);
  GEN ("i%i_l_linbrst;\n", nrf - 1);
  GEN ("end\n\n");

  /* start/end signals */
  GEN ("\n\n/* start/end signals */\n");
  for (i = 0; i < nrf; i++) {
    if (log2_int (maxncallees + 1))
      GEN ("wire [%i:0] i%i_current = i%i_busy ? i%i_current_r : i%i_start_bid;\n",
           log2_int (maxncallees + 1), i, i, i, i);
    else GEN ("wire i%i_current = 0;\n", i);
  }
  GEN ("\n");

  /* j counts emitted instances.  The dependency loop uses its own index k;
     it previously reused j and so corrupted the instance numbering. */
  for (i = 0, j = 0; i < nfuncs; i++) if (f[i]) {
    if (log2_int (ncallees[i])) {
      GEN ("reg [%i:0] i%i_start_bid;\n", log2_int (ncallees[i]), j);
      GEN ("always @(start%i", f[i]->tmp);
      for (k = 0, first = 1; k < f[i]->nfdeps; k++)
        if (f[i]->fdeps[k]) GEN (", ");
      GEN (")\n");
      GEN ("begin !!!\n"); /* TODO: bid logic not implemented; emits invalid Verilog */
      GEN (" \n");
      GEN ("end\n");
    }
    GEN ("wire i%i_start = main_start[%i];\n", j, j);
    j++;
  }
  GEN ("\n");

  /* Instantiate one module per function */
  for (i = 0; i < nfuncs; i++) if (f[i]) {
    int nf = f[i]->tmp;
    GEN ("\n%s%s i%i(.clk(clk), .rst(rst),\n", filename, prof_func[i].name, nf);
    GEN (" .l_adr_o(i%i_l_adr), .l_dat_i(i%i_l_dat), .l_req_o(i_l_req[%i]),\n",
         nf, nf, nf);
    GEN (" .l_sel_o(i%i_l_sel), .l_linbrst_o(i%i_l_linbrst), .l_rdy_i(i%i_l_rdy),\n",
         nf, nf, nf);
    GEN (" .s_adr_o(i%i_s_adr), .s_dat_o(i%i_s_dat), .s_req_o(i_s_req[%i]),\n",
         nf, nf, nf);
    GEN (" .s_sel_o(i%i_s_sel), .s_linbrst_o(i%i_s_linbrst), .s_rdy_i(i%i_s_rdy),\n",
         nf, nf, nf);
    GEN (" ");
    for (j = 0, first = 1; j < MAX_REGS; j++) if (f[i]->used_regs[j]) {
      GEN (".r%i_i(i%i_r%ii), ", j, nf, j);
      first = 0;
    }

    if (first) GEN ("\n ");
    for (j = 0, first = 1; j < MAX_REGS; j++)
      if (f[i]->lur[j] >= 0 && !f[i]->saved_regs[j]) {
        GEN (".r%i_o(i%i_r%io), ", j, nf, j);
        first = 0;
      }
    if (first) GEN ("\n ");
    if (f[i]->nfdeps) {
      /* The fr*_o / fr11_i lines had conversions without arguments; they now
         receive the same index as the fstart/fend/fid line next to them */
      GEN (".fstart_o(i_fstart[%i]), .fend_i(i_fend[%i]), .fid_o(i%i_fid),\n", i, i, i);
      GEN (" .fr3_o(i%i_fr3), .fr4_o(i%i_fr4), .fr5_o(i%i_fr5), .fr6_o(i%i_fr6),\n",
           i, i, i, i);
      GEN (" .fr7_o(i%i_fr7), .fr8_o(i%i_fr8), .fr11_i(i%i_fr11i),\n ", i, i, i);
    }
    GEN (".start_i(i_start[%i]), .end_o(i_end[%i]), .busy_o(i_busy[%i]));\n", nf, nf, nf);
  }

  /* output footer */
  GEN ("\nendmodule\n");

  fclose (fo);
}
verilog.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: memory.c =================================================================== --- memory.c (nonexistent) +++ memory.c (revision 1765) @@ -0,0 +1,542 @@ +/* memory.c -- OpenRISC Custom Unit Compiler, memory optimization and scheduling + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + + +/* Cleans memory & data dependencies */ +void clean_deps (cuc_func *f) +{ + int b, i; + dep_list *t; + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) { + t = f->bb[b].insn[i].dep; + while (t) { + dep_list *tmp = t; + t = t->next; + free (tmp); + } + f->bb[b].insn[i].dep = NULL; + } + + t = f->bb[b].mdep; + while (t) { + dep_list *tmp = t; + t = t->next; + free (tmp); + } + f->bb[b].mdep = NULL; + } + + f->nmsched = 0; +} + +/* Checks for memory conflicts between two instructions; returns 1 if detected + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +static int check_memory_conflict (cuc_func *f, cuc_insn *a, cuc_insn *b, int otype) +{ + switch (otype) { + case MO_EXACT: /* exact */ + case MO_STRONG: /* strong */ + return 1; + case MO_WEAK: /* weak */ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + int aw, bw; + assert ((aw = II_MEM_WIDTH (a->index)) >= 0); + assert ((bw = II_MEM_WIDTH (b->index)) >= 0); + + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 1; + + /* Check if they overlap */ + if (a->op[2] >= b->op[2] && a->op[2] < b->op[2] + bw) return 1; + if (b->op[2] >= a->op[2] && b->op[2] < a->op[2] + aw) return 1; + return 0; + } else return 1; + case MO_NONE: /* none */ + return 0; + default: + assert (0); + } + return 1; +} + +/* Adds memory dependencies based on ordering type: + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +void add_memory_dep (cuc_func *f, int otype) +{ + int b, i; + dep_list *all_mem = NULL; + + for (b = 0; b < 
f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + dep_list *tmp = all_mem; + while (tmp) { + //PRINTF ("%x %x\n", REF (b,i), tmp->ref); + if (check_memory_conflict (f, &insn[i], &f->INSN(tmp->ref), otype)) + add_dep (&insn[i].dep, tmp->ref); + tmp = tmp->next; + } + add_dep (&all_mem, REF (b, i)); + } + } + dispose_list (&all_mem); +} + +/* Check if they address the same location, so we can join them */ +static int same_transfers (cuc_func *f, int otype) +{ + int i, j; + int modified = 0; + if (otype == MO_WEAK || otype == MO_NONE) { + for (i = 1, j = 1; i < f->nmsched; i++) + /* Exclude memory stores and different memory types */ + if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + /* Not in usual form? 
*/ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep; + + //PRINTF ("%i %i, ", a->op[2], b->op[2]); + + /* Check if they are the same => do not copy */ + if (a->op[2] == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => remove actual instruction */ + int t1 = MIN (f->msched[i - 1], f->msched[i]); + int t2 = MAX (f->msched[i - 1], f->msched[i]); + int b, i, j; + cucdebug (2, "Removing %x_%x and using %x_%x instead.\n", + REF_BB(t2), REF_I(t2), REF_BB(t1), REF_I(t1)); + change_insn_type (&f->INSN(t2), II_NOP); + modified = 1; + /* Update references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t2) + f->bb[b].insn[i].op[j] = t1; + + } else goto keep; + } else goto keep; + } else { +keep: + f->msched[j] = f->msched[i]; + f->mtype[j++] = f->mtype[i]; + } + f->nmsched = j; + } + return modified; +} + +/* Check if two consecutive lb[zs] can be joined into lhz and if + two consecutive lh[zs] can be joined into lwz */ +static int join_transfers (cuc_func *f, int otype) +{ + int i, j; + int modified = 0; + + /* We can change width even with strong memory ordering */ + if (otype == MO_WEAK || otype == MO_NONE || otype == MO_STRONG) { + for (i = 1, j = 1; i < f->nmsched; i++) + /* Exclude memory stores and different memory types */ + if (f->mtype[i - 1] == f->mtype[i] && f->mtype[i] & MT_LOAD) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + int aw = f->mtype[i - 1] & MT_WIDTH; + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + + /* Not in usual form? 
*/ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) goto keep; + + /* Check if they touch together */ + if (a->op[2] + aw == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => remove second instruction */ + int t1 = MIN (f->msched[i - 1], f->msched[i]); + int t2 = MAX (f->msched[i - 1], f->msched[i]); + dep_list *t1dep = f->INSN(t1).dep; + int x, p; + cuc_insn *ii; + + cucdebug (2, "Joining %x and %x.\n", t1, t2); + if (cuc_debug >= 8) print_cuc_bb (f, "PREJT"); + change_insn_type (&f->INSN(t1), II_NOP); + change_insn_type (&f->INSN(t2), II_NOP); + /* We will reuse the memadd before the first load, and add some + custom code at the end */ + insert_insns (f, t1, 10); + if (cuc_debug > 8) print_cuc_bb (f, "PREJT2"); + + /* Remove all dependencies to second access */ + for (x = 0; x < f->num_bb; x++) { + int i; + for (i = 0; i < f->bb[x].ninsn; i++) { + dep_list *d = f->bb[x].insn[i].dep; + dep_list **old = &f->bb[x].insn[i].dep; + while (d) { + if (d->ref == t2) { + d = d->next; + *old = d; + } else { + d = d->next; + old = &((*old)->next); + } + } + } + } + + /* Build the folowing code: + l[hw]z p-1 + and p-1, 0xff + sfle p-1, 0x7f + or p-2, 0xffffff00 + cmov p-3, p-1, p-2 + shr p-5, 8 + and p-1, 0xff + sfle p-1 0x7f + or p-2 0xffffff00 + cmov p-3, p-1, p-2*/ + p = REF_I(t1); + cucdebug (8, "%x %x\n", f->mtype[i - 1], f->mtype[i]); + for (x = 0; x < 2; x++) { + int t = f->mtype[i - 1 + x]; + ii = &f->bb[REF_BB(t1)].insn[p]; + if (!x) { + change_insn_type (ii, aw == 1 ? II_LH : II_LW); + ii->type = IT_MEMORY | IT_VOLATILE; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = t1 - 1; ii->opt[1] = OPT_REF; + ii->opt[2] = ii->opt[3] = OPT_NONE; + ii->dep = t1dep; + f->mtype[i - 1] = MT_LOAD | (aw == 1 ? 
2 : 4); + f->msched[i - 1] = REF (REF_BB(t1), p); + } else { + change_insn_type (ii, II_SRL); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = t1; ii->opt[1] = OPT_REF; + ii->op[2] = 8; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + } + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_AND); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = 0xff; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_SFLE); + ii->type = IT_COND; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = 0x7f; ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_OR); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 2); ii->opt[1] = OPT_REF; + if (t & MT_SIGNED) ii->op[2] = 0xffffff00; + else ii->op[2] = 0; + ii->opt[2] = OPT_CONST; + ii->opt[3] = OPT_NONE; + + ii = &f->bb[REF_BB(t1)].insn[++p]; + change_insn_type (ii, II_CMOV); + ii->type = 0; + ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST; + ii->op[1] = REF (REF_BB(t1), p - 1); ii->opt[1] = OPT_REF; + ii->op[2] = REF (REF_BB(t1), p - 3); ii->opt[2] = OPT_REF; + ii->op[3] = REF (REF_BB(t1), p - 2); ii->opt[3] = OPT_REF; + p++; + } + + modified = 1; + + { + int b, i, j; + /* Update references */ + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + for (j = 0; j < MAX_OPERANDS; j++) + if (REF_I (f->bb[b].insn[i].op[j]) < REF_I (t1) + || REF_I(f->bb[b].insn[i].op[j]) >= REF_I (t1) + 10) { + if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t1) + f->bb[b].insn[i].op[j] = t1 + 4; + else if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == t2) + f->bb[b].insn[i].op[j] = t1 + 9; + } 
+ } + if (cuc_debug >= 8) print_cuc_bb (f, "POSTJT"); + } else goto keep; + } else goto keep; + } else { +keep: + f->msched[j] = f->msched[i]; + f->mtype[j++] = f->mtype[i]; + } + f->nmsched = j; + } + return modified; +} + +/* returns nonzero if a < b */ +int mem_ordering_cmp (cuc_func *f, cuc_insn *a, cuc_insn *b) +{ + assert (a->type & IT_MEMORY); + assert (b->type & IT_MEMORY); + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) return 0; + + /* Order linearly, we can then join them to bursts */ + return a->op[2] < b->op[2]; + } else return 0; +} + +/* Schedule memory accesses + 0 - exact; 1 - strong; 2 - weak; 3 - none */ +int schedule_memory (cuc_func *f, int otype) +{ + int b, i, j; + int modified = 0; + f->nmsched = 0; + + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) + if (insn[i].type & IT_MEMORY) { + f->msched[f->nmsched++] = REF (b, i); + if (otype == MO_NONE || otype == MO_WEAK) insn[i].type |= IT_FLAG1; /* mark unscheduled */ + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 
'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* We can reorder just more loose types + We assume, that memory accesses are currently in valid (but not neccesserly) + optimal order */ + if (otype == MO_WEAK || otype == MO_NONE) { + for (i = 0; i < f->nmsched; i++) { + int best = i; + int tmp; + for (j = i + 1; j < f->nmsched; j++) if (REF_BB(f->msched[j]) == REF_BB(f->msched[best])) { + if (mem_ordering_cmp (f, &f->INSN (f->msched[j]), &f->INSN(f->msched[best]))) { + /* Check dependencies */ + dep_list *t = f->INSN(f->msched[j]).dep; + while (t) { + if (f->INSN(t->ref).type & IT_FLAG1) break; + t = t->next; + } + if (!t) best = j; /* no conflicts -> ok */ + } + } + + /* we have to shift instructions up, to maintain valid dependencies + and make space for best candidate */ + + /* make local copy */ + tmp = f->msched[best]; + for (j = best; j > i; j--) f->msched[j] = f->msched[j - 1]; + f->msched[i] = tmp; + f->INSN(f->msched[i]).type &= ~IT_FLAG1; /* mark scheduled */ + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* Assign memory types */ + for (i = 0; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i]); + f->mtype[i] = !II_IS_LOAD(a->index) ? MT_STORE : MT_LOAD; + f->mtype[i] |= II_MEM_WIDTH (a->index); + if (a->type & IT_SIGNED) f->mtype[i] |= MT_SIGNED; + } + + if (same_transfers (f, otype)) modified = 1; + if (join_transfers (f, otype)) modified = 1; + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 
'E' : 'B' : ' '); + cucdebug (2, "\n"); + if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL"); + + if (config.cuc.enable_bursts) { + //PRINTF ("\n"); + for (i = 1; i < f->nmsched; i++) { + cuc_insn *a = &f->INSN(f->msched[i - 1]); + cuc_insn *b = &f->INSN(f->msched[i]); + int aw = f->mtype[i - 1] & MT_WIDTH; + + /* Burst can only be out of words */ + if (aw != 4) continue; + + if ((a->opt[1] & OPT_REF) && f->INSN(a->op[1]).index == II_ADD + &&(b->opt[1] & OPT_REF) && f->INSN(b->op[1]).index == II_ADD) { + a = &f->INSN(a->op[1]); + b = &f->INSN(b->op[1]); + /* Not in usual form? */ + if (a->opt[1] != b->opt[1] || a->op[1] != b->op[1] + || a->opt[2] != OPT_CONST || b->opt[2] != OPT_CONST) continue; + + //PRINTF ("%i %i, ", a->op[2], b->op[2]); + + /* Check if they touch together */ + if (a->op[2] + aw == b->op[2] + && REF_BB(f->msched[i - 1]) == REF_BB(f->msched[i])) { + /* yes => do burst */ + f->mtype[i - 1] &= ~MT_BURSTE; + f->mtype[i - 1] |= MT_BURST; + f->mtype[i] |= MT_BURST | MT_BURSTE; + } + } + } + } + + for (i = 0; i < f->nmsched; i++) + cucdebug (2, "[%x]%x%c ", f->msched[i], f->mtype[i] & MT_WIDTH, (f->mtype[i] & MT_BURST) ? (f->mtype[i] & MT_BURSTE) ? 'E' : 'B' : ' '); + cucdebug (2, "\n"); + + /* We don't need dependencies in non-memory instructions */ + for (b = 0; b < f->num_bb; b++) { + cuc_insn *insn = f->bb[b].insn; + for (i = 0; i < f->bb[b].ninsn; i++) if (!(insn[i].type & IT_MEMORY)) + dispose_list (&insn[i].dep); + } + + if (cuc_debug > 5) print_cuc_bb (f, "AFTER_MEM_REMOVAL2"); + /* Reduce number of dependecies, keeping just direct dependencies, based on memory schedule */ + { + int lastl[3] = {-1, -1, -1}; + int lasts[3] = {-1, -1, -1}; + int lastc[3] = {-1, -1, -1}; + int last_load = -1, last_store = -1, last_call = -1; + for (i = 0; i < f->nmsched; i++) { + int t = f->mtype[i] & MT_LOAD ? 0 : f->mtype[i] & MT_STORE ? 
1 : 2; + int maxl = lastl[t]; + int maxs = lasts[t]; + int maxc = lastc[t]; + dep_list *tmp = f->INSN(f->msched[i]).dep; + cucdebug (7, "!%i %x %p\n", i, f->msched[i], tmp); + while (tmp) { + if (f->INSN(tmp->ref).type & IT_MEMORY && REF_BB(tmp->ref) == REF_BB(f->msched[i])) { + cucdebug (7, "%i %x %lx\n", i, f->msched[i], tmp->ref); + /* Search for the reference */ + for (j = 0; j < f->nmsched; j++) if (f->msched[j] == tmp->ref) break; + assert (j < f->nmsched); + if (f->mtype[j] & MT_STORE) { + if (maxs < j) maxs = j; + } else if (f->mtype[j] & MT_LOAD) { + if (maxl < j) maxl = j; + } else if (f->mtype[j] & MT_CALL) { + if (maxc < j) maxc = j; + } + } + tmp = tmp->next; + } + dispose_list (&f->INSN(f->msched[i]).dep); + if (f->mtype[i] & MT_STORE) { + maxs = last_store; + last_store = i; + } else if (f->mtype[i] & MT_LOAD) { + maxl = last_load; + last_load = i; + } else if (f->mtype[i] & MT_CALL) { + maxc = last_call; + last_call = i; + } + + if (maxl > lastl[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxl]); + lastl[t] = maxl; + } + if (maxs > lasts[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxs]); + lasts[t] = maxs; + } + if (maxc > lastc[t]) { + add_dep (&f->INSN(f->msched[i]).dep, f->msched[maxc]); + lastc[t] = maxc; + } + //PRINTF ("%i(%i)> ml %i(%i) ms %i(%i) lastl %i %i lasts %i %i last_load %i last_store %i\n", i, f->msched[i], maxl, f->msched[maxl], maxs, f->msched[maxs], lastl[0], lastl[1], lasts[0], lasts[1], last_load, last_store); + + /* What we have to wait to finish this BB? 
*/ + if (i + 1 >= f->nmsched || REF_BB(f->msched[i + 1]) != REF_BB(f->msched[i])) { + if (last_load > lastl[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_load]); + lastl[t] = last_load; + } + if (last_store > lasts[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_store]); + lasts[t] = last_store; + } + if (last_call > lastc[t]) { + add_dep (&f->bb[REF_BB(f->msched[i])].mdep, f->msched[last_call]); + lastc[t] = last_call; + } + } + } + } + return modified; +}
memory.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: adv.c =================================================================== --- adv.c (nonexistent) +++ adv.c (revision 1765) @@ -0,0 +1,297 @@ +/* adv.c -- OpenRISC Custom Unit Compiler, Advanced Optimizations + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "sim-config.h" +#include "abstract.h" +#include "cuc.h" +#include "insn.h" +#include "support/profile.h" + +/* Marks successor of b with mask m */ +static void mark_successors (cuc_func *f, int b, int m, int stopb) +{ + if (b < 0 || b == BBID_END) return; + if (f->bb[b].tmp & m) return; + f->bb[b].tmp |= m; + /* mark stopb also; and stop searching -- we will gen new result in stopb */ + if (b == stopb) return; + mark_successors (f, f->bb[b].next[0], m, stopb); + mark_successors (f, f->bb[b].next[1], m, stopb); +} + +static unsigned long mask (unsigned long c) +{ + if (c) return (1 << (log2_int (c) + 1)) - 1; + else return 0; +} + +/* Calculates facts, that are determined by conditionals */ +void insert_conditional_facts (cuc_func *f) +{ + int b, j; + int b1, i1, j1; + cuc_insn n[2]; + for (b = 0; b < f->num_bb; b++) if (f->bb[b].ninsn > 0) { + cuc_insn *ii = &f->bb[b].insn[f->bb[b].ninsn - 1]; + /* We have following situation + x <= ... + sfxx f, x, CONST + bf ..., f */ + if (ii->type & IT_BRANCH && ii->opt[1] & OPT_REF && REF_BB(ii->op[1]) == b + && f->INSN(ii->op[1]).opt[2] & OPT_CONST) { + int ok = 0; + unsigned long c = f->INSN(ii->op[1]).op[2]; + int rref = f->INSN(ii->op[1]).op[1]; + unsigned long r; + if (!(f->INSN(ii->op[1]).opt[1] & OPT_REF)) continue; + r = f->INSN(rref).op[0]; + + /* Assignment must be in same basic block */ + if (REF_BB(rref) != b) continue; + + for (j = 0; j < 2; j++) { + change_insn_type (&n[j], II_ADD); + n[j].type = 0; + n[j].dep = NULL; + n[j].op[0] = r; n[j].opt[0] = OPT_REGISTER | OPT_DEST; + n[j].op[1] = 0; n[j].opt[1] = OPT_CONST; + n[j].op[2] = rref; n[j].opt[2] = OPT_REF; + n[j].opt[3] = OPT_NONE; + sprintf (n[j].disasm, "conditional %s fact", j ? 
"false" : "true"); + } + + /* First get the conditional and two instruction to place after the current BB */ + switch (f->INSN(ii->op[1]).index) { + case II_SFEQ: + change_insn_type (&n[0], II_ADD); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = 0; n[0].opt[1] = OPT_CONST; + n[0].op[2] = c; n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFNE: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = 0; n[1].opt[1] = OPT_CONST; + n[1].op[2] = c; n[1].opt[2] = OPT_CONST; + ok = 2; + break; + case II_SFLT: + change_insn_type (&n[0], II_AND); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = rref; n[0].opt[1] = OPT_REF; + n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFGT: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = rref; n[1].opt[1] = OPT_REF; + n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST; + ok = 2; + break; + case II_SFLE: + change_insn_type (&n[0], II_AND); + n[0].op[0] = r; n[0].opt[0] = OPT_REGISTER | OPT_DEST; + n[0].op[1] = rref; n[0].opt[1] = OPT_REF; + n[0].op[2] = mask (c); n[0].opt[2] = OPT_CONST; + ok = 1; + break; + case II_SFGE: + change_insn_type (&n[1], II_ADD); + n[1].op[0] = r; n[1].opt[0] = OPT_REGISTER | OPT_DEST; + n[1].op[1] = rref; n[1].opt[1] = OPT_REF; + n[1].op[2] = mask (c + 1); n[1].opt[2] = OPT_CONST; + ok = 2; + break; + default: + ok = 0; + break; + } + + /* Now add two BBs at the end and relink */ + if (ok) { + int cnt = 0; + cucdebug (1, "%x rref %x cnt %i\n", b, rref, cnt); + fflush (stdout); + for (j = 0; j < 2; j++) { + int nb = f->num_bb++; + int sb; + assert (nb < MAX_BB); + f->bb[nb].type = 0; + f->bb[nb].first = -1; f->bb[nb].last = -1; + f->bb[nb].prev[0] = b; f->bb[nb].prev[1] = -1; + sb = f->bb[nb].next[0] = f->bb[b].next[j]; f->bb[nb].next[1] = -1; + assert (cnt >= 0); + cucdebug (2, "%x %x %x rref %x cnt %i\n", b, sb, nb, 
rref, cnt); + fflush (stdout); + assert (sb >= 0); + f->bb[b].next[j] = nb; + if (sb != BBID_END) { + if (f->bb[sb].prev[0] == b) f->bb[sb].prev[0] = nb; + else if (f->bb[sb].prev[1] == b) f->bb[sb].prev[1] = nb; + else assert (0); + } + f->bb[nb].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (cnt + 1)); + assert (f->bb[nb].insn); + f->bb[nb].insn[0] = n[j]; + f->bb[nb].ninsn = cnt + 1; + f->bb[nb].mdep = NULL; + f->bb[nb].nmemory = 0; + f->bb[nb].cnt = 0; + f->bb[nb].unrolled = 0; + f->bb[nb].ntim = 0; + f->bb[nb].selected_tim = -1; + } + for (b1 = 0; b1 < f->num_bb; b1++) f->bb[b1].tmp = 0; + + /* Find successor blocks and change links accordingly */ + mark_successors (f, f->num_bb - 2, 2, b); + mark_successors (f, f->num_bb - 1, 1, b); + for (b1 = 0; b1 < f->num_bb - 2; b1++) if (f->bb[b1].tmp == 1 || f->bb[b1].tmp == 2) { + int end; + if (REF_BB (rref) == b1) end = REF_I (rref) + 1; + else end = f->bb[b1].ninsn; + for (i1 = 0; i1 < end; i1++) + for (j1 = 0; j1 < MAX_OPERANDS; j1++) + if (f->bb[b1].insn[i1].opt[j1] & OPT_REF && f->bb[b1].insn[i1].op[j1] == rref) + f->bb[b1].insn[i1].op[j1] = REF (f->num_bb - f->bb[b1].tmp, 0); + } + if (cuc_debug >= 3) print_cuc_bb (f, "FACT"); + } + } + } +} + +/* Returns the largest value operand o of instruction ref can take: + the recorded max of the referenced instruction, the constant itself, + or 0xffffffff for a plain register */ +static unsigned long max_op (cuc_func *f, int ref, int o) +{ + if (f->INSN(ref).opt[o] & OPT_REF) return f->INSN(f->INSN(ref).op[o]).max; + else if (f->INSN(ref).opt[o] & OPT_CONST) return f->INSN(ref).op[o]; + else if (f->INSN(ref).opt[o] & OPT_REGISTER) return 0xffffffff; + else assert (0); + return 0xffffffff; /* only reached when asserts are compiled out; stay conservative */ +} + +/* Sets every bit below the most significant set bit of m (m is expected + to fit in 32 bits). For x <= a and y <= b the result for m = a | b is + an upper bound of both x | y and x ^ y, which a | b alone is not + (x = 3, y = 4, a = b = 4 gives x | y = 7 but a | b = 4) */ +static unsigned long fill_low_bits (unsigned long m) +{ + m |= m >> 1; + m |= m >> 2; + m |= m >> 4; + m |= m >> 8; + m |= m >> 16; + return m; +} + +/* Returns an upper bound of the result of instruction ref, based on the + current bounds of its inputs; never underestimates, so that the + iteration in detect_max_values only ever widens a value */ +static unsigned long calc_max (cuc_func *f, int ref) +{ + cuc_insn *ii = &f->INSN(ref); + if (ii->type & IT_COND) return 1; + switch (ii->index) { + case II_ADD : return MIN ((unsigned long long) max_op (f, ref, 1) + + (unsigned long long)max_op (f, ref, 2), 0xffffffff); + case II_SUB : return 0xffffffff; + case II_AND : return MIN (max_op (f, ref, 1), max_op (f, ref, 2)); + case II_OR : return fill_low_bits (max_op (f, ref, 1) | max_op (f, 
ref, 2)); + case II_XOR : return fill_low_bits (max_op (f, ref, 1) | max_op (f, ref, 2)); + case II_MUL : return MIN ((unsigned long long) max_op (f, ref, 1) + * (unsigned long long)max_op (f, ref, 2), 0xffffffff); + /* shift by the largest possible amount, computed in 64 bits and clamped so it can neither wrap nor exceed 32 bits */ + case II_SLL : if (max_op (f, ref, 2) < 32) + return MIN ((unsigned long long) max_op (f, ref, 1) << max_op (f, ref, 2), 0xffffffff); + else return max_op (f, ref, 1) ? 0xffffffff : 0; + /* an operand that may have bit 31 set can be sign-filled up to 0xffffffff */ + case II_SRA : return (max_op (f, ref, 1) & 0x80000000) ? 0xffffffff : max_op (f, ref, 1); + case II_SRL : if (ii->opt[2] & OPT_CONST) return max_op (f, ref, 1) >> ii->op[2]; + else return max_op (f, ref, 1); + /* NOTE(review): the load bounds below assume zero-extending loads -- confirm for IT_SIGNED loads */ + case II_LB : return 0xff; + case II_LH : return 0xffff; + case II_LW : return 0xffffffff; + case II_SB : + case II_SH : + case II_SW : return 0; + case II_SFEQ: + case II_SFNE: + case II_SFLE: + case II_SFLT: + case II_SFGE: + case II_SFGT: return 1; + case II_BF : return 0; + case II_LRBB: return 1; + case II_CMOV: return MAX (max_op (f, ref, 1), max_op (f, ref, 2)); + case II_REG : return max_op (f, ref, 1); + case II_NOP : assert (0); + case II_CALL: assert (0); + default: assert (0); + } + return -1; +} + +/* Width optimization -- detect maximum values; + these values are actually estimates, since the problem + is too hard otherwise... + We calculate these maximums iteratively -- we are slowly + approaching final solution. 
This algorithm is surely finite, + but can be very slow; so we stop after some iterations; + normal loops should be in this range */ +void detect_max_values (cuc_func *f) +{ + int b, i; + int modified = 0; + int iteration = 0; + + for (b = 0; b < f->num_bb; b++) { + for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].max = 0; + f->bb[b].tmp = 1; + } + + /* Repeat until something is changing */ + do { + modified = 0; + for (b = 0; b < f->num_bb; b++) { + if (f->bb[b].tmp) { + for (i = 0; i < f->bb[b].ninsn; i++) { + unsigned long m = calc_max (f, REF (b, i)); + if (m > f->bb[b].insn[i].max) { + f->bb[b].insn[i].max = m; + modified = 1; + } + } + } + } + if (iteration++ > CUC_WIDTH_ITERATIONS) break; + } while (modified); + + /* Something bad has happened; now we will assign 0xffffffff to all unsatisfied + instructions; this one is stoppable in O(n ^ 2) */ + if (iteration > CUC_WIDTH_ITERATIONS) { + do { + modified = 0; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) { + unsigned long m = calc_max (f, REF (b, i)); + if (m > f->bb[b].insn[i].max) { + f->bb[b].insn[i].max = 0xffffffff; + modified = 1; + } + } + } while (modified); + } + cucdebug (1, "detect_max_values %i iterations\n", iteration); +} + Index: timings.c =================================================================== --- timings.c (nonexistent) +++ timings.c (revision 1765) @@ -0,0 +1,305 @@ +/* timings.c -- OpenRISC Custom Unit Compiler, timing and size estimation + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_INTTYPES_H +#include +#endif + +#include "port.h" +#include "arch.h" +#include "abstract.h" +#include "sim-config.h" +#include "cuc.h" +#include "insn.h" + +static cuc_timing_table *timing_table; +static double max_bb_delay; + +/* Returns instruction delay */ +double insn_time (cuc_insn *ii) +{ + if (ii->opt[2] & OPT_CONST) { + if (ii->opt[1] & OPT_CONST) return 0.; + else return timing_table[ii->index].delayi; + } else return timing_table[ii->index].delay; +} + +/* Returns instruction size */ +double insn_size (cuc_insn *ii) +{ + double s = (ii->opt[2] & OPT_CONST) ? 
timing_table[ii->index].sizei + : timing_table[ii->index].size; + if (ii->opt[1] & OPT_CONST) return 0.; + if (ii->type & IT_COND && (ii->index == II_CMOV || ii->index == II_ADD)) return s / 32.; + else return s; +} + +/* Returns normal instruction size */ +double ii_size (int index, int imm) +{ + if (imm) return timing_table[index].sizei; + else return timing_table[index].size; +} + +/* Returns dataflow tree height in cycles */ +static double max_delay (cuc_func *f, int b) +{ + double max_d = 0.; + double *d; + cuc_bb *bb = &f->bb[b]; + int i, j; + d = (double *) malloc (sizeof (double) * bb->ninsn); + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 && REF_BB (op) == b && REF_I (op) < i) { + double t = d[REF_I (op)]; + if (t > md) md = t; + } + } + d[i] = md + insn_time (&bb->insn[i]); + if (d[i] > max_d) max_d = d[i]; + } + free (d); + //PRINTF ("max_d%i=%f\n", b, max_d); + return max_d; +} + +/* Calculates memory delay of a single run of a basic block */ +static int memory_delay (cuc_func *f, int b) +{ + int i; + int d = 0; + for (i = 0; i < f->nmsched; i++) + if (REF_BB (f->msched[i]) == b) { + if (f->mtype[i] & MT_STORE) { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[2]; + else d += runtime.cuc.mdelay[3]; + } else if (f->mtype[i] & MT_LOAD) { + if (!(f->mtype[i] & MT_BURST) || f->mtype[i] & MT_BURSTE) d += runtime.cuc.mdelay[0]; + else d += runtime.cuc.mdelay[1]; + } + } + //PRINTF ("md%i=%i\n", b, d); + return d; +} + +/* Cuts the tree and marks registers */ +void cut_tree (cuc_func *f, int b, double sd) +{ + int i, j; + double *depths; + cuc_bb *bb = &f->bb[b]; + depths = (double *) malloc (sizeof (double) * bb->ninsn); + + for (i = 0; i < bb->ninsn; i++) { + double md = 0.; + int mg = 0; + for (j = 0; j < MAX_OPERANDS; j++) { + int op = bb->insn[i].op[j]; + if (bb->insn[i].opt[j] & OPT_REF && op >= 0 
&& REF_BB (op) == b && REF_I (op) < i) { + double t = depths[REF_I (op)]; + if (f->INSN(op).type & IT_CUT) { + if (f->INSN(op).tmp + 1 >= mg) { + if (f->INSN(op).tmp + 1 > mg) md = 0.; + mg = f->INSN(op).tmp + 1; + if (t > md) md = t; + } + } else { + if (f->INSN(op).tmp >= mg) { + if (f->INSN(op).tmp > mg) md = 0.; + mg = f->INSN(op).tmp; + if (t > md) md = t; + } + } + } + } + //PRINTF ("%2x md%.1f ", i, md); + md += insn_time (&bb->insn[i]); + //PRINTF ("md%.1f mg%i %.1f\n", md, mg, sd); + bb->insn[i].tmp = mg; + if (md > sd) { + bb->insn[i].type |= IT_CUT; + if (md > runtime.cuc.cycle_duration) + log ("WARNING: operation t%x_%x may need to be registered inbetween\n", b, i); + depths[i] = 0.; + } else depths[i] = md; + } + free (depths); +} + +/* How many cycles we need now to get through the BB */ +static int new_bb_cycles (cuc_func *f, int b, int cut) +{ + long d; + double x = max_delay (f, b); + d = ceil (x / runtime.cuc.cycle_duration); + if (d < 1) d = 1; + if (cut && x > runtime.cuc.cycle_duration) cut_tree (f, b, x / d); + + if (x / d > max_bb_delay) max_bb_delay = x / d; + + return memory_delay (f, b) + d; +} + +/* Cuts the tree and marks registers */ +void mark_cut (cuc_func *f) +{ + int b, i; + for (b = 0; b < f->num_bb; b++) + for (i = 0; i < f->bb[b].ninsn; i++) + f->bb[b].insn[i].tmp = 0; /* Set starting groups */ + if (config.cuc.no_multicycle) + for (b = 0; b < f->num_bb; b++) + new_bb_cycles (f, b, 1); +} + +/* Returns basic block circuit area */ +static double bb_size (cuc_bb *bb) +{ + int i; + double d = 0.; + for (i = 0; i < bb->ninsn; i++) { + if (bb->insn[i].opt[2] & OPT_CONST) + d = d + timing_table[bb->insn[i].index].sizei; + else d = d + timing_table[bb->insn[i].index].size; + } + return d; +} + +/* Recalculates bb[].cnt values, based on generated profile file */ +void recalc_cnts (cuc_func *f, char *bb_filename) +{ + int i, r, b, prevbb = -1, prevcnt = 0; + int buf[256]; + const int bufsize = 256; + FILE *fi = fopen (bb_filename, "rb"); 
+ + assert (fi); + + /* initialize counts */ + for (b = 0; b < f->num_bb; b++) f->bb[b].cnt = 0; + + /* read control flow from file and set counts */ + do { + r = fread (buf, sizeof (int), bufsize, fi); + for (i = 0; i < r; i++) { + b = f->init_bb_reloc[buf[i]]; + if (b < 0) continue; + /* Were we in the loop? */ + if (b == prevbb) { + prevcnt++; + } else { + /* End the block */ + if (prevbb >= 0 && prevbb != BBID_START) + f->bb[prevbb].cnt += prevcnt / f->bb[prevbb].unrolled + 1; + prevcnt = 0; + prevbb = b; + } + } + } while (r == bufsize); + + fclose (fi); +} + +/* Analyzes current version of design and places results into timings structure */ +void analyse_timings (cuc_func *f, cuc_timings *timings) +{ + long new_time = 0; + double size = 0.; + int b, i; + + /* Add time needed for mtspr/mfspr */ + for (i = 0; i < MAX_REGS; i++) if (f->used_regs[i]) new_time++; + new_time++; /* always one mfspr at the end */ + new_time *= f->num_runs; + + max_bb_delay = 0.; + for (b = 0; b < f->num_bb; b++) { + new_time += new_bb_cycles (f, b, 0) * f->bb[b].cnt; + size = size + bb_size (&f->bb[b]); + } + timings->new_time = new_time; + timings->size = size; + log ("Max circuit delay %.2fns; max circuit clock speed %.1fMHz\n", + max_bb_delay, 1000. / max_bb_delay); +} + +/* Loads in the specified timings table. + Each record is an instruction name (as listed in known[]) followed by + four numbers: size, sizei, delay, delayi. A token starting with '#' + makes the rest of that line a comment. + Asserts when the file cannot be opened, when a name is not in known[], + or when some instruction is left without timings. */ +void load_timing_table (char *filename) +{ + int i; + FILE *fi; + + log ("Loading timings from %s\n", filename); + log ("Using clock delay %.2fns (frequency %.0fMHz)\n", runtime.cuc.cycle_duration, + 1000. 
/ runtime.cuc.cycle_duration); + fi = fopen (filename, "rt"); /* kept out of assert () so the call survives NDEBUG */ + assert (fi); + + timing_table = (cuc_timing_table *)malloc ((II_LAST + 1) * sizeof (cuc_timing_table)); + assert (timing_table); + /* negative values mark entries the file has not filled in yet */ + for (i = 0; i <= II_LAST; i++) { + timing_table[i].size = -1.; + timing_table[i].sizei = -1.; + timing_table[i].delay = -1.; + timing_table[i].delayi = -1.; + } + + while (!feof(fi)) { + char tmp[256]; + int index = II_LAST + 1; /* stays out of range unless the name is found below */ + if (fscanf (fi, "%255s", tmp) != 1) break; /* width keeps the token inside tmp */ + if (tmp[0] == '#') { + while (!feof (fi) && fgetc (fi) != '\n'); + continue; + } + for (i = 0; i <= II_LAST; i++) + if (strcmp (known[i].name, tmp) == 0) { + index = i; + break; + } + assert (index <= II_LAST); /* unknown instruction name in the table */ + i = index; + if (fscanf (fi, "%lf%lf%lf%lf\n", &timing_table[i].size, + &timing_table[i].sizei, &timing_table[i].delay, &timing_table[i].delayi) != 4) break; + /*PRINTF ("!%s size %f,%f delay %f,%f\n", known[i].name, timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + /* Was everything initialized? */ + for (i = 0; i <= II_LAST; i++) { + assert (timing_table[i].size >= 0 && timing_table[i].sizei >= 0 + && timing_table[i].delay >= 0 && timing_table[i].delayi >= 0); + /*PRINTF ("%s size %f,%f delay %f,%f\n", known[i], timing_table[i].size, + timing_table[i].sizei, timing_table[i].delay, timing_table[i].delayi);*/ + } + + fclose (fi); +} +
timings.c Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: cuc.h =================================================================== --- cuc.h (nonexistent) +++ cuc.h (revision 1765) @@ -0,0 +1,332 @@ +/* cuc.h -- OpenRISC Custom Unit Compiler, main header file + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ + +#ifndef __DATAF_H__ +#define __DATAF_H__ + +/* Maximum number of instructions per function */ +#define MAX_INSNS 0x10000 +#define MAX_BB 0x1000 +#define MAX_REGS 34 +#define FLAG_REG (MAX_REGS - 2) +#define LRBB_REG (MAX_REGS - 1) +#define CUC_MAX_STACK 0x1000 /* if more, not converted */ +#define MAX_PREROLL 32 +#define MAX_UNROLL 32 + +#define IT_BRANCH 0x0001 /* Branch instruction */ +#define IT_INDELAY 0x0002 /* Instruction is in delay slot */ +#define IT_BBSTART 0x0004 /* BB start marker */ +#define IT_BBEND 0x0008 /* BB end marker */ +#define IT_OUTPUT 0x0010 /* this instruction holds final value of the register */ +#define IT_SIGNED 0x0020 /* Instruction is signed */ +#define IT_MEMORY 0x0040 /* Instruction does memory access */ +#define IT_UNUSED 0x0080 /* dead instruction marker */ +#define IT_FLAG1 0x0100 /* misc flags */ +#define IT_FLAG2 0x0200 +#define IT_VOLATILE 0x0400 /* Should not be moved/removed */ +#define IT_MEMADD 0x0800 /* add before the load -- should not be removed */ +#define IT_COND 0x1000 /* Conditional */ +#define IT_LATCHED 0x2000 /* Output of this instruction is latched/registered */ +#define IT_CUT 0x4000 /* After this instruction register is placed */ + +#define OPT_NONE 0x00 +#define OPT_CONST 0x01 +#define OPT_REGISTER 0x02 +#define OPT_REF 0x04 +#define OPT_JUMP 0x08 /* Jump to an instruction index */ +#define OPT_DEST 0x10 /* This operand is dest */ +#define OPT_BB 0x20 /* Jumpt to BB */ +#define OPT_LRBB 0x40 /* 0 if we came in from left BB, or 1 otherwise */ + +#define MT_WIDTH 0x007 /* These bits hold memory access width in bytes */ +#define MT_BURST 0x008 /* burst start & end markers */ +#define MT_BURSTE 0x010 +#define MT_CALL 0x020 /* This is a call */ +#define MT_LOAD 0x040 /* This memory access does a read */ +#define MT_STORE 0x080 /* This memory access does a write */ +#define MT_SIGNED 0x100 /* Signed memory access */ + +#define MO_NONE 0 /* different memory ordering, even if there are dependencies, + burst can 
be made, width can change */ +#define MO_WEAK 1 /* different memory ordering, if there cannot be dependencies, + burst can be made, width can change */ +#define MO_STRONG 2 /* Same memory ordering, burst can be made, width can change */ +#define MO_EXACT 3 /* Exactly the same memory ordering and widths */ + +#define BB_INLOOP 0x01 /* This block is inside a loop */ +#define BB_OPTIONAL 0x02 +#define BB_DEAD 0x08 /* This block is inaccessible -> to be removed */ + +#define BBID_START MAX_BB /* Start BB pointer */ +#define BBID_END (MAX_BB + 1) /* End BB pointer */ + +/* Various macros to minimize code size */ +#define REF(bb,i) (((bb) * MAX_INSNS) + (i)) +#define REF_BB(r) ((r) / MAX_INSNS) +#define REF_I(r) ((r) % MAX_INSNS) +#define INSN(ref) bb[REF_BB(ref)].insn[REF_I(ref)] + +#ifndef MIN +# define MIN(x,y) ((x) < (y) ? (x) : (y)) +#endif + +#ifndef MAX +# define MAX(x,y) ((x) > (y) ? (x) : (y)) +#endif + +#define log(x...) {fprintf (flog, x); fflush (flog); } + +#define cucdebug(x,s...) 
{if ((x) <= cuc_debug) PRINTF (s);} + +#define CUC_WIDTH_ITERATIONS 256 + +/* Options */ +/* Whether we are debugging cuc (0-9) */ +extern int cuc_debug; + +/* Temporary registers by software convention */ +extern const int caller_saved[MAX_REGS]; + +typedef struct _dep_list_t { + unsigned long ref; + struct _dep_list_t *next; +} dep_list; + +/* Shared list, if marked dead, entry is not used */ +typedef struct _csm_list { + int ref; + int cnt; + int cmovs; + double size, osize; + int cmatch; + int dead; + int ninsn; /* Number of associated instructions */ + struct _csm_list *from; + struct _csm_list *next; +} cuc_shared_list; + +/* Shared resource item definition */ +typedef struct { + int ref; + int cmatch; +} cuc_shared_item; + +/* Implementation specific timings */ +typedef struct { + int b; /* Basic block # this timing is referring to */ + int preroll; /* How many times was this BB pre/unrolled */ + int unroll; + int nshared; + cuc_shared_item *shared; /* List of shared resources */ + int new_time; + double size; +} cuc_timings; + +/* Instructionn entity */ +typedef struct { + int type; /* type of the instruction */ + int index; /* Instruction index */ + int opt[MAX_OPERANDS]; /* operand types */ + unsigned long op[MAX_OPERANDS]; /* operand values */ + dep_list *dep; /* instruction dependencies */ + unsigned long insn; /* Instruction opcode */ + char disasm[40]; /* disassembled string */ + unsigned long max; /* max result value */ + int tmp; +} cuc_insn; + +/* Basic block entity */ +typedef struct { + unsigned long type; /* Type of the bb */ + int first, last; /* Where this block lies */ + int prev[2], next[2]; + int tmp; + cuc_insn *insn; /* Instructions lie here */ + int ninsn; /* Number of instructions */ + int last_used_reg[MAX_REGS]; + dep_list *mdep; /* Last memory access dependencies */ + int nmemory; + int cnt; /* how many times was this block executed */ + int unrolled; /* how many times has been this block unrolled */ + + int ntim; /* Basic block 
options */ + cuc_timings *tim; + int selected_tim; /* Selected option, -1 if none */ +} cuc_bb; + +/* Function entity */ +typedef struct _cuc_func { + /* Basic blocks */ + int num_bb; + cuc_bb bb[MAX_BB]; + int saved_regs[MAX_REGS];/* Whether this register was saved */ + int lur[MAX_REGS]; /* Location of last use */ + int used_regs[MAX_REGS]; /* Nonzero if it was used */ + + /* Schedule of memory instructions */ + int nmsched; + int msched[MAX_INSNS]; + int mtype[MAX_INSNS]; + + /* initial bb and their relocations to new block numbers */ + int num_init_bb; + int *init_bb_reloc; + int orig_time; /* time in cyc required for SW implementation */ + int num_runs; /* Number times this function was run */ + cuc_timings timings; /* Base timings */ + unsigned long start_addr; /* Address of first instruction inn function */ + unsigned long end_addr; /* Address of last instruction inn function */ + int memory_order; /* Memory order */ + + int nfdeps; /* Function dependencies */ + struct _cuc_func **fdeps; + + int tmp; +} cuc_func; + +/* Instructions from function */ +extern cuc_insn insn[MAX_INSNS]; +extern int num_insn; +extern int reloc[MAX_INSNS]; +extern FILE *flog; + +/* returns log2(x) */ +int log2_int (unsigned long x); + +/* Loads from file into global array insn */ +int cuc_load (char *in_fn); + +/* Negates conditional instruction */ +void negate_conditional (cuc_insn *ii); + +/* Scans sequence of BBs and set bb[].cnt */ +void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename); + +/* Prints out instructions */ +void print_insns (int bb, cuc_insn *insn, int size, int verbose); + +/* prints out bb string */ +void print_bb_num (int num); + +/* Print out basic blocks */ +void print_cuc_bb (cuc_func *func, char *s); + +/* Duplicates function */ +cuc_func *dup_func (cuc_func *f); + +/* Releases memory allocated by function */ +void free_func (cuc_func *f); + +/* Common subexpression matching -- resource sharing, analysis pass */ +void csm (cuc_func *f); + 
+/* Common subexpression matching -- resource sharing, generation pass */ +void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared); + +/* Set the BB limits */ +void detect_bb (cuc_func *func); + +/* Optimize basic blocks */ +int optimize_bb (cuc_func *func); + +/* Search and optimize complex cmov assignments */ +int optimize_cmovs (cuc_func *func); + +/* Optimizes dataflow tree */ +int optimize_tree (cuc_func *func); + +/* Remove nop instructions */ +int remove_nops (cuc_func *func); + +/* Removes dead instruction */ +int remove_dead (cuc_func *func); + +/* Removes trivial register assignments */ +int remove_trivial_regs (cuc_func *f); + +/* Determine inputs and outputs */ +void set_io (cuc_func *func); + +/* Removes BBs marked as dead */ +int remove_dead_bb (cuc_func *func); + +/* Common subexpression elimination */ +int cse (cuc_func *f); + +/* Detect register dependencies */ +void reg_dep (cuc_func *func); + +/* Cuts the tree and marks registers */ +void mark_cut (cuc_func *f); + +/* Unroll loop b times times and return new function. Original + function is unmodified. 
 */ +cuc_func *preunroll_loop (cuc_func *func, int b, int preroll, int unroll, char *bb_filename); + +/* Clean memory and data dependencies */ +void clean_deps (cuc_func *func); + +/* Schedule memory accesses + 0 - exact; 1 - strong; 2 - weak; 3 - none + NOTE(review): this numbering is the reverse of MO_NONE (0) .. MO_EXACT (3) + defined earlier in this header -- confirm which one otype follows */ +int schedule_memory (cuc_func *func, int otype); + +/* Generates verilog file out of insn dataflow */ +void output_verilog (cuc_func *func, char *filename, char *funcname); + +/* Recalculates bb[].cnt values, based on generated profile file */ +void recalc_cnts (cuc_func *f, char *bb_filename); + +/* Calculate timings */ +void analyse_timings (cuc_func *func, cuc_timings *timings); + +/* Calculates facts, that are determined by conditionals */ +void insert_conditional_facts (cuc_func *func); + +/* Width optimization -- detect maximum values */ +void detect_max_values (cuc_func *f); + +/* Inserts n nops before insn 'ref' */ +void insert_insns (cuc_func *f, int ref, int n); + +/* Checks for some anomalies with references */ +void cuc_check(cuc_func *f); + +/* Adds memory dependencies based on ordering type */ +void add_memory_dep(cuc_func *f, int otype); + +/* Prints out instructions */ +void print_cuc_insns(char *s, int verbose); + +/* Build basic blocks */ +void build_bb(cuc_func *f); + +/* Latch outputs in loops */ +void add_latches(cuc_func *f); + +void generate_main(int nfuncs, cuc_func **f, char *filename); + +void add_dep(dep_list **list, int dep); + +void dispose_list(dep_list **list); + +void main_cuc(char *filename); + +void add_data_dep(cuc_func *f); +#endif /* __DATAF_H__ */
cuc.h Property changes : Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: Makefile.am =================================================================== --- Makefile.am (nonexistent) +++ Makefile.am (revision 1765) @@ -0,0 +1,25 @@ +# Makefile -- Makefile for cpu architecture independent simulation +# Copyright (C) 2002 Marko Mlinar, markom@opencores.org +# +# This file is part of OpenRISC 1000 Architectural Simulator. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# + +noinst_LIBRARIES = libcuc.a + +libcuc_a_SOURCES = cuc.c cuc.h load.c bb.c memory.c \ + verilog.c timings.c insn.c insn.h adv.c + Index: insn.h =================================================================== --- insn.h (nonexistent) +++ insn.h (revision 1765) @@ -0,0 +1,111 @@ +/* insn.h -- OpenRISC Custom Unit Compiler, internal instruction definitions + * Copyright (C) 2002 Marko Mlinar, markom@opencores.org + * + * This file is part of OpenRISC 1000 Architectural Simulator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _DF_INSN_ +#define _DF_INSN_ + +#include "cuc.h" + +/* Instruction types */ +#define II_ADD 0 +#define II_SUB 1 +#define II_AND 2 +#define II_OR 3 +#define II_XOR 4 +#define II_MUL 5 +#define II_SRL 6 +#define II_SLL 7 +#define II_SRA 8 +#define II_LB 9 +#define II_LH 10 +#define II_LW 11 +#define II_SB 12 +#define II_SH 13 +#define II_SW 14 +#define II_SFEQ 15 +#define II_SFNE 16 +#define II_SFLE 17 +#define II_SFLT 18 +#define II_SFGE 19 +#define II_SFGT 20 +#define II_BF 21 +#define II_LRBB 22 +#define II_CMOV 23 +#define II_REG 24 +#define II_NOP 25 +#define II_CALL 26 +#define II_LAST 26 + +/* misc flags */ +#define II_MASK 0x0fff +#define II_MEM 0x1000 +#define II_SIGNED 0x2000 + +#define II_IS_LOAD(x) ((x) == II_LB || (x) == II_LH || (x) == II_LW) +#define II_IS_STORE(x) ((x) == II_SB || (x) == II_SH || (x) == II_SW) +#define II_MEM_WIDTH(x) (((x) == II_LB || (x) == II_SB) ? 1 :\ + ((x) == II_LH || (x) == II_SH) ? 2 :\ + ((x) == II_LW || (x) == II_SW) ? 
 4 : -1) + +/* List of known instructions and their rtl representation */ +typedef struct { + char *name; + int comutative; + char *rtl; +} cuc_known_insn; + +extern const cuc_known_insn known[II_LAST + 1]; + +/* Timing table -- same indexes as known table; + the -i members are the immediate-form values */ +typedef struct { + double delay; + double size; + double delayi; + double sizei; +} cuc_timing_table; + +/* Conversion links */ +typedef struct { + const char *from; + const int to; +} cuc_conv; + +/* Returns the table size of instruction 'index'; the immediate-form size when imm is nonzero */ +double ii_size (int index, int imm); + +/* Returns instruction delay */ +double insn_time (cuc_insn *ii); + +/* Returns instruction size */ +double insn_size (cuc_insn *ii); + +/* Find known instruction and attach them to insn */ +void change_insn_type (cuc_insn *i, int index); + +/* Returns instruction name */ +const char *cuc_insn_name (cuc_insn *ii); + +/* Loads in the specified timings table */ +void load_timing_table (char *filename); + +/* Displays shared instructions */ +void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared); + +#endif /* _DF_INSN_ */ + Index: . =================================================================== --- . (nonexistent) +++ . (revision 1765)
. Property changes : Added: svn:ignore ## -0,0 +1,2 ## +Makefile +.deps

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.