OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [nog_patch_69/] [or1ksim/] [cuc/] [bb.c] - Diff between revs 1464 and 1765

Only display areas with differences | Details | Blame | View Log

Rev 1464 Rev 1765
/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling
/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
 *
 *
 *    This file is part of OpenRISC 1000 Architectural Simulator.
 *    This file is part of OpenRISC 1000 Architectural Simulator.
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *    (at your option) any later version.
 *
 *
 *    This program is distributed in the hope that it will be useful,
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *    GNU General Public License for more details.
 *
 *
 *    You should have received a copy of the GNU General Public License
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 
 
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdarg.h>
#include <assert.h>
#include <assert.h>
#include <string.h>
#include <string.h>
 
 
#include "config.h"
#include "config.h"
 
 
#ifdef HAVE_INTTYPES_H
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#include <inttypes.h>
#endif
#endif
 
 
#include "port.h"
#include "port.h"
#include "arch.h"
#include "arch.h"
#include "sim-config.h"
#include "sim-config.h"
#include "abstract.h"
#include "abstract.h"
#include "cuc.h"
#include "cuc.h"
#include "insn.h"
#include "insn.h"
#include "support/profile.h"
#include "support/profile.h"
 
 
/* prints out bb string */
/* prints out bb string */
void print_bb_num (int num)
void print_bb_num (int num)
{
{
  if (num < 0) PRINTF ("*");
  if (num < 0) PRINTF ("*");
  else if (num == BBID_END) PRINTF ("END");
  else if (num == BBID_END) PRINTF ("END");
  else if (num == BBID_START) PRINTF ("START");
  else if (num == BBID_START) PRINTF ("START");
  else PRINTF ("%2x", num);
  else PRINTF ("%2x", num);
}
}
 
 
/* Print out basic blocks */
/* Print out basic blocks */
void print_cuc_bb (cuc_func *f, char *s)
void print_cuc_bb (cuc_func *f, char *s)
{
{
  int i;
  int i;
  PRINTF ("------- %s -------\n", s);
  PRINTF ("------- %s -------\n", s);
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    if (f->bb[i].insn) PRINTF ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt);
    if (f->bb[i].insn) PRINTF ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt);
    else PRINTF ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last);
    else PRINTF ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last);
    PRINTF (" type %02lx tmp %i ", f->bb[i].type, f->bb[i].tmp);
    PRINTF (" type %02lx tmp %i ", f->bb[i].type, f->bb[i].tmp);
    PRINTF ("next "); print_bb_num (f->bb[i].next[0]);
    PRINTF ("next "); print_bb_num (f->bb[i].next[0]);
    PRINTF (" "); print_bb_num (f->bb[i].next[1]);
    PRINTF (" "); print_bb_num (f->bb[i].next[1]);
    PRINTF (" prev "); print_bb_num (f->bb[i].prev[0]);
    PRINTF (" prev "); print_bb_num (f->bb[i].prev[0]);
    PRINTF (" "); print_bb_num (f->bb[i].prev[1]);
    PRINTF (" "); print_bb_num (f->bb[i].prev[1]);
    PRINTF ("\n");
    PRINTF ("\n");
 
 
    if (f->bb[i].insn) print_insns (i, f->bb[i].insn, f->bb[i].ninsn, 0);
    if (f->bb[i].insn) print_insns (i, f->bb[i].insn, f->bb[i].ninsn, 0);
  }
  }
  if (f->nmsched) {
  if (f->nmsched) {
    PRINTF ("\nmsched: ");
    PRINTF ("\nmsched: ");
    for (i = 0; i < f->nmsched; i++)
    for (i = 0; i < f->nmsched; i++)
      PRINTF ("%x ", f->msched[i]);
      PRINTF ("%x ", f->msched[i]);
    PRINTF ("\n\n\n");
    PRINTF ("\n\n\n");
  } else PRINTF ("\n");
  } else PRINTF ("\n");
  fflush (stdout);
  fflush (stdout);
}
}
 
 
/* Copies src basic block into destination */
/* Copies src basic block into destination */
cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src)
cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src)
{
{
  int i, j;
  int i, j;
  dep_list *d;
  dep_list *d;
  assert (dest != src);
  assert (dest != src);
  *dest = *src;
  *dest = *src;
  assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn));
  assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn));
  for (i = 0; i < src->ninsn; i++) {
  for (i = 0; i < src->ninsn; i++) {
    d = src->insn[i].dep;
    d = src->insn[i].dep;
    dest->insn[i] = src->insn[i];
    dest->insn[i] = src->insn[i];
    dest->insn[i].dep = NULL;
    dest->insn[i].dep = NULL;
    while (d) {
    while (d) {
      add_dep (&dest->insn[i].dep, d->ref);
      add_dep (&dest->insn[i].dep, d->ref);
      d = d->next;
      d = d->next;
    }
    }
  }
  }
 
 
  d = src->mdep;
  d = src->mdep;
  dest->mdep = NULL;
  dest->mdep = NULL;
  while (d) {
  while (d) {
    add_dep (&dest->mdep, d->ref);
    add_dep (&dest->mdep, d->ref);
    d = d->next;
    d = d->next;
  }
  }
  if (src->ntim) {
  if (src->ntim) {
    assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim));
    assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim));
    for (i = 0; i < src->ntim; i++) {
    for (i = 0; i < src->ntim; i++) {
      dest->tim[i] = src->tim[i];
      dest->tim[i] = src->tim[i];
      if (src->tim[i].nshared) {
      if (src->tim[i].nshared) {
        assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared));
        assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared));
        for (j = 0; j < src->tim[i].nshared; j++)
        for (j = 0; j < src->tim[i].nshared; j++)
          dest->tim[i].shared[j] = src->tim[i].shared[j];
          dest->tim[i].shared[j] = src->tim[i].shared[j];
      }
      }
    }
    }
  }
  }
}
}
 
 
/* Duplicates function */
/* Duplicates function */
cuc_func *dup_func (cuc_func *f)
cuc_func *dup_func (cuc_func *f)
{
{
  cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func));
  cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func));
  int b, i;
  int b, i;
  for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]);
  for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]);
  n->num_bb = f->num_bb;
  n->num_bb = f->num_bb;
  assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
  assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
  for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b];
  for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b];
  n->num_init_bb = f->num_init_bb;
  n->num_init_bb = f->num_init_bb;
  for (i = 0; i < MAX_REGS; i++) {
  for (i = 0; i < MAX_REGS; i++) {
    n->saved_regs[i] = f->saved_regs[i];
    n->saved_regs[i] = f->saved_regs[i];
    n->lur[i] = f->lur[i];
    n->lur[i] = f->lur[i];
    n->used_regs[i] = f->used_regs[i];
    n->used_regs[i] = f->used_regs[i];
  }
  }
  n->start_addr = f->start_addr;
  n->start_addr = f->start_addr;
  n->end_addr = f->end_addr;
  n->end_addr = f->end_addr;
  n->orig_time = f->orig_time;
  n->orig_time = f->orig_time;
  n->nmsched = f->nmsched;
  n->nmsched = f->nmsched;
  n->num_runs = f->num_runs;
  n->num_runs = f->num_runs;
  for (i = 0; i < f->nmsched; i++) {
  for (i = 0; i < f->nmsched; i++) {
    n->msched[i] = f->msched[i];
    n->msched[i] = f->msched[i];
    n->mtype[i] = f->mtype[i];
    n->mtype[i] = f->mtype[i];
  }
  }
  n->nfdeps = f->nfdeps;
  n->nfdeps = f->nfdeps;
  if (f->nfdeps) {
  if (f->nfdeps) {
    f->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps);
    f->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps);
    for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i];
    for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i];
  }
  }
  return n;
  return n;
}
}
 
 
/* Releases memory allocated by function */
/* Releases memory allocated by function */
void free_func (cuc_func *f)
void free_func (cuc_func *f)
{
{
  int b, i;
  int b, i;
  for (b = 0; b < f->num_bb; b++) {
  for (b = 0; b < f->num_bb; b++) {
    for (i = 0; i < f->bb[b].ninsn; i++)
    for (i = 0; i < f->bb[b].ninsn; i++)
      dispose_list (&f->bb[b].insn[i].dep);
      dispose_list (&f->bb[b].insn[i].dep);
    if (f->bb[b].insn) free (f->bb[b].insn);
    if (f->bb[b].insn) free (f->bb[b].insn);
    for (i = 0; i < f->bb[b].ntim; i++)
    for (i = 0; i < f->bb[b].ntim; i++)
      if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared)
      if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared)
        free (f->bb[b].tim[i].shared);
        free (f->bb[b].tim[i].shared);
    if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim);
    if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim);
  }
  }
  free (f);
  free (f);
}
}
 
 
/* Recalculates last_used_reg */
/* Recalculates last_used_reg */
void recalc_last_used_reg (cuc_func *f, int b)
void recalc_last_used_reg (cuc_func *f, int b)
{
{
  int i;
  int i;
  cuc_bb *bb = &f->bb[b];
  cuc_bb *bb = &f->bb[b];
 
 
  /* rebuild last used reg array */
  /* rebuild last used reg array */
  if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0;
  if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0;
  else bb->last_used_reg[LRBB_REG] = -1;
  else bb->last_used_reg[LRBB_REG] = -1;
 
 
  for (i = 1; i < MAX_REGS - 1; i++) bb->last_used_reg[i] = -1;
  for (i = 1; i < MAX_REGS - 1; i++) bb->last_used_reg[i] = -1;
 
 
    /* Create references */
    /* Create references */
  for (i = 0; i < bb->ninsn; i++) {
  for (i = 0; i < bb->ninsn; i++) {
    int k;
    int k;
    /* Now check for destination operand(s) */
    /* Now check for destination operand(s) */
    for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST)
    for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST)
      if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
      if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
        && (int)bb->insn[i].op[k] >= 0) {
        && (int)bb->insn[i].op[k] >= 0) {
        bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i);
        bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i);
      }
      }
  }
  }
}
}
 
 
/* Set the BB limits */
/* Set the BB limits */
void detect_bb (cuc_func *f)
void detect_bb (cuc_func *f)
{
{
  int i, j, end_bb = 0, eb = 0;
  int i, j, end_bb = 0, eb = 0;
 
 
  /* Mark block starts/ends */
  /* Mark block starts/ends */
  for (i = 0; i < num_insn; i++) {
  for (i = 0; i < num_insn; i++) {
    if (end_bb) insn[i].type |= IT_BBSTART;
    if (end_bb) insn[i].type |= IT_BBSTART;
    end_bb = 0;
    end_bb = 0;
    if (insn[i].type & IT_BRANCH) {
    if (insn[i].type & IT_BRANCH) {
      int jt = insn[i].op[0];
      int jt = insn[i].op[0];
      insn[i].type |= IT_BBEND;
      insn[i].type |= IT_BBEND;
      end_bb = 1;
      end_bb = 1;
      if (jt < 0 || jt >= num_insn) {
      if (jt < 0 || jt >= num_insn) {
        fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm);
        fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm);
        exit (1);
        exit (1);
      }
      }
      if (jt > 0) insn[jt - 1].type |= IT_BBEND;
      if (jt > 0) insn[jt - 1].type |= IT_BBEND;
      insn[jt].type |= IT_BBSTART;
      insn[jt].type |= IT_BBSTART;
    }
    }
  }
  }
 
 
  /* Initialize bb array */
  /* Initialize bb array */
  insn[0].type |= IT_BBSTART;
  insn[0].type |= IT_BBSTART;
  insn[num_insn - 1].type |= IT_BBEND;
  insn[num_insn - 1].type |= IT_BBEND;
  f->num_bb = 0;
  f->num_bb = 0;
  for (i = 0; i < num_insn; i++) {
  for (i = 0; i < num_insn; i++) {
    if (insn[i].type & IT_BBSTART) {
    if (insn[i].type & IT_BBSTART) {
      f->bb[f->num_bb].first = i;
      f->bb[f->num_bb].first = i;
      f->bb[f->num_bb].cnt = 0;
      f->bb[f->num_bb].cnt = 0;
    }
    }
    /* Determine repetitions of a loop */
    /* Determine repetitions of a loop */
    if (insn[i].type & IT_BBEND) {
    if (insn[i].type & IT_BBEND) {
      f->bb[f->num_bb].type = 0;
      f->bb[f->num_bb].type = 0;
      f->bb[f->num_bb].last = i;
      f->bb[f->num_bb].last = i;
      f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1;
      f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1;
      f->bb[f->num_bb].tmp = 0;
      f->bb[f->num_bb].tmp = 0;
      f->bb[f->num_bb].ntim = 0;
      f->bb[f->num_bb].ntim = 0;
      f->num_bb++;
      f->num_bb++;
      assert (f->num_bb < MAX_BB);
      assert (f->num_bb < MAX_BB);
    }
    }
  }
  }
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT");
 
 
  /* Build forward connections between BBs */
  /* Build forward connections between BBs */
  for (i = 0; i < f->num_bb; i++)
  for (i = 0; i < f->num_bb; i++)
    if (insn[f->bb[i].last].type & IT_BRANCH) {
    if (insn[f->bb[i].last].type & IT_BRANCH) {
      int j;
      int j;
      assert (insn[f->bb[i].last].index == II_BF);
      assert (insn[f->bb[i].last].index == II_BF);
      /* Find block this instruction jumps to */
      /* Find block this instruction jumps to */
      for (j = 0; j < f->num_bb; j++)
      for (j = 0; j < f->num_bb; j++)
        if (f->bb[j].first == insn[f->bb[i].last].op[0]) break;
        if (f->bb[j].first == insn[f->bb[i].last].op[0]) break;
      assert (j < f->num_bb);
      assert (j < f->num_bb);
 
 
      /* Convert the jump address to BB link */
      /* Convert the jump address to BB link */
      insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB;
      insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB;
 
 
      /* Make a link */
      /* Make a link */
      f->bb[i].next[0] = j;
      f->bb[i].next[0] = j;
      if (++f->bb[j].tmp > 2) eb++;
      if (++f->bb[j].tmp > 2) eb++;
      f->bb[i].next[1] = i + 1;
      f->bb[i].next[1] = i + 1;
      if (++f->bb[i + 1].tmp > 2) eb++;
      if (++f->bb[i + 1].tmp > 2) eb++;
    } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */
    } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */
    } else {
    } else {
      f->bb[i].next[0] = i + 1;
      f->bb[i].next[0] = i + 1;
      if (++f->bb[i + 1].tmp > 2) eb++;
      if (++f->bb[i + 1].tmp > 2) eb++;
    }
    }
 
 
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT");
 
 
  /* Build backward connections, but first insert artificial blocks
  /* Build backward connections, but first insert artificial blocks
   * to handle more than 2 connections */
   * to handle more than 2 connections */
  cucdebug (6, "artificial %i %i\n", f->num_bb, eb);
  cucdebug (6, "artificial %i %i\n", f->num_bb, eb);
  end_bb = f->num_bb + eb;
  end_bb = f->num_bb + eb;
  for (i = f->num_bb - 1; i >= 0; i--) {
  for (i = f->num_bb - 1; i >= 0; i--) {
    j = f->bb[i].tmp;
    j = f->bb[i].tmp;
    if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp;
    if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp;
    f->bb[--end_bb] = f->bb[i];
    f->bb[--end_bb] = f->bb[i];
    reloc[i] = end_bb;
    reloc[i] = end_bb;
    while (j-- > 2) {
    while (j-- > 2) {
      f->bb[--end_bb].first = f->bb[i].first;
      f->bb[--end_bb].first = f->bb[i].first;
      f->bb[end_bb].last = -1;
      f->bb[end_bb].last = -1;
      f->bb[end_bb].next[0] = -1;
      f->bb[end_bb].next[0] = -1;
      f->bb[end_bb].next[1] = -1;
      f->bb[end_bb].next[1] = -1;
      f->bb[end_bb].tmp = 0;
      f->bb[end_bb].tmp = 0;
      f->bb[end_bb].cnt = f->bb[i].cnt;
      f->bb[end_bb].cnt = f->bb[i].cnt;
      f->bb[end_bb].ntim = 0;
      f->bb[end_bb].ntim = 0;
    }
    }
  }
  }
  f->num_bb += eb;
  f->num_bb += eb;
 
 
  /* relocate jump instructions */
  /* relocate jump instructions */
  for (i = 0; i < num_insn; i++)
  for (i = 0; i < num_insn; i++)
    for (j = 0; j < MAX_OPERANDS; j++)
    for (j = 0; j < MAX_OPERANDS; j++)
      if (insn[i].opt[j] & OPT_BB)
      if (insn[i].opt[j] & OPT_BB)
        insn[i].op[j] = reloc[insn[i].op[j]];
        insn[i].op[j] = reloc[insn[i].op[j]];
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc");
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    if (f->bb[i].next[0] >= 0) {
    if (f->bb[i].next[0] >= 0) {
      int t = reloc[f->bb[i].next[0]];
      int t = reloc[f->bb[i].next[0]];
      if (f->bb[t].tmp < 0) {
      if (f->bb[t].tmp < 0) {
        f->bb[t].tmp = -f->bb[t].tmp;
        f->bb[t].tmp = -f->bb[t].tmp;
        t -= f->bb[t].tmp - 2;
        t -= f->bb[t].tmp - 2;
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
      f->bb[i].next[0] = t;
      f->bb[i].next[0] = t;
    }
    }
    if (f->bb[i].next[1] >= 0) {
    if (f->bb[i].next[1] >= 0) {
      int t = reloc[f->bb[i].next[1]];
      int t = reloc[f->bb[i].next[1]];
      if (f->bb[t].tmp < 0) {
      if (f->bb[t].tmp < 0) {
        f->bb[t].tmp = -f->bb[t].tmp;
        f->bb[t].tmp = -f->bb[t].tmp;
        t -= f->bb[t].tmp - 2;
        t -= f->bb[t].tmp - 2;
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
      f->bb[i].next[1] = t;
      f->bb[i].next[1] = t;
    }
    }
    /* artificial blocks do not have relocations, hardcode them */
    /* artificial blocks do not have relocations, hardcode them */
    if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1;
    if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1;
  }
  }
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT");
 
 
  /* Uncoditional branched do not continue to next block */
  /* Uncoditional branched do not continue to next block */
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    cuc_insn *ii;
    cuc_insn *ii;
    if (f->bb[i].last < 0) continue;
    if (f->bb[i].last < 0) continue;
    ii = &insn[f->bb[i].last];
    ii = &insn[f->bb[i].last];
    /* Unconditional branch? */
    /* Unconditional branch? */
    if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) {
    if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) {
      change_insn_type (ii, II_NOP);
      change_insn_type (ii, II_NOP);
#if 0
#if 0
      if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1];
      if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1];
#endif
#endif
      f->bb[i].next[1] = -1;
      f->bb[i].next[1] = -1;
    }
    }
  }
  }
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP");
 
 
  /* Add backward connections */
  /* Add backward connections */
  for (i = 0; i < f->num_bb; i++)
  for (i = 0; i < f->num_bb; i++)
    f->bb[i].prev[0] = f->bb[i].prev[1] = -1;
    f->bb[i].prev[0] = f->bb[i].prev[1] = -1;
 
 
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    if (f->bb[i].next[0] >= 0) {
    if (f->bb[i].next[0] >= 0) {
      int t = f->bb[i].next[0];
      int t = f->bb[i].next[0];
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
      else {
      else {
        assert (f->bb[t].prev[1] < 0);
        assert (f->bb[t].prev[1] < 0);
        f->bb[t].prev[1] = i;
        f->bb[t].prev[1] = i;
      }
      }
    }
    }
    if (f->bb[i].next[1] >= 0) {
    if (f->bb[i].next[1] >= 0) {
      int t = f->bb[i].next[1];
      int t = f->bb[i].next[1];
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
      else {
      else {
        assert (f->bb[t].prev[1] < 0);
        assert (f->bb[t].prev[1] < 0);
        f->bb[t].prev[1] = i;
        f->bb[t].prev[1] = i;
      }
      }
    }
    }
  }
  }
  /* Add START marker */
  /* Add START marker */
  assert (f->bb[0].prev[0] < 0);
  assert (f->bb[0].prev[0] < 0);
  f->bb[0].prev[0] = BBID_START;
  f->bb[0].prev[0] = BBID_START;
 
 
  /* Add END marker */
  /* Add END marker */
  assert (f->bb[f->num_bb - 1].next[0] < 0);
  assert (f->bb[f->num_bb - 1].next[0] < 0);
  assert (f->bb[f->num_bb - 1].next[1] < 0);
  assert (f->bb[f->num_bb - 1].next[1] < 0);
  f->bb[f->num_bb - 1].next[0] = BBID_END;
  f->bb[f->num_bb - 1].next[0] = BBID_END;
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV");
}
}
 
 
/* We do a quick check if there are some anomalies with references */
/* We do a quick check if there are some anomalies with references */
void cuc_check (cuc_func *f)
void cuc_check (cuc_func *f)
{
{
  int i, j, k;
  int i, j, k;
  cucdebug (1, "cuc_check\n");
  cucdebug (1, "cuc_check\n");
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    if (!f->bb[i].insn && f->bb[i].ninsn) goto err;
    if (!f->bb[i].insn && f->bb[i].ninsn) goto err;
    for (j = 0; j < f->bb[i].ninsn; j++) {
    for (j = 0; j < f->bb[i].ninsn; j++) {
      cuc_insn *ii = &f->bb[i].insn[j];
      cuc_insn *ii = &f->bb[i].insn[j];
      if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND && ii->opt[0] & OPT_DEST) {
      if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND && ii->opt[0] & OPT_DEST) {
        k = 0;
        k = 0;
        assert (ii->opt[k] & OPT_REGISTER);
        assert (ii->opt[k] & OPT_REGISTER);
        if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) {
        if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) {
          cucdebug (1, "Invalid dest conditional type opt%x op%lx\n", ii->opt[0], ii->op[0]);
          cucdebug (1, "Invalid dest conditional type opt%x op%lx\n", ii->opt[0], ii->op[0]);
          goto err;
          goto err;
        }
        }
      }
      }
      for (k = 0; k < MAX_OPERANDS; k++) {
      for (k = 0; k < MAX_OPERANDS; k++) {
        if (ii->opt[k] & OPT_REF) {
        if (ii->opt[k] & OPT_REF) {
          int t = ii->op[k];
          int t = ii->op[k];
          if (REF_BB(t) >= f->num_bb || REF_I (t) >= f->bb[REF_BB(t)].ninsn
          if (REF_BB(t) >= f->num_bb || REF_I (t) >= f->bb[REF_BB(t)].ninsn
           || (ii->index == II_CMOV || ii->index == II_ADD) && (
           || (ii->index == II_CMOV || ii->index == II_ADD) && (
                (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3
                (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3
              || !(f->INSN(t).type & IT_COND) && k == 3)) {
              || !(f->INSN(t).type & IT_COND) && k == 3)) {
            cucdebug (1, "Conditional misused\n");
            cucdebug (1, "Conditional misused\n");
            goto err;
            goto err;
          }
          }
        }
        }
        if (k && ii->opt[k] & OPT_DEST) {
        if (k && ii->opt[k] & OPT_DEST) {
          cucdebug (1, "Destination only allowed for op0!\n");
          cucdebug (1, "Destination only allowed for op0!\n");
          goto err;
          goto err;
        }
        }
      }
      }
    }
    }
  }
  }
  return;
  return;
err:
err:
  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
  print_cuc_bb (f, "ANOMALY");
  print_cuc_bb (f, "ANOMALY");
  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
  cucdebug (1, "Anomaly detected at [%x_%x].%i\n", i, j, k);
  exit (1);
  exit (1);
}
}
 
 
/* Build basic blocks */
/* Build basic blocks */
void build_bb (cuc_func *f)
void build_bb (cuc_func *f)
{
{
  int i, j, k;
  int i, j, k;
  for (i = 0; i < f->num_bb; i++) {
  for (i = 0; i < f->num_bb; i++) {
    if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1;
    if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1;
    else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1;
    else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1;
    assert (f->bb[i].ninsn >= MAX_REGS - 1);
    assert (f->bb[i].ninsn >= MAX_REGS - 1);
    f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn);
    f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn);
    assert (f->bb[i].insn);
    assert (f->bb[i].insn);
    f->bb[i].nmemory = 0;
    f->bb[i].nmemory = 0;
    f->bb[i].unrolled = 1;
    f->bb[i].unrolled = 1;
 
 
    /* Save space for conditional moves, exclude r0, place lrbb instead */
    /* Save space for conditional moves, exclude r0, place lrbb instead */
    change_insn_type (&f->bb[i].insn[0], II_LRBB);
    change_insn_type (&f->bb[i].insn[0], II_LRBB);
    strcpy (f->bb[i].insn[0].disasm, "lrbb");
    strcpy (f->bb[i].insn[0].disasm, "lrbb");
    f->bb[i].insn[0].type = IT_UNUSED | IT_COND;
    f->bb[i].insn[0].type = IT_UNUSED | IT_COND;
    f->bb[i].insn[0].dep = NULL;
    f->bb[i].insn[0].dep = NULL;
    f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST;
    f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST;
    f->bb[i].insn[0].opt[1] = OPT_LRBB;
    f->bb[i].insn[0].opt[1] = OPT_LRBB;
    f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE;
    f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE;
    for (j = 1; j < MAX_REGS - 1; j++) {
    for (j = 1; j < MAX_REGS - 1; j++) {
      change_insn_type (&f->bb[i].insn[j], II_CMOV);
      change_insn_type (&f->bb[i].insn[j], II_CMOV);
      strcpy (f->bb[i].insn[j].disasm, "cmov");
      strcpy (f->bb[i].insn[j].disasm, "cmov");
      f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? IT_COND : 0;
      f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? IT_COND : 0;
      f->bb[i].insn[j].dep = NULL;
      f->bb[i].insn[j].dep = NULL;
      f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER;
      f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER;
      f->bb[i].insn[j].opt[0] |= OPT_DEST;
      f->bb[i].insn[j].opt[0] |= OPT_DEST;
      f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j;
      f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j;
      f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER;
      f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER;
    }
    }
 
 
    /* Relocate instructions */
    /* Relocate instructions */
    for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) {
    for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) {
      f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)];
      f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)];
      for (k = 0; k < MAX_OPERANDS; k++)
      for (k = 0; k < MAX_OPERANDS; k++)
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
          int b1;
          int b1;
          for (b1 = 0; b1 < i; b1++)
          for (b1 = 0; b1 < i; b1++)
            if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k]
            if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k]
              && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break;
              && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break;
          assert (b1 < f->num_bb);
          assert (b1 < f->num_bb);
          f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1);
          f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1);
        }
        }
      if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++;
      if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++;
    }
    }
  }
  }
  cuc_check (f);
  cuc_check (f);
}
}
 
 
/* Does simplification on blocks A, B, C:
/* Does simplification on blocks A, B, C:
   A->B->C, A->C to just A->B->C */
   A->B->C, A->C to just A->B->C */
static void simplify_bb (cuc_func *f, int pred, int s1, int s2, int neg)
static void simplify_bb (cuc_func *f, int pred, int s1, int s2, int neg)
{
{
  cuc_insn *last;
  cuc_insn *last;
  int i;
  int i;
  if (cuc_debug >= 3) print_cuc_bb (f, "BEFORE_SIMPLIFY");
  if (cuc_debug >= 3) print_cuc_bb (f, "BEFORE_SIMPLIFY");
  cucdebug (3, "simplify %x->%x->%x (%i)\n", pred, s1, s2, neg);
  cucdebug (3, "simplify %x->%x->%x (%i)\n", pred, s1, s2, neg);
  assert (s2 != pred); /* Shouldn't occur => stupid */
  assert (s2 != pred); /* Shouldn't occur => stupid */
  f->bb[pred].next[1] = -1;
  f->bb[pred].next[1] = -1;
  f->bb[pred].next[0] = s1;
  f->bb[pred].next[0] = s1;
 
 
  if (f->bb[s2].prev[0] == pred) {
  if (f->bb[s2].prev[0] == pred) {
    f->bb[s2].prev[0] = f->bb[s2].prev[1];
    f->bb[s2].prev[0] = f->bb[s2].prev[1];
    f->bb[s2].prev[1] = -1;
    f->bb[s2].prev[1] = -1;
  } else if (f->bb[s2].prev[1] == pred) {
  } else if (f->bb[s2].prev[1] == pred) {
    f->bb[s2].prev[1] = -1;
    f->bb[s2].prev[1] = -1;
  } else assert (0);
  } else assert (0);
 
 
  last = &f->bb[pred].insn[f->bb[pred].ninsn - 1];
  last = &f->bb[pred].insn[f->bb[pred].ninsn - 1];
  assert (last->type & IT_BRANCH);
  assert (last->type & IT_BRANCH);
  for (i = 0; i < f->bb[s2].ninsn; i++) {
  for (i = 0; i < f->bb[s2].ninsn; i++) {
    cuc_insn *ii= &f->bb[s2].insn[i];
    cuc_insn *ii= &f->bb[s2].insn[i];
    if (ii->index == II_LRBB) {
    if (ii->index == II_LRBB) {
      change_insn_type (ii, II_CMOV);
      change_insn_type (ii, II_CMOV);
      ii->type = IT_COND;
      ii->type = IT_COND;
      ii->op[1] = neg ? 0 : 1; ii->opt[1] = OPT_CONST;
      ii->op[1] = neg ? 0 : 1; ii->opt[1] = OPT_CONST;
      ii->op[2] = neg ? 1 : 0; ii->opt[2] = OPT_CONST;
      ii->op[2] = neg ? 1 : 0; ii->opt[2] = OPT_CONST;
      ii->op[3] = last->op[1]; ii->opt[3] = last->opt[1];
      ii->op[3] = last->op[1]; ii->opt[3] = last->opt[1];
    }
    }
  }
  }
  change_insn_type (last, II_NOP);
  change_insn_type (last, II_NOP);
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_SIMPLIFY");
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_SIMPLIFY");
}
}
 
 
/* type == 0; keep predecessor condition
/* type == 0; keep predecessor condition
 * type == 1; keep successor condition
 * type == 1; keep successor condition
 * type == 2; join loop unrolled blocks */
 * type == 2; join loop unrolled blocks */
static void join_bb (cuc_func *f, int pred, int succ, int type)
static void join_bb (cuc_func *f, int pred, int succ, int type)
{
{
  int i, j, k, n1, n2, ninsn, add_cond = 0;
  int i, j, k, n1, n2, ninsn, add_cond = 0;
  unsigned long cond_op, cond_opt;
  unsigned long cond_op, cond_opt;
  cuc_insn *insn;
  cuc_insn *insn;
 
 
  if (cuc_debug) cuc_check (f);
  if (cuc_debug) cuc_check (f);
  cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type);
  cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type);
  cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn);
  cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn);
  if (cuc_debug >= 3) fflush (stdout);
  if (cuc_debug >= 3) fflush (stdout);
 
 
  n1 = f->bb[pred].ninsn;
  n1 = f->bb[pred].ninsn;
  n2 = f->bb[succ].ninsn;
  n2 = f->bb[succ].ninsn;
  if (n1 <= 0
  if (n1 <= 0
   || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1;
   || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1;
  if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1;
  if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1;
  if (type == 2) add_cond = 1;
  if (type == 2) add_cond = 1;
 
 
  //assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */
  //assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */
 
 
  ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? MAX_REGS : 0);
  ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? MAX_REGS : 0);
 
 
  insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn));
  insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn));
  for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i];
  for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i];
  /* when type == 0, we move the last (jump) instruction to the end */
  /* when type == 0, we move the last (jump) instruction to the end */
  if (type == 0 || type == 2) {
  if (type == 0 || type == 2) {
    /* Move first branch instruction to the end */
    /* Move first branch instruction to the end */
    assert (insn[n1 - 1].type & IT_BRANCH);
    assert (insn[n1 - 1].type & IT_BRANCH);
    insn[ninsn - 1] = insn[n1 - 1];
    insn[ninsn - 1] = insn[n1 - 1];
    cond_op = insn[n1 - 1].op[1];
    cond_op = insn[n1 - 1].op[1];
    cond_opt = insn[n1 - 1].opt[1];
    cond_opt = insn[n1 - 1].opt[1];
 
 
    /* Remove old branch */
    /* Remove old branch */
    change_insn_type (&insn[n1 - 1], II_NOP);
    change_insn_type (&insn[n1 - 1], II_NOP);
  }
  }
  /* Copy second block */
  /* Copy second block */
  for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i];
  for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i];
 
 
  /* and when type == 2, we may need to add sfor instruction, to quit when either is true */
  /* and when type == 2, we may need to add sfor instruction, to quit when either is true */
  if (type == 2) {
  if (type == 2) {
    /* Move second branch instruction to the end */
    /* Move second branch instruction to the end */
    if (insn[n1 + n2 - 1].type & IT_BRANCH) {
    if (insn[n1 + n2 - 1].type & IT_BRANCH) {
      insn[ninsn - 1] = insn[n1 + n2 - 1];
      insn[ninsn - 1] = insn[n1 + n2 - 1];
 
 
      /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */
      /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */
      insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF;
      insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF;
 
 
      /* Remove old one */
      /* Remove old one */
      change_insn_type (&insn[n1 + n2 - 1], II_NOP);
      change_insn_type (&insn[n1 + n2 - 1], II_NOP);
    } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */
    } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */
  }
  }
 
 
#if 1
#if 1
  /* LRBB at start of succ BB is not valid anymore */
  /* LRBB at start of succ BB is not valid anymore */
  if (n1 > 0 && insn[n1].index == II_LRBB) {
  if (n1 > 0 && insn[n1].index == II_LRBB) {
    if (type == 1) {
    if (type == 1) {
      /* We have two possibilities, how this could have happened:
      /* We have two possibilities, how this could have happened:
         1. we just moved second predecessor of succ to pred,
         1. we just moved second predecessor of succ to pred,
            pred now having two predecessors => everything is ok
            pred now having two predecessors => everything is ok
         2. we just moved second predecessor of succ to pred,
         2. we just moved second predecessor of succ to pred,
            now, having just one predecessor => LRBB is not needed anymore */
            now, having just one predecessor => LRBB is not needed anymore */
      if (f->bb[pred].prev[1] < 0) { /* handle second option */
      if (f->bb[pred].prev[1] < 0) { /* handle second option */
        change_insn_type (&insn[n1], II_ADD);
        change_insn_type (&insn[n1], II_ADD);
        insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST;
        insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST;
        insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST;
        insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST;
        insn[n1].opt[3] = OPT_NONE;
        insn[n1].opt[3] = OPT_NONE;
      }
      }
    } else {
    } else {
      assert (0); /* not tested yet */
      assert (0); /* not tested yet */
      change_insn_type (&insn[n1], II_NOP);
      change_insn_type (&insn[n1], II_NOP);
      for (i = n1; i < ninsn; i++)
      for (i = n1; i < ninsn; i++)
        if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) {
        if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) {
          assert (insn[i].opt[3] == OPT_REF);
          assert (insn[i].opt[3] == OPT_REF);
          insn[i].op[3] = cond_op;
          insn[i].op[3] = cond_op;
          insn[i].opt[3] = cond_opt;
          insn[i].opt[3] = cond_opt;
          if (f->bb[pred].next[0] != succ) {
          if (f->bb[pred].next[0] != succ) {
            unsigned long t; /* negate conditional -- exchange */
            unsigned long t; /* negate conditional -- exchange */
            assert (f->bb[pred].next[1] == succ);
            assert (f->bb[pred].next[1] == succ);
            t = insn[i].op[1];
            t = insn[i].op[1];
            insn[i].op[1] = insn[i].op[2];
            insn[i].op[1] = insn[i].op[2];
            insn[i].op[2] = t;
            insn[i].op[2] = t;
            t = insn[i].opt[1];
            t = insn[i].opt[1];
            insn[i].opt[1] = insn[i].opt[2];
            insn[i].opt[1] = insn[i].opt[2];
            insn[i].opt[2] = t;
            insn[i].opt[2] = t;
          }
          }
        }
        }
    }
    }
  }
  }
#endif
#endif
 
 
  for (i = 0; i < ninsn; i++) reloc[i] = -1;
  for (i = 0; i < ninsn; i++) reloc[i] = -1;
 
 
  /* Add conditional instructions if required */
  /* Add conditional instructions if required */
  if (add_cond) {
  if (add_cond) {
    recalc_last_used_reg (f, pred);
    recalc_last_used_reg (f, pred);
    recalc_last_used_reg (f, succ);
    recalc_last_used_reg (f, succ);
 
 
    /* r0 -- add nop for it */
    /* r0 -- add nop for it */
    change_insn_type (&insn[n1 + n2], II_NOP);
    change_insn_type (&insn[n1 + n2], II_NOP);
    for (i = 1; i < MAX_REGS; i++) {
    for (i = 1; i < MAX_REGS; i++) {
      cuc_insn *ii = &insn[n1 + n2 + i];
      cuc_insn *ii = &insn[n1 + n2 + i];
      int a = f->bb[pred].last_used_reg[i];
      int a = f->bb[pred].last_used_reg[i];
      int b = f->bb[succ].last_used_reg[i];
      int b = f->bb[succ].last_used_reg[i];
 
 
      /* We have deleted first branch instruction, now we must setup FLAG_REG,
      /* We have deleted first branch instruction, now we must setup FLAG_REG,
         to point to conditional */
         to point to conditional */
      if (i == FLAG_REG) {
      if (i == FLAG_REG) {
        change_insn_type (ii, II_CMOV);
        change_insn_type (ii, II_CMOV);
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->dep = NULL;
        ii->dep = NULL;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[1] = cond_op; ii->opt[1] = cond_opt;
        ii->op[1] = cond_op; ii->opt[1] = cond_opt;
        if (b >= 0) {
        if (b >= 0) {
          ii->op[2] = b; ii->opt[2] = OPT_REF;
          ii->op[2] = b; ii->opt[2] = OPT_REF;
        } else {
        } else {
          ii->op[2] = cond_op; ii->opt[2] = cond_opt;
          ii->op[2] = cond_op; ii->opt[2] = cond_opt;
        }
        }
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
      } else if (b < 0) change_insn_type (ii, II_NOP);
      } else if (b < 0) change_insn_type (ii, II_NOP);
      else if (a < 0) {
      else if (a < 0) {
        change_insn_type (ii, II_ADD);
        change_insn_type (ii, II_ADD);
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->dep = NULL;
        ii->dep = NULL;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[1] = b; ii->opt[1] = OPT_REF;
        ii->op[1] = b; ii->opt[1] = OPT_REF;
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
        ii->opt[3] = OPT_NONE;
        ii->opt[3] = OPT_NONE;
      } else if (b >= 0) {
      } else if (b >= 0) {
        change_insn_type (ii, II_CMOV);
        change_insn_type (ii, II_CMOV);
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
        ii->dep = NULL;
        ii->dep = NULL;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[1] = a; ii->opt[1] = OPT_REF;
        ii->op[1] = a; ii->opt[1] = OPT_REF;
        ii->op[2] = b; ii->opt[2] = OPT_REF;
        ii->op[2] = b; ii->opt[2] = OPT_REF;
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
      }
      }
      sprintf (ii->disasm, "cmov (join BB)");
      sprintf (ii->disasm, "cmov (join BB)");
    }
    }
  }
  }
 
 
  if (cuc_debug) cuc_check (f);
  if (cuc_debug) cuc_check (f);
  i = 0;
  i = 0;
  switch (type) {
  switch (type) {
  case 0:
  case 0:
    assert (f->bb[pred].next[0] >= 0);
    assert (f->bb[pred].next[0] >= 0);
    if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0];
    if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0];
    if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0];
    if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0];
    break;
    break;
  case 1:
  case 1:
    assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
    assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
    f->bb[pred].next[0] = f->bb[succ].next[0];
    f->bb[pred].next[0] = f->bb[succ].next[0];
    f->bb[pred].next[1] = f->bb[succ].next[1];
    f->bb[pred].next[1] = f->bb[succ].next[1];
    break;
    break;
  case 2:
  case 2:
    assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
    assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
    f->bb[pred].next[0] = f->bb[succ].next[0];
    f->bb[pred].next[0] = f->bb[succ].next[0];
    f->bb[pred].next[1] = f->bb[succ].next[1];
    f->bb[pred].next[1] = f->bb[succ].next[1];
    break;
    break;
  }
  }
  if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1];
  if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1];
  if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1;
  if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1;
 
 
  if (type == 0) assert (f->bb[succ].next[1] < 0);
  if (type == 0) assert (f->bb[succ].next[1] < 0);
 
 
  /* We just did something stupid -- we joined two predecessors into one;
  /* We just did something stupid -- we joined two predecessors into one;
     succ may need the information from which block we came.  We will repair
     succ may need the information from which block we came.  We will repair
     this by converting LRBB to CMOV */
     this by converting LRBB to CMOV */
  for (j = 0; j < 2; j++) {
  for (j = 0; j < 2; j++) {
    int nb = f->bb[pred].next[j];
    int nb = f->bb[pred].next[j];
    int t;
    int t;
 
 
    /* check just valid connections */
    /* check just valid connections */
    if (nb < 0 || nb == BBID_END) continue;
    if (nb < 0 || nb == BBID_END) continue;
 
 
    /* check type */
    /* check type */
    if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1;
    if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1;
    else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0;
    else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0;
    else continue;
    else continue;
 
 
    /* check all LRBB instructions.  */
    /* check all LRBB instructions.  */
    for (i = 0; i < f->bb[nb].ninsn; i++)
    for (i = 0; i < f->bb[nb].ninsn; i++)
      if (f->bb[nb].insn[i].index == II_LRBB) {
      if (f->bb[nb].insn[i].index == II_LRBB) {
        cuc_insn *lrbb =&f->bb[nb].insn[i];
        cuc_insn *lrbb =&f->bb[nb].insn[i];
        change_insn_type (lrbb, II_CMOV);
        change_insn_type (lrbb, II_CMOV);
        lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST;
        lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST;
        lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST;
        lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST;
        lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt;
        lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt;
        lrbb->type |= IT_COND;
        lrbb->type |= IT_COND;
      }
      }
  }
  }
 
 
  f->bb[succ].type = BB_DEAD;
  f->bb[succ].type = BB_DEAD;
  //PRINTF (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type);
  //PRINTF (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type);
  /* remove branch instruction, if there is only one successor */
  /* remove branch instruction, if there is only one successor */
  if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) {
  if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) {
    assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */
    assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */
    change_insn_type (&insn[ninsn - 1], II_NOP);
    change_insn_type (&insn[ninsn - 1], II_NOP);
  }
  }
 
 
  /* Set max count */
  /* Set max count */
  if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt;
  if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt;
  f->bb[pred].ninsn = ninsn;
  f->bb[pred].ninsn = ninsn;
  f->bb[succ].ninsn = 0;
  f->bb[succ].ninsn = 0;
  free (f->bb[pred].insn); f->bb[pred].insn = NULL;
  free (f->bb[pred].insn); f->bb[pred].insn = NULL;
  free (f->bb[succ].insn); f->bb[succ].insn = NULL;
  free (f->bb[succ].insn); f->bb[succ].insn = NULL;
  f->bb[pred].insn = insn;
  f->bb[pred].insn = insn;
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
    if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred;
    if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred;
    if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred;
    if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred;
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
    for (j = 0; j < f->bb[i].ninsn; j++)
    for (j = 0; j < f->bb[i].ninsn; j++)
      for (k = 0; k < MAX_OPERANDS; k++)
      for (k = 0; k < MAX_OPERANDS; k++)
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
          /* Check if we are referencing successor BB -> relocate to second part of
          /* Check if we are referencing successor BB -> relocate to second part of
             the new block */
             the new block */
          if (REF_BB (f->bb[i].insn[j].op[k]) == succ) {
          if (REF_BB (f->bb[i].insn[j].op[k]) == succ) {
            int t = f->bb[i].insn[j].op[k];
            int t = f->bb[i].insn[j].op[k];
            int ndest = REF (pred, REF_I (t) + n1);
            int ndest = REF (pred, REF_I (t) + n1);
            //PRINTF ("%x: %x %x\n", REF(i, j), t, ndest);
            //PRINTF ("%x: %x %x\n", REF(i, j), t, ndest);
 
 
            /* We've found a reference to succ. block, being removed, relocate */
            /* We've found a reference to succ. block, being removed, relocate */
            f->bb[i].insn[j].op[k] = ndest;
            f->bb[i].insn[j].op[k] = ndest;
          } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) {
          } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) {
            if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) {
            if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) {
              f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])];
              f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])];
            }
            }
          }
          }
        }
        }
  }
  }
 
 
  if (cuc_debug) cuc_check (f);
  if (cuc_debug) cuc_check (f);
  if (cuc_debug >= 3) print_cuc_bb (f, "join");
  if (cuc_debug >= 3) print_cuc_bb (f, "join");
}
}
 
 
/* Optimize basic blocks */
/* Optimize basic blocks */
int optimize_bb (cuc_func *f)
int optimize_bb (cuc_func *f)
{
{
  int modified = 0;
  int modified = 0;
  int i, j;
  int i, j;
remove_lrbb:
remove_lrbb:
  /* we can remove lrbb instructions from blocks with just one predecessor */
  /* we can remove lrbb instructions from blocks with just one predecessor */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */
      for (j = 0; j < f->bb[i].ninsn; j++)
      for (j = 0; j < f->bb[i].ninsn; j++)
        if (f->bb[i].insn[j].index == II_LRBB) {
        if (f->bb[i].insn[j].index == II_LRBB) {
          cuc_insn *t;
          cuc_insn *t;
          cucdebug (4, "-lrbb %x.%x\n", i, j);
          cucdebug (4, "-lrbb %x.%x\n", i, j);
 
 
          /* Change to add LRBB, 0, 0 */
          /* Change to add LRBB, 0, 0 */
          change_insn_type (&f->bb[i].insn[j], II_ADD);
          change_insn_type (&f->bb[i].insn[j], II_ADD);
          f->bb[i].insn[j].type &= ~IT_VOLATILE;
          f->bb[i].insn[j].type &= ~IT_VOLATILE;
          f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST;
          f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST;
          f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */
          f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */
          f->bb[i].insn[j].opt[3] = OPT_NONE;
          f->bb[i].insn[j].opt[3] = OPT_NONE;
          modified = 1;
          modified = 1;
          if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) {
          if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) {
            t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1];
            t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1];
 
 
            /* If the predecessor still has a conditional jump instruction, we must be careful.
            /* If the predecessor still has a conditional jump instruction, we must be careful.
               If next[0] == next[1] join them. Now we will link lrbb and correct the situation */
               If next[0] == next[1] join them. Now we will link lrbb and correct the situation */
            if (t->type & IT_BRANCH) { /* We must set a reference to branch result */
            if (t->type & IT_BRANCH) { /* We must set a reference to branch result */
              f->bb[i].insn[j].opt[1] = t->opt[1];
              f->bb[i].insn[j].opt[1] = t->opt[1];
              f->bb[i].insn[j].op[1] = t->op[1];
              f->bb[i].insn[j].op[1] = t->op[1];
              /* sometimes branch is not needed anymore */
              /* sometimes branch is not needed anymore */
              if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP);
              if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP);
            }
            }
          }
          }
        }
        }
    }
    }
  }
  }
 
 
  /* Ordering of joining types is cruical -- we should concat all directly connected BBs
  /* Ordering of joining types is cruical -- we should concat all directly connected BBs
     together first, so when we do a type != 1 joining, we can remove LRBB, directly by
     together first, so when we do a type != 1 joining, we can remove LRBB, directly by
     looking at number of its predeccessors */
     looking at number of its predeccessors */
 
 
  /* Type 1 joining
  /* Type 1 joining
     1. link between pred & succ
     1. link between pred & succ
     2. no other pred's successors
     2. no other pred's successors
     3. no other succ's predecessors, except if pred has max one */
     3. no other succ's predecessors, except if pred has max one */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
    int p = f->bb[i].prev[0];
    int p = f->bb[i].prev[0];
    if (p < 0 || p == BBID_START) continue;
    if (p < 0 || p == BBID_START) continue;
    /* one successor and max sum of 3 predecessors */
    /* one successor and max sum of 3 predecessors */
    if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0
    if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0
     && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) {
     && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) {
      /* First we will move all predecessors from succ to pred, and then we will do
      /* First we will move all predecessors from succ to pred, and then we will do
         real type 1 joining */
         real type 1 joining */
      if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
      if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
        int p1 = f->bb[i].prev[1];
        int p1 = f->bb[i].prev[1];
        /* joining is surely not worth another extra memory access */
        /* joining is surely not worth another extra memory access */
        if (f->bb[p].nmemory) continue;
        if (f->bb[p].nmemory) continue;
        if (f->bb[p].prev[0] >= 0) {
        if (f->bb[p].prev[0] >= 0) {
           assert (f->bb[p].prev[1] < 0);
           assert (f->bb[p].prev[1] < 0);
           f->bb[p].prev[1] = p1;
           f->bb[p].prev[1] = p1;
        } else f->bb[p].prev[0] = p1;
        } else f->bb[p].prev[0] = p1;
        if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p;
        if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p;
        else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p;
        else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p;
        else assert (0);
        else assert (0);
        f->bb[i].prev[1] = -1;
        f->bb[i].prev[1] = -1;
      }
      }
      assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */
      assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */
      join_bb (f, p, i, 1);
      join_bb (f, p, i, 1);
      modified = 1;
      modified = 1;
      goto remove_lrbb;
      goto remove_lrbb;
    }
    }
  }
  }
 
 
  /* Type 0 joining
  /* Type 0 joining
     1. link between pred & succ
     1. link between pred & succ
     2. no memory accesses in succ
     2. no memory accesses in succ
     3. optional pred's second successors
     3. optional pred's second successors
     4. max. one succ's successors */
     4. max. one succ's successors */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START
     && f->bb[i].prev[1] < 0 /* one predecessor */
     && f->bb[i].prev[1] < 0 /* one predecessor */
     && f->bb[i].next[1] < 0 /* max. one successor */
     && f->bb[i].next[1] < 0 /* max. one successor */
     && f->bb[i].nmemory == 0) {                  /* and no memory acceses */
     && f->bb[i].nmemory == 0) {                  /* and no memory acceses */
      join_bb (f, f->bb[i].prev[0], i, 0);
      join_bb (f, f->bb[i].prev[0], i, 0);
      modified = 1;
      modified = 1;
      goto remove_lrbb;
      goto remove_lrbb;
    }
    }
 
 
  /* Type 2 joining
  /* Type 2 joining
     1. link between pred & succ
     1. link between pred & succ
     2. succ has exactly one predeccessor
     2. succ has exactly one predeccessor
     3. pred & succ share common successor
     3. pred & succ share common successor
     4. optional succ's second successor */
     4. optional succ's second successor */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */
      int p = f->bb[i].prev[0];
      int p = f->bb[i].prev[0];
      if (p == BBID_START) continue;
      if (p == BBID_START) continue;
#if 0 /* not yet supported */
#if 0 /* not yet supported */
      if (f->bb[p].next[0] == i
      if (f->bb[p].next[0] == i
       && (f->bb[i].next[1] == f->bb[p].next[1]
       && (f->bb[i].next[1] == f->bb[p].next[1]
        || f->bb[i].next[1] == f->bb[p].next[0])) {
        || f->bb[i].next[1] == f->bb[p].next[0])) {
        join_bb (f, p, i, 2);
        join_bb (f, p, i, 2);
        goto remove_lrbb;
        goto remove_lrbb;
      }
      }
#endif
#endif
      if (f->bb[p].next[1] == i
      if (f->bb[p].next[1] == i
       && (f->bb[p].next[0] == f->bb[i].next[1]
       && (f->bb[p].next[0] == f->bb[i].next[1]
        || f->bb[p].next[0] == f->bb[i].next[0])) {
        || f->bb[p].next[0] == f->bb[i].next[0])) {
        join_bb (f, p, i, 2);
        join_bb (f, p, i, 2);
        modified = 1;
        modified = 1;
        goto remove_lrbb;
        goto remove_lrbb;
      }
      }
    }
    }
 
 
  /* BB simplify:
  /* BB simplify:
     1. a block has exactly 2 successors A and B
     1. a block has exactly 2 successors A and B
     2. A has exactly one successor -- B
     2. A has exactly one successor -- B
     3. A has no memory accesses
     3. A has no memory accesses
     to:
     to:
     flow always goes though A, LRBB is replaced by current block conditional
     flow always goes though A, LRBB is replaced by current block conditional
    */
    */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END
      && f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) {
      && f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END) {
      int a = f->bb[i].next[0];
      int a = f->bb[i].next[0];
      int b = f->bb[i].next[1];
      int b = f->bb[i].next[1];
      int neg = 0;
      int neg = 0;
      /* Exchange? */
      /* Exchange? */
      if (f->bb[b].next[0] == a && f->bb[b].next[1] < 0) {
      if (f->bb[b].next[0] == a && f->bb[b].next[1] < 0) {
        int t = a;
        int t = a;
        a = b;
        a = b;
        b = t;
        b = t;
        neg = 1;
        neg = 1;
      }
      }
      /* Do the simplification if possible */
      /* Do the simplification if possible */
      if (f->bb[a].next[0] == b && f->bb[a].next[1] < 0
      if (f->bb[a].next[0] == b && f->bb[a].next[1] < 0
       && f->bb[a].nmemory == 0) {
       && f->bb[a].nmemory == 0) {
        simplify_bb (f, i, a, b, neg);
        simplify_bb (f, i, a, b, neg);
        modified = 1;
        modified = 1;
        goto remove_lrbb;
        goto remove_lrbb;
      }
      }
    }
    }
 
 
  return modified;
  return modified;
}
}
 
 
/* Removes BBs marked as dead */
/* Removes BBs marked as dead */
int remove_dead_bb (cuc_func *f)
int remove_dead_bb (cuc_func *f)
{
{
  int i, j, k, d = 0;
  int i, j, k, d = 0;
 
 
  for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) {
  for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) {
    if (f->bb[i].insn) free (f->bb[i].insn);
    if (f->bb[i].insn) free (f->bb[i].insn);
    f->bb[i].insn = NULL;
    f->bb[i].insn = NULL;
    reloc[i] = -1;
    reloc[i] = -1;
  } else {
  } else {
    reloc[i] = d;
    reloc[i] = d;
    f->bb[d++] = f->bb[i];
    f->bb[d++] = f->bb[i];
  }
  }
  if (f->num_bb == d) return 0;
  if (f->num_bb == d) return 0;
  f->num_bb = d;
  f->num_bb = d;
 
 
  /* relocate initial blocks */
  /* relocate initial blocks */
  for (i = 0; i < f->num_init_bb; i++)
  for (i = 0; i < f->num_init_bb; i++)
    f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]];
    f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]];
 
 
  /* repair references */
  /* repair references */
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
          cucdebug (5, "%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]);
          cucdebug (5, "%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]);
          fflush (stdout);
          fflush (stdout);
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START)
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START)
      assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0);
      assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0);
    if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START)
    if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START)
      assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0);
      assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0);
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END)
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END)
      assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0);
      assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0);
    if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END)
    if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END)
      assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0);
      assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0);
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
    if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1;
    if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1;
 
 
    for (j = 0; j < f->bb[i].ninsn; j++)
    for (j = 0; j < f->bb[i].ninsn; j++)
      for (k = 0; k < MAX_OPERANDS; k++)
      for (k = 0; k < MAX_OPERANDS; k++)
        if ((f->bb[i].insn[j].opt[k] & OPT_BB) &&
        if ((f->bb[i].insn[j].opt[k] & OPT_BB) &&
                                        ((signed)f->bb[i].insn[j].op[k] >= 0)) {
                                        ((signed)f->bb[i].insn[j].op[k] >= 0)) {
          if (f->bb[i].insn[j].op[k] != BBID_END)
          if (f->bb[i].insn[j].op[k] != BBID_END)
            assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0);
            assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0);
        } else if (f->bb[i].insn[j].opt[k] & OPT_REF) {
        } else if (f->bb[i].insn[j].opt[k] & OPT_REF) {
          int t = f->bb[i].insn[j].op[k];
          int t = f->bb[i].insn[j].op[k];
          assert (reloc[REF_BB(t)] >= 0);
          assert (reloc[REF_BB(t)] >= 0);
          f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t));
          f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t));
        }
        }
  }
  }
  return 1;
  return 1;
}
}
 
 
/* Recursive calculation of dependencies */
/* Recursive calculation of dependencies */
static int reg_dep_rec (cuc_func *f, int cur)
static int reg_dep_rec (cuc_func *f, int cur)
{
{
  int i, j;
  int i, j;
  cuc_insn *insn = f->bb[cur].insn;
  cuc_insn *insn = f->bb[cur].insn;
 
 
  //PRINTF ("\n %i", cur); 
  //PRINTF ("\n %i", cur); 
  /* Spread only, do not loop */
  /* Spread only, do not loop */
  if (f->bb[cur].tmp) return;
  if (f->bb[cur].tmp) return;
  f->bb[cur].tmp = 1;
  f->bb[cur].tmp = 1;
  //PRINTF ("!   ");
  //PRINTF ("!   ");
 
 
  for (i = 0; i < f->bb[cur].ninsn; i++) {
  for (i = 0; i < f->bb[cur].ninsn; i++) {
    /* Check for destination operand(s) */
    /* Check for destination operand(s) */
    for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST)
    for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST)
      if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) {
      if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) {
        //PRINTF ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i));
        //PRINTF ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i));
        assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */
        assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */
        f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i);
        f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i);
      }
      }
  }
  }
 
 
  if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END)
  if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END)
    reg_dep_rec (f, f->bb[cur].next[0]);
    reg_dep_rec (f, f->bb[cur].next[0]);
  if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END)
  if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END)
    reg_dep_rec (f, f->bb[cur].next[1]);
    reg_dep_rec (f, f->bb[cur].next[1]);
}
}
 
 
/* Detect register dependencies */
/* Detect register dependencies */
void reg_dep (cuc_func *f)
void reg_dep (cuc_func *f)
{
{
  int i, b, c;
  int i, b, c;
 
 
  /* Set dead blocks */
  /* Set dead blocks */
  for (b = 0; b < f->num_bb; b++) {
  for (b = 0; b < f->num_bb; b++) {
    f->bb[b].tmp = 0;
    f->bb[b].tmp = 0;
    for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1;
    for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1;
  }
  }
 
 
  /* Start with first block and set dependecies of all reachable blocks */
  /* Start with first block and set dependecies of all reachable blocks */
  /* At the same time set last_used_regs */
  /* At the same time set last_used_regs */
  reg_dep_rec (f, 0);
  reg_dep_rec (f, 0);
 
 
  for (i = 0; i < f->num_bb; i++)
  for (i = 0; i < f->num_bb; i++)
    if (f->bb[i].tmp) f->bb[i].tmp = 0;
    if (f->bb[i].tmp) f->bb[i].tmp = 0;
    else f->bb[i].type |= BB_DEAD;
    else f->bb[i].type |= BB_DEAD;
 
 
  /* Detect loops; mark BBs where loops must be broken */
  /* Detect loops; mark BBs where loops must be broken */
  for (c = 0; c < f->num_bb; c++) {
  for (c = 0; c < f->num_bb; c++) {
    int min = 3, minb;
    int min = 3, minb;
 
 
    /* search though all non-visited for minimum number of unvisited predecessors */
    /* search though all non-visited for minimum number of unvisited predecessors */
    for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) {
    for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) {
      int tmp = 0;
      int tmp = 0;
      if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START
      if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START
       && !f->bb[f->bb[b].prev[0]].tmp) tmp++;
       && !f->bb[f->bb[b].prev[0]].tmp) tmp++;
      if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != BBID_START
      if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != BBID_START
       && !f->bb[f->bb[b].prev[1]].tmp) tmp++;
       && !f->bb[f->bb[b].prev[1]].tmp) tmp++;
      if (tmp < min) {
      if (tmp < min) {
        minb = b;
        minb = b;
        min = tmp;
        min = tmp;
        if (tmp == 0) break; /* We already have the best one */
        if (tmp == 0) break; /* We already have the best one */
      }
      }
    }
    }
    b = minb;
    b = minb;
    f->bb[b].tmp = 1; /* Mark visited */
    f->bb[b].tmp = 1; /* Mark visited */
    cucdebug (3, "minb %i min %i\n", minb, min);
    cucdebug (3, "minb %i min %i\n", minb, min);
    if (min) { /* We just broke the loop */
    if (min) { /* We just broke the loop */
      f->bb[b].type |= BB_INLOOP;
      f->bb[b].type |= BB_INLOOP;
    }
    }
  }
  }
 
 
  /* Set real predecessors in cmov instructions to previous blocks */
  /* Set real predecessors in cmov instructions to previous blocks */
  for (b = 0; b < f->num_bb; b++)
  for (b = 0; b < f->num_bb; b++)
    for (i = 1; i < MAX_REGS - 1; i++) {
    for (i = 1; i < MAX_REGS - 1; i++) {
      int pa, pb;
      int pa, pb;
      assert (f->bb[b].insn[i].index ==  II_CMOV);
      assert (f->bb[b].insn[i].index ==  II_CMOV);
      assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST);
      assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST);
      assert (f->bb[b].insn[i].op[0] == i);
      assert (f->bb[b].insn[i].op[0] == i);
      if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1;
      if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1;
      else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i];
      else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i];
      if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1;
      if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1;
      else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i];
      else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i];
 
 
      /* We do some very simple optimizations right away to make things more readable */
      /* We do some very simple optimizations right away to make things more readable */
      if (pa < 0 && pb < 0) {
      if (pa < 0 && pb < 0) {
        /* Was not used at all */
        /* Was not used at all */
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
      } else if (pa < 0) {
      } else if (pa < 0) {
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
        f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF;
        f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF;
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
      } else if (pb < 0) {
      } else if (pb < 0) {
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        change_insn_type (&f->bb[b].insn[i], II_ADD);
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
        f->bb[b].insn[i].opt[3] = OPT_NONE;
      } else {
      } else {
        int t = REF (b, 0); /* lrbb should be first instruction */
        int t = REF (b, 0); /* lrbb should be first instruction */
        assert (f->INSN(t).index == II_LRBB);
        assert (f->INSN(t).index == II_LRBB);
 
 
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
 
 
        f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF;
        f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF;
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
 
 
        /* Update op[3] -- flag register */
        /* Update op[3] -- flag register */
        assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER);
        assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER);
        assert (f->bb[b].insn[i].op[3] == LRBB_REG);
        assert (f->bb[b].insn[i].op[3] == LRBB_REG);
        assert (t >= 0);
        assert (t >= 0);
        f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */
        f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */
        f->bb[b].insn[i].op[3] = t;
        f->bb[b].insn[i].op[3] = t;
        assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
        assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
      }
      }
    }
    }
 
 
  /* assign register references */
  /* assign register references */
  for (b = 0; b < f->num_bb; b++) {
  for (b = 0; b < f->num_bb; b++) {
    /* rebuild last used reg array */
    /* rebuild last used reg array */
    f->bb[b].last_used_reg[0] = -1;
    f->bb[b].last_used_reg[0] = -1;
    if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0;
    if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0;
    else f->bb[b].last_used_reg[LRBB_REG] = -1;
    else f->bb[b].last_used_reg[LRBB_REG] = -1;
 
 
    for (i = 1; i < MAX_REGS - 1; i++)
    for (i = 1; i < MAX_REGS - 1; i++)
      f->bb[b].last_used_reg[i] = -1;
      f->bb[b].last_used_reg[i] = -1;
 
 
    /* Create references */
    /* Create references */
    for (i = 0; i < f->bb[b].ninsn; i++) {
    for (i = 0; i < f->bb[b].ninsn; i++) {
      int k;
      int k;
      /* Check for source operands first */
      /* Check for source operands first */
      for (k = 0; k < MAX_OPERANDS; k++) {
      for (k = 0; k < MAX_OPERANDS; k++) {
        if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) {
        if (!(f->bb[b].insn[i].opt[k] & OPT_DEST)) {
          if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) {
          if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) {
            int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
            int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
 
 
            if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */
            if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */
              f->bb[b].insn[i].opt[k] = OPT_CONST;
              f->bb[b].insn[i].opt[k] = OPT_CONST;
              f->bb[b].insn[i].op[k] = 0;
              f->bb[b].insn[i].op[k] = 0;
            } else if (t >= 0) {
            } else if (t >= 0) {
              f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */
              f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */
              f->bb[b].insn[i].op[k] = t;
              f->bb[b].insn[i].op[k] = t;
              assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
              assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
              //f->INSN(t).op[0] = -1;
              //f->INSN(t).op[0] = -1;
            }
            }
          } else if (f->bb[b].insn[i].opt[k] & OPT_REF) {
          } else if (f->bb[b].insn[i].opt[k] & OPT_REF) {
            //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */
            //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */
            f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED;
            f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED;
          }
          }
        }
        }
      }
      }
 
 
      /* Now check for destination operand(s) */
      /* Now check for destination operand(s) */
      for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST)
      for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST)
        if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
        if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
          && (int)f->bb[b].insn[i].op[k] >= 0) {
          && (int)f->bb[b].insn[i].op[k] >= 0) {
          assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */
          assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */
          f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i);
          f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i);
        }
        }
    }
    }
  }
  }
 
 
  /* Remove all unused lrbb */
  /* Remove all unused lrbb */
  for (b = 0; b < f->num_bb; b++)
  for (b = 0; b < f->num_bb; b++)
    for (i = 0; i < f->bb[b].ninsn; i++)
    for (i = 0; i < f->bb[b].ninsn; i++)
      if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP);
      if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP);
 
 
  /* SSAs with final register value are marked as outputs */
  /* SSAs with final register value are marked as outputs */
  assert (f->bb[f->num_bb - 1].next[0] == BBID_END);
  assert (f->bb[f->num_bb - 1].next[0] == BBID_END);
  for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) {
  for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) {
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
    /* Mark them volatile, so optimizer does not remove them */
    /* Mark them volatile, so optimizer does not remove them */
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
  }
  }
  {
  {
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
    /* Mark them volatile, so optimizer does not remove them */
    /* Mark them volatile, so optimizer does not remove them */
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
  }
  }
}
}
 
 
/* split the BB, based on the group numbers in .tmp */
/* split the BB, based on the group numbers in .tmp */
void expand_bb (cuc_func *f, int b)
void expand_bb (cuc_func *f, int b)
{
{
  int n = f->num_bb;
  int n = f->num_bb;
  int mg = 0;
  int mg = 0;
  int b1, i, j;
  int b1, i, j;
 
 
  for (i = 0; i < f->bb[b].ninsn; i++)
  for (i = 0; i < f->bb[b].ninsn; i++)
    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
 
 
  /* Create copies */
  /* Create copies */
  for (b1 = 1; b1 <= mg; b1++) {
  for (b1 = 1; b1 <= mg; b1++) {
    assert (f->num_bb < MAX_BB);
    assert (f->num_bb < MAX_BB);
    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
    f->num_bb++;
    f->num_bb++;
  }
  }
 
 
  /* Relocate */
  /* Relocate */
  for (b1 = 0; b1 < f->num_bb; b1++)
  for (b1 = 0; b1 < f->num_bb; b1++)
    for (i = 0; i < f->bb[b1].ninsn; i++) {
    for (i = 0; i < f->bb[b1].ninsn; i++) {
      dep_list *d = f->bb[b1].insn[i].dep;
      dep_list *d = f->bb[b1].insn[i].dep;
      for (j = 0; j < MAX_OPERANDS; j++)
      for (j = 0; j < MAX_OPERANDS; j++)
        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
          int t = f->bb[b1].insn[i].op[j];
          int t = f->bb[b1].insn[i].op[j];
          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
        }
        }
      while (d) {
      while (d) {
        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
        d = d->next;
        d = d->next;
      }
      }
    }
    }
 
 
  /* Delete unused instructions */
  /* Delete unused instructions */
  for (j = 0; j <= mg; j++) {
  for (j = 0; j <= mg; j++) {
    if (j == 0) b1 = b;
    if (j == 0) b1 = b;
    else b1 = n + j - 1;
    else b1 = n + j - 1;
    for (i = 0; i < f->bb[b1].ninsn; i++) {
    for (i = 0; i < f->bb[b1].ninsn; i++) {
      if (f->bb[b1].insn[i].tmp != j)
      if (f->bb[b1].insn[i].tmp != j)
        change_insn_type (&f->bb[b1].insn[i], II_NOP);
        change_insn_type (&f->bb[b1].insn[i], II_NOP);
      f->bb[b1].insn[i].tmp = 0;
      f->bb[b1].insn[i].tmp = 0;
    }
    }
    if (j < mg) {
    if (j < mg) {
      f->bb[b1].next[0] = n + j;
      f->bb[b1].next[0] = n + j;
      f->bb[b1].next[1] = -1;
      f->bb[b1].next[1] = -1;
      f->bb[n + j].prev[0] = b1;
      f->bb[n + j].prev[0] = b1;
      f->bb[n + j].prev[1] = -1;
      f->bb[n + j].prev[1] = -1;
    } else {
    } else {
      i = f->bb[b1].next[0];
      i = f->bb[b1].next[0];
      f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
      f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
      f->bb[n + j].prev[1] = -1;
      f->bb[n + j].prev[1] = -1;
      if (i >= 0 && i != BBID_END) {
      if (i >= 0 && i != BBID_END) {
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
      }
      }
      i = f->bb[b1].next[1];
      i = f->bb[b1].next[1];
      if (i >= 0 && i != BBID_END) {
      if (i >= 0 && i != BBID_END) {
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
      }
      }
    }
    }
  }
  }
}
}
 
 
/* Scans sequence of BBs and set bb[].cnt */
/* Scans sequence of BBs and set bb[].cnt */
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename)
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename)
{
{
  FILE *fi, *fo;
  FILE *fi, *fo;
  struct mprofentry_struct *buf;
  struct mprofentry_struct *buf;
  const int bufsize = 256;
  const int bufsize = 256;
  unsigned long *bb_start;
  unsigned long *bb_start;
  unsigned long *bb_end;
  unsigned long *bb_end;
  int b, i, r;
  int b, i, r;
  int curbb, prevbb = -1;
  int curbb, prevbb = -1;
  unsigned long addr = -1;
  unsigned long addr = -1;
  unsigned long prevaddr = -1;
  unsigned long prevaddr = -1;
  int mssum = 0;
  int mssum = 0;
  int mlsum = 0;
  int mlsum = 0;
  int mscnt = 0;
  int mscnt = 0;
  int mlcnt = 0;
  int mlcnt = 0;
  int reopened = 0;
  int reopened = 0;
 
 
  /* Use already opened stream? */
  /* Use already opened stream? */
  if (runtime.sim.fmprof) {
  if (runtime.sim.fmprof) {
    fi = runtime.sim.fmprof;
    fi = runtime.sim.fmprof;
    reopened = 1;
    reopened = 1;
    rewind (fi);
    rewind (fi);
  } else assert (fi = fopen (mp_filename, "rb"));
  } else assert (fi = fopen (mp_filename, "rb"));
  assert (fo = fopen (bb_filename, "wb+"));
  assert (fo = fopen (bb_filename, "wb+"));
 
 
  assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
  assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
  assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
  assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
  for (b = 0; b < f->num_bb; b++) {
  for (b = 0; b < f->num_bb; b++) {
    bb_start[b] = f->start_addr + f->bb[b].first * 4;
    bb_start[b] = f->start_addr + f->bb[b].first * 4;
    bb_end[b] = f->start_addr + f->bb[b].last * 4;
    bb_end[b] = f->start_addr + f->bb[b].last * 4;
    //PRINTF ("%i %x %x\n", b, bb_start[b], bb_end[b]);
    //PRINTF ("%i %x %x\n", b, bb_start[b], bb_end[b]);
    f->bb[0].cnt = 0;
    f->bb[0].cnt = 0;
  }
  }
 
 
  buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize);
  buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize);
  assert (buf);
  assert (buf);
 
 
  //PRINTF ("BBSEQ:\n");
  //PRINTF ("BBSEQ:\n");
  do {
  do {
    r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi);
    r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi);
    //PRINTF ("r%i : ", r);
    //PRINTF ("r%i : ", r);
    for (i = 0; i < r; i++) {
    for (i = 0; i < r; i++) {
      if (buf[i].type & MPROF_FETCH) {
      if (buf[i].type & MPROF_FETCH) {
        //PRINTF ("%x, ", buf[i].addr);
        //PRINTF ("%x, ", buf[i].addr);
        if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) {
        if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) {
          assert (buf[i].type & MPROF_32);
          assert (buf[i].type & MPROF_32);
          prevaddr = addr;
          prevaddr = addr;
          addr = buf[i].addr;
          addr = buf[i].addr;
          for (b = 0; b < f->num_bb; b++)
          for (b = 0; b < f->num_bb; b++)
            if (bb_start[b] <= addr && addr <= bb_end[b]) break;
            if (bb_start[b] <= addr && addr <= bb_end[b]) break;
          assert (b < f->num_bb);
          assert (b < f->num_bb);
          curbb = b;
          curbb = b;
          if (prevaddr + 4 != addr) prevbb = -1;
          if (prevaddr + 4 != addr) prevbb = -1;
        } else curbb = -1;
        } else curbb = -1;
 
 
#warning TODO: do not count interrupts
#warning TODO: do not count interrupts
        if (curbb != prevbb && curbb >= 0) {
        if (curbb != prevbb && curbb >= 0) {
          fwrite (&curbb, sizeof (unsigned long), 1, fo);
          fwrite (&curbb, sizeof (unsigned long), 1, fo);
          //PRINTF (" [%i] ", curbb);
          //PRINTF (" [%i] ", curbb);
          f->bb[curbb].cnt++;
          f->bb[curbb].cnt++;
          prevbb = curbb;
          prevbb = curbb;
        }
        }
      } else {
      } else {
        if (verify_memoryarea(buf[i].addr)) {
        if (verify_memoryarea(buf[i].addr)) {
          if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw;
          if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw;
          else mlcnt++, mlsum += cur_area->delayr;
          else mlcnt++, mlsum += cur_area->delayr;
        }
        }
      }
      }
    }
    }
    //PRINTF ("\n");
    //PRINTF ("\n");
  } while (r == bufsize);
  } while (r == bufsize);
  //PRINTF ("\n");
  //PRINTF ("\n");
 
 
  runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt;
  runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt;
  runtime.cuc.mdelay[1] = (1. * mssum) / mscnt;
  runtime.cuc.mdelay[1] = (1. * mssum) / mscnt;
  runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1;
  runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1;
  f->num_runs = f->bb[0].cnt;
  f->num_runs = f->bb[0].cnt;
  if (!reopened) fclose (fi);
  if (!reopened) fclose (fi);
  fclose (fo);
  fclose (fo);
  free (buf);
  free (buf);
  free (bb_end);
  free (bb_end);
  free (bb_start);
  free (bb_start);
 
 
  /* Initialize basic block relocations */
  /* Initialize basic block relocations */
  f->num_init_bb = f->num_bb;
  f->num_init_bb = f->num_bb;
  //PRINTF ("num_init_bb = %i\n", f->num_init_bb);
  //PRINTF ("num_init_bb = %i\n", f->num_init_bb);
  assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
  assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
  for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b;
  for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b;
}
}
 
 
/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */
/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */
void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll)
void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll)
{
{
  FILE *fi;
  FILE *fi;
  const int bufsize = 256;
  const int bufsize = 256;
  int i, r;
  int i, r;
  int *buf;
  int *buf;
  int cnt = 0;
  int cnt = 0;
  int times = preroll - 1 + unroll;
  int times = preroll - 1 + unroll;
 
 
  assert (fi = fopen (bb_filename, "rb"));
  assert (fi = fopen (bb_filename, "rb"));
  for (i = 0; i < times; i++) counts[i] = 0;
  for (i = 0; i < times; i++) counts[i] = 0;
  assert (buf = (int *) malloc (sizeof (int) * bufsize));
  assert (buf = (int *) malloc (sizeof (int) * bufsize));
 
 
  do {
  do {
    r = fread (buf, sizeof (int), bufsize, fi);
    r = fread (buf, sizeof (int), bufsize, fi);
    for (i = 0; i < r; i++) {
    for (i = 0; i < r; i++) {
      /* count consecutive acesses */
      /* count consecutive acesses */
      if (f->init_bb_reloc[buf[i]] == b) {
      if (f->init_bb_reloc[buf[i]] == b) {
        counts[cnt]++;
        counts[cnt]++;
        if (++cnt >= times) cnt = preroll - 1;
        if (++cnt >= times) cnt = preroll - 1;
      } else cnt = 0;
      } else cnt = 0;
    }
    }
  } while (r == bufsize);
  } while (r == bufsize);
 
 
  log ("Counts %i,%i :", preroll, unroll);
  log ("Counts %i,%i :", preroll, unroll);
  for (i = 0; i < times; i++) log ("%x ", counts[i]);
  for (i = 0; i < times; i++) log ("%x ", counts[i]);
  log ("\n");
  log ("\n");
 
 
  fclose (fi);
  fclose (fi);
  free (buf);
  free (buf);
}
}
 
 
/* relocate all accesses inside of BB b to back/fwd */
/* relocate all accesses inside of BB b to back/fwd */
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
{
{
  int i, j;
  int i, j;
  for (i = 0; i < bb->ninsn; i++)
  for (i = 0; i < bb->ninsn; i++)
    for (j = 0; j < MAX_OPERANDS; j++)
    for (j = 0; j < MAX_OPERANDS; j++)
      if (bb->insn[i].opt[j] & OPT_REF
      if (bb->insn[i].opt[j] & OPT_REF
       && REF_BB (bb->insn[i].op[j]) == b) {
       && REF_BB (bb->insn[i].op[j]) == b) {
        int t = REF_I (bb->insn[i].op[j]);
        int t = REF_I (bb->insn[i].op[j]);
        if (t < i) bb->insn[i].op[j] = REF (back, t);
        if (t < i) bb->insn[i].op[j] = REF (back, t);
        else bb->insn[i].op[j] = REF (fwd, t);
        else bb->insn[i].op[j] = REF (fwd, t);
      }
      }
}
}
 
 
/* Preroll if type == 1 or unroll if type == 0 loop in BB b `ntimes' times and return
/* Preroll if type == 1 or unroll if type == 0 loop in BB b `ntimes' times and return
   new function. Original function is unmodified. */
   new function. Original function is unmodified. */
static cuc_func *roll_loop (cuc_func *f, int b, int ntimes, int type)
static cuc_func *roll_loop (cuc_func *f, int b, int ntimes, int type)
{
{
  int b1, t, i, prevb, prevart_b;
  int b1, t, i, prevb, prevart_b;
  cuc_func *n = dup_func (f);
  cuc_func *n = dup_func (f);
  cuc_bb *ob = &f->bb[b];
  cuc_bb *ob = &f->bb[b];
  cuc_insn *ii;
  cuc_insn *ii;
 
 
  assert (ntimes > 1);
  assert (ntimes > 1);
  cucdebug (3, "roll type = %i, BB%i x %i (num_bb %i)\n", type, b, ntimes, n->num_bb);
  cucdebug (3, "roll type = %i, BB%i x %i (num_bb %i)\n", type, b, ntimes, n->num_bb);
  ntimes--;
  ntimes--;
  assert (n->num_bb + ntimes * 2 < MAX_BB);
  assert (n->num_bb + ntimes * 2 < MAX_BB);
 
 
  prevb = b;
  prevb = b;
  prevart_b = b;
  prevart_b = b;
 
 
  /* point to first artificial block */
  /* point to first artificial block */
  if (n->bb[b].next[0] != b) {
  if (n->bb[b].next[0] != b) {
    n->bb[b].next[0] = n->num_bb + 1;
    n->bb[b].next[0] = n->num_bb + 1;
  } else if (n->bb[b].next[1] != b) {
  } else if (n->bb[b].next[1] != b) {
    n->bb[b].next[1] = n->num_bb + 1;
    n->bb[b].next[1] = n->num_bb + 1;
  }
  }
 
 
  /* Duplicate the BB */
  /* Duplicate the BB */
  for (t = 0; t < ntimes; t++) {
  for (t = 0; t < ntimes; t++) {
    cuc_bb *pb = &n->bb[prevart_b];
    cuc_bb *pb = &n->bb[prevart_b];
    /* Add new block and set links */
    /* Add new block and set links */
    b1 = n->num_bb++;
    b1 = n->num_bb++;
    cpy_bb (&n->bb[b1], ob);
    cpy_bb (&n->bb[b1], ob);
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
    n->bb[b1].type &= ~BB_INLOOP;
    n->bb[b1].type &= ~BB_INLOOP;
    print_cuc_bb (n, "prerollA");
    print_cuc_bb (n, "prerollA");
 
 
    printf ("prevb %i b1 %i prevart %i\n", prevb, b1, prevart_b);
    printf ("prevb %i b1 %i prevart %i\n", prevb, b1, prevart_b);
    /* Set predecessor's successor */
    /* Set predecessor's successor */
    if (n->bb[prevb].next[0] == b) {
    if (n->bb[prevb].next[0] == b) {
      n->bb[prevb].next[0] = b1;
      n->bb[prevb].next[0] = b1;
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
      else pb->next[1] = b1 + 1;
      else pb->next[1] = b1 + 1;
      n->bb[b1].next[1] = b1 + 1;
      n->bb[b1].next[1] = b1 + 1;
    } else if (n->bb[prevb].next[1] == b) {
    } else if (n->bb[prevb].next[1] == b) {
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
      else pb->next[1] = b1 + 1;
      else pb->next[1] = b1 + 1;
      n->bb[b1].next[0] = b1 + 1;
      n->bb[b1].next[0] = b1 + 1;
      n->bb[prevb].next[1] = b1;
      n->bb[prevb].next[1] = b1;
    } else assert (0);
    } else assert (0);
 
 
    /* Set predecessor */
    /* Set predecessor */
    n->bb[b1].prev[0] = prevb;
    n->bb[b1].prev[0] = prevb;
    n->bb[b1].prev[1] = -1;
    n->bb[b1].prev[1] = -1;
 
 
    /* Relocate backward references to current instance and forward references
    /* Relocate backward references to current instance and forward references
       to previous one */
       to previous one */
    relocate_bb (&n->bb[b1], b, b1, prevb);
    relocate_bb (&n->bb[b1], b, b1, prevb);
 
 
    /* add artificial block, just to join accesses */
    /* add artificial block, just to join accesses */
    b1 = n->num_bb++;
    b1 = n->num_bb++;
    cpy_bb (&n->bb[b1], ob);
    cpy_bb (&n->bb[b1], ob);
    n->bb[b1].cnt = 0;
    n->bb[b1].cnt = 0;
 
 
    for (i = 0; i < ob->ninsn - 1; i++) {
    for (i = 0; i < ob->ninsn - 1; i++) {
      ii = &n->bb[b1].insn[i];
      ii = &n->bb[b1].insn[i];
      if (ob->insn[i].opt[0] & OPT_DEST) {
      if (ob->insn[i].opt[0] & OPT_DEST) {
        change_insn_type (ii, II_CMOV);
        change_insn_type (ii, II_CMOV);
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
 
 
        /* Take left one, if we should have finished the first iteration*/
        /* Take left one, if we should have finished the first iteration*/
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
        } else {
        } else {
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
        }
        }
        ii->dep = NULL;
        ii->dep = NULL;
        ii->type = ob->insn[i].type & IT_COND;
        ii->type = ob->insn[i].type & IT_COND;
      } else {
      } else {
        change_insn_type (ii, II_NOP);
        change_insn_type (ii, II_NOP);
      }
      }
    }
    }
 
 
    /* Add conditional or instruction at the end, prioritizing flags */
    /* Add conditional or instruction at the end, prioritizing flags */
    ii = &n->bb[b1].insn[ob->ninsn - 1];
    ii = &n->bb[b1].insn[ob->ninsn - 1];
    change_insn_type (ii, II_CMOV);
    change_insn_type (ii, II_CMOV);
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
    } else {
    } else {
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
      ii->opt[1] = OPT_REF;
      ii->opt[1] = OPT_REF;
    }
    }
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
    } else {
    } else {
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
      ii->opt[2] = OPT_REF;
      ii->opt[2] = OPT_REF;
    }
    }
    /* {z = x || y;} is same as {z = x ? x : y;} */
    /* {z = x || y;} is same as {z = x ? x : y;} */
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
    ii->type = IT_COND;
    ii->type = IT_COND;
 
 
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
    n->bb[b1].type &= ~BB_INLOOP;
    n->bb[b1].type &= ~BB_INLOOP;
    n->bb[b1].prev[0] = prevart_b;
    n->bb[b1].prev[0] = prevart_b;
    n->bb[b1].prev[1] = b1 - 1;
    n->bb[b1].prev[1] = b1 - 1;
    n->bb[b1].next[0] = -1;
    n->bb[b1].next[0] = -1;
    n->bb[b1].next[1] = -1;
    n->bb[b1].next[1] = -1;
 
 
    prevb = b1 - 1;
    prevb = b1 - 1;
    prevart_b = b1;
    prevart_b = b1;
    print_cuc_bb (n, "prerollB");
    print_cuc_bb (n, "prerollB");
  }
  }
 
 
  print_cuc_bb (n, "preroll0");
  print_cuc_bb (n, "preroll0");
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
 
 
  print_cuc_bb (n, "preroll1");
  print_cuc_bb (n, "preroll1");
  /* repair BB after loop, to point back to latest artificial BB */
  /* repair BB after loop, to point back to latest artificial BB */
  b1 = n->bb[prevart_b].next[0];
  b1 = n->bb[prevart_b].next[0];
  if (b1 >= 0 && b1 != BBID_END) {
  if (b1 >= 0 && b1 != BBID_END) {
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
    else assert (0);
    else assert (0);
  }
  }
 
 
  if (type) {
  if (type) {
    /* Relink to itself */
    /* Relink to itself */
    /* Set predecessor's successor */
    /* Set predecessor's successor */
    if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb;
    if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb;
    else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb;
    else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb;
    else assert (0);
    else assert (0);
    n->bb[prevb].prev[1] = prevb;
    n->bb[prevb].prev[1] = prevb;
 
 
    /* Set predecessor */
    /* Set predecessor */
    if (n->bb[b].prev[0] == b) {
    if (n->bb[b].prev[0] == b) {
      n->bb[b].prev[0] = n->bb[b].prev[1];
      n->bb[b].prev[0] = n->bb[b].prev[1];
      n->bb[b].prev[1] = -1;
      n->bb[b].prev[1] = -1;
    } else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = -1;
    } else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = -1;
    else assert (0);
    else assert (0);
  } else {
  } else {
    /* Relink back to start of the loop */
    /* Relink back to start of the loop */
    /* Set predecessor's successor */
    /* Set predecessor's successor */
    if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b;
    if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b;
    else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b;
    else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b;
    else assert (0);
    else assert (0);
 
 
    /* Set predecessor */
    /* Set predecessor */
    if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb;
    if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb;
    else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb;
    else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb;
    else assert (0);
    else assert (0);
  }
  }
 
 
  print_cuc_bb (n, "preroll2");
  print_cuc_bb (n, "preroll2");
 
 
  /* Relocate backward references to current instance and forward references
  /* Relocate backward references to current instance and forward references
     to previous one */
     to previous one */
  relocate_bb (&n->bb[b], b, b, prevb);
  relocate_bb (&n->bb[b], b, b, prevb);
 
 
  /* Relocate all other blocks to point to latest prevart_b */
  /* Relocate all other blocks to point to latest prevart_b */
  for (i = 0; i < f->num_bb; i++)
  for (i = 0; i < f->num_bb; i++)
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
 
 
  return n;
  return n;
}
}
 
 
/* Unroll loop b unroll times and return new function. Original
/* Unroll loop b unroll times and return new function. Original
   function is unmodified. */
   function is unmodified. */
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
{
{
  int b1, i;
  int b1, i;
  cuc_func *n, *t;
  cuc_func *n, *t;
  int *counts;
  int *counts;
 
 
  if (preroll > 1) {
  if (preroll > 1) {
    t = roll_loop (f, b, preroll, 1);
    t = roll_loop (f, b, preroll, 1);
    b1 = t->num_bb - 2;
    b1 = t->num_bb - 2;
    if (unroll > 1) {
    if (unroll > 1) {
      //print_cuc_bb (t, "preunroll1");
      //print_cuc_bb (t, "preunroll1");
      n = roll_loop (t, b1, unroll, 0);
      n = roll_loop (t, b1, unroll, 0);
      free_func (t);
      free_func (t);
    } else n = t;
    } else n = t;
  } else {
  } else {
    b1 = b;
    b1 = b;
    if (unroll > 1) n = roll_loop (f, b1, unroll, 0);
    if (unroll > 1) n = roll_loop (f, b1, unroll, 0);
    else return dup_func (f);
    else return dup_func (f);
  }
  }
 
 
  /* Assign new counts to functions */
  /* Assign new counts to functions */
  assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll)));
  assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll)));
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
  for (i = 0; i < preroll - 1 + unroll; i++) {
  for (i = 0; i < preroll - 1 + unroll; i++) {
    if (i == 0) b1 = b;
    if (i == 0) b1 = b;
    else b1 = f->num_bb + (i - 1) * 2;
    else b1 = f->num_bb + (i - 1) * 2;
    n->bb[b1].cnt = counts[i];
    n->bb[b1].cnt = counts[i];
  }
  }
 
 
  //print_cuc_bb (n, "preunroll");
  //print_cuc_bb (n, "preunroll");
  free (counts);
  free (counts);
  return n;
  return n;
}
}
 
 
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.