OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [branches/] [stable_0_2_x/] [or1ksim/] [cuc/] [bb.c] - Blame information for rev 939

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 879 markom
/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling
2
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
3
 *
4
 *    This file is part of OpenRISC 1000 Architectural Simulator.
5
 *
6
 *    This program is free software; you can redistribute it and/or modify
7
 *    it under the terms of the GNU General Public License as published by
8
 *    the Free Software Foundation; either version 2 of the License, or
9
 *    (at your option) any later version.
10
 *
11
 *    This program is distributed in the hope that it will be useful,
12
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *    GNU General Public License for more details.
15
 *
16
 *    You should have received a copy of the GNU General Public License
17
 *    along with this program; if not, write to the Free Software
18
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
 
20
#include <stdio.h>
21
#include <stdlib.h>
22
#include <stdarg.h>
23
#include <assert.h>
24 897 markom
#include "sim-config.h"
25
#include "abstract.h"
26 879 markom
#include "cuc.h"
27
#include "insn.h"
28
#include "support/profile.h"
29
 
30 925 markom
/* prints out bb string */
31
void print_bb_num (int num)
32
{
33
  if (num < 0) printf ("*");
34
  else if (num == BBID_END) printf ("END");
35 932 markom
  else if (num == BBID_START) printf ("START");
36 925 markom
  else printf ("%2x", num);
37
}
38
 
39 879 markom
/* Print out basic blocks */
40
void print_cuc_bb (cuc_func *f, char *s)
41
{
42
  int i;
43
  printf ("------- %s -------\n", s);
44
  for (i = 0; i < f->num_bb; i++) {
45
    if (f->bb[i].insn) printf ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt);
46
    else printf ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last);
47
    printf (" type %02x tmp %i ", f->bb[i].type, f->bb[i].tmp);
48 925 markom
    printf ("next "); print_bb_num (f->bb[i].next[0]);
49
    printf (" "); print_bb_num (f->bb[i].next[1]);
50
    printf (" prev "); print_bb_num (f->bb[i].prev[0]);
51
    printf (" "); print_bb_num (f->bb[i].prev[1]);
52
    printf ("\n");
53 879 markom
 
54
    if (f->bb[i].insn) print_insns (f->bb[i].insn, f->bb[i].ninsn, 0);
55
  }
56
  printf ("\n");
57 897 markom
  fflush (stdout);
58 879 markom
}
59
 
60
/* Copies src basic block into destination */
61
cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src)
62
{
63 897 markom
  int i, j;
64
  assert (dest != src);
65 879 markom
  *dest = *src;
66
  assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn));
67
  for (i = 0; i < src->ninsn; i++)
68
    dest->insn[i] = src->insn[i];
69
  if (src->ntim) {
70
    assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim));
71 897 markom
    for (i = 0; i < src->ntim; i++) {
72
      dest->tim[i] = src->tim[i];
73
      if (src->tim[i].nshared) {
74
        assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared));
75
        for (j = 0; j < src->tim[i].nshared; j++)
76
          dest->tim[i].shared[j] = src->tim[i].shared[j];
77
      }
78
    }
79 879 markom
  }
80
}
81
 
82
/* Duplicates function */
83
cuc_func *dup_func (cuc_func *f)
84
{
85
  cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func));
86
  int b, i;
87
  for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]);
88
  n->num_bb = f->num_bb;
89
  assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
90
  for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b];
91
  n->num_init_bb = f->num_init_bb;
92 915 markom
  for (i = 0; i < MAX_REGS; i++) {
93
    n->saved_regs[i] = f->saved_regs[i];
94
    n->lur[i] = f->lur[i];
95
    n->used_regs[i] = f->used_regs[i];
96
  }
97 879 markom
  n->start_addr = f->start_addr;
98
  n->end_addr = f->end_addr;
99
  n->orig_time = f->orig_time;
100
  n->nmsched = f->nmsched;
101
  for (i = 0; i < f->nmsched; i++) {
102
    n->msched[i] = f->msched[i];
103
    n->mtype[i] = f->mtype[i];
104
  }
105 906 markom
  n->nfdeps = f->nfdeps;
106
  if (f->nfdeps) {
107
    f->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps);
108
    for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i];
109
  }
110 879 markom
  return n;
111
}
112
 
113
/* Releases memory allocated by function */
114
void free_func (cuc_func *f)
115
{
116
  int b, i;
117
  for (b = 0; b < f->num_bb; b++) {
118
    for (i = 0; i < f->bb[b].ninsn; i++)
119
      dispose_list (&f->bb[b].insn[i].dep);
120
    if (f->bb[b].insn) free (f->bb[b].insn);
121 897 markom
    for (i = 0; i < f->bb[b].ntim; i++)
122
      if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared)
123
        free (f->bb[b].tim[i].shared);
124 879 markom
    if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim);
125
  }
126
  free (f);
127
}
128
 
129
 
130
/* Recalculates last_used_reg */
131
void recalc_last_used_reg (cuc_func *f, int b)
132
{
133
  int i;
134
  cuc_bb *bb = &f->bb[b];
135
 
136
  /* rebuild last used reg array */
137
  if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0;
138
  else bb->last_used_reg[LRBB_REG] = -1;
139
 
140
  for (i = 1; i < MAX_REGS - 1; i++) bb->last_used_reg[i] = -1;
141
 
142
    /* Create references */
143
  for (i = 0; i < bb->ninsn; i++) {
144
    int k;
145
    /* Now check for destination operand(s) */
146
    for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST)
147
      if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
148
        && (int)bb->insn[i].op[k] >= 0) {
149
        bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i);
150
      }
151
  }
152
}
153
 
154
/* Set the BB limits */
155
void detect_bb (cuc_func *f)
156
{
157
  int i, j, end_bb = 0, eb = 0;
158
 
159
  /* Mark block starts/ends */
160
  for (i = 0; i < num_insn; i++) {
161
    if (end_bb) insn[i].type |= IT_BBSTART;
162
    end_bb = 0;
163
    if (insn[i].type & IT_BRANCH) {
164
      int jt = insn[i].op[0];
165
      insn[i].type |= IT_BBEND;
166
      end_bb = 1;
167
      if (jt < 0 || jt >= num_insn) {
168
        fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm);
169
        exit (1);
170
      }
171
      if (jt > 0) insn[jt - 1].type |= IT_BBEND;
172
      insn[jt].type |= IT_BBSTART;
173
    }
174
  }
175
 
176
  /* Initialize bb array */
177
  insn[0].type |= IT_BBSTART;
178
  insn[num_insn - 1].type |= IT_BBEND;
179
  f->num_bb = 0;
180
  for (i = 0; i < num_insn; i++) {
181
    if (insn[i].type & IT_BBSTART) {
182
      f->bb[f->num_bb].first = i;
183
      f->bb[f->num_bb].cnt = 0;
184
    }
185
    /* Determine repetitions of a loop */
186
    if (insn[i].type & IT_BBEND) {
187
      f->bb[f->num_bb].type = 0;
188
      f->bb[f->num_bb].last = i;
189
      f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1;
190
      f->bb[f->num_bb].tmp = 0;
191
      f->bb[f->num_bb].ntim = 0;
192
      f->num_bb++;
193
      assert (f->num_bb < MAX_BB);
194
    }
195
  }
196 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT");
197 879 markom
 
198
  /* Build forward connections between BBs */
199
  for (i = 0; i < f->num_bb; i++)
200
    if (insn[f->bb[i].last].type & IT_BRANCH) {
201
      int j;
202
      assert (insn[f->bb[i].last].index == II_BF);
203
      /* Find block this instruction jumps to */
204
      for (j = 0; j < f->num_bb; j++)
205
        if (f->bb[j].first == insn[f->bb[i].last].op[0]) break;
206
      assert (j < f->num_bb);
207
 
208
      /* Convert the jump address to BB link */
209
      insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB;
210
 
211
      /* Make a link */
212
      f->bb[i].next[0] = j;
213
      if (++f->bb[j].tmp > 2) eb++;
214
      f->bb[i].next[1] = i + 1;
215
      if (++f->bb[i + 1].tmp > 2) eb++;
216
    } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */
217
      f->bb[i].type |= BB_END;
218
    } else {
219
      f->bb[i].next[0] = i + 1;
220
      if (++f->bb[i + 1].tmp > 2) eb++;
221
    }
222
 
223 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT");
224 879 markom
 
225
  /* Build backward connections, but first insert artificial blocks
226
   * to handle more than 2 connections */
227 883 markom
  cucdebug (6, "artificial %i %i\n", f->num_bb, eb);
228 879 markom
  end_bb = f->num_bb + eb;
229
  for (i = f->num_bb - 1; i >= 0; i--) {
230
    j = f->bb[i].tmp;
231
    if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp;
232
    f->bb[--end_bb] = f->bb[i];
233
    reloc[i] = end_bb;
234
    while (j-- > 2) {
235
      f->bb[--end_bb].first = f->bb[i].first;
236
      f->bb[end_bb].last = -1;
237 925 markom
      f->bb[end_bb].type &= ~BB_END;
238 879 markom
      f->bb[end_bb].next[0] = -1;
239
      f->bb[end_bb].next[1] = -1;
240
      f->bb[end_bb].tmp = 0;
241
      f->bb[end_bb].cnt = f->bb[i].cnt;
242
      f->bb[end_bb].ntim = 0;
243
    }
244
  }
245
  f->num_bb += eb;
246
 
247
  /* relocate jump instructions */
248
  for (i = 0; i < num_insn; i++)
249
    for (j = 0; j < MAX_OPERANDS; j++)
250
      if (insn[i].opt[j] & OPT_BB)
251
        insn[i].op[j] = reloc[insn[i].op[j]];
252 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc");
253 879 markom
  for (i = 0; i < f->num_bb; i++) {
254
    if (f->bb[i].next[0] >= 0) {
255
      int t = reloc[f->bb[i].next[0]];
256
      if (f->bb[t].tmp < 0) {
257
        f->bb[t].tmp = -f->bb[t].tmp;
258
        t -= f->bb[t].tmp - 2;
259
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
260
      f->bb[i].next[0] = t;
261
    }
262
    if (f->bb[i].next[1] >= 0) {
263
      int t = reloc[f->bb[i].next[1]];
264
      if (f->bb[t].tmp < 0) {
265
        f->bb[t].tmp = -f->bb[t].tmp;
266
        t -= f->bb[t].tmp - 2;
267
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
268
      f->bb[i].next[1] = t;
269
    }
270
    /* artificial blocks do not have relocations, hardcode them */
271
    if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1;
272
  }
273 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT");
274 879 markom
 
275
  /* Uncoditional branched do not continue to next block */
276
  for (i = 0; i < f->num_bb; i++) {
277
    cuc_insn *ii;
278
    if (f->bb[i].last < 0) continue;
279
    ii = &insn[f->bb[i].last];
280
    /* Unconditional branch? */
281
    if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) {
282
      change_insn_type (ii, II_NOP);
283
      if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1];
284
      f->bb[i].next[1] = -1;
285
    }
286
  }
287 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP");
288 879 markom
 
289
  /* Add backward connections */
290
  for (i = 0; i < f->num_bb; i++)
291
    f->bb[i].prev[0] = f->bb[i].prev[1] = -1;
292
 
293
  for (i = 0; i < f->num_bb; i++) {
294
    if (f->bb[i].next[0] >= 0) {
295
      int t = f->bb[i].next[0];
296
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
297
      else {
298
        assert (f->bb[t].prev[1] < 0);
299
        f->bb[t].prev[1] = i;
300
      }
301
    }
302
    if (f->bb[i].next[1] >= 0) {
303
      int t = f->bb[i].next[1];
304
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
305
      else {
306
        assert (f->bb[t].prev[1] < 0);
307
        f->bb[t].prev[1] = i;
308
      }
309
    }
310
  }
311 932 markom
  /* Add START marker */
312
  assert (f->bb[0].prev[0] < 0);
313
  f->bb[0].prev[0]= BBID_START;
314
 
315 925 markom
  /* Add END marker */
316
  for (i = 0; i < f->num_bb; i++)
317
    if (f->bb[i].type & BB_END) {
318
      assert (f->bb[i].next[0] < 0);
319
      f->bb[i].next[0] = BBID_END;
320
    }
321 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV");
322 879 markom
}
323
 
324 905 markom
/* We do a quick check if there are some anomalies with references */
325
void cuc_check (cuc_func *f)
326
{
327
  int i, j, k;
328 930 markom
  if (cuc_debug) printf ("cuc_check\n");
329 905 markom
  for (i = 0; i < f->num_bb; i++) {
330 930 markom
    if (!f->bb[i].insn && f->bb[i].ninsn) goto err;
331
    for (j = 0; j < f->bb[i].ninsn; j++) {
332
      cuc_insn *ii = &f->bb[i].insn[j];
333 934 markom
      if ((ii->index == II_CMOV || ii->index == II_ADD) && ii->type & IT_COND) {
334 930 markom
        k = 0;
335
        assert (ii->opt[k] & OPT_REGISTER);
336
        if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) {
337
          printf ("%x %x\n", ii->opt[0], ii->op[0]);
338
          goto err;
339
        }
340
      }
341 905 markom
      for (k = 0; k < MAX_OPERANDS; k++)
342 930 markom
        if (ii->opt[k] & OPT_REF) {
343
          int t = ii->op[k];
344
          if (REF_BB(t) >= f->num_bb || REF_I (t) >= f->bb[REF_BB(t)].ninsn
345 934 markom
           || (ii->index == II_CMOV || ii->index == II_ADD) && (
346 930 markom
                (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3
347
              || !(f->INSN(t).type & IT_COND) && k == 3)) goto err;
348 905 markom
        }
349 930 markom
    }
350 905 markom
  }
351 930 markom
  return;
352
err:
353
  printf ("Anomaly detected at %x.%x[%i]\n", i, j, k);
354
  print_cuc_bb (f, "ANOMALY");
355
  exit (1);
356 905 markom
}
357
 
358 879 markom
/* Build basic blocks */
359
void build_bb (cuc_func *f)
360
{
361
  int i, j, k;
362
  for (i = 0; i < f->num_bb; i++) {
363
    if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1;
364
    else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1;
365
    assert (f->bb[i].ninsn >= MAX_REGS - 1);
366
    f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn);
367
    assert (f->bb[i].insn);
368
    f->bb[i].nmemory = 0;
369
    f->bb[i].unrolled = 1;
370
 
371
    /* Save space for conditional moves, exclude r0, place lrbb instead */
372
    change_insn_type (&f->bb[i].insn[0], II_LRBB);
373
    strcpy (f->bb[i].insn[0].disasm, "lrbb");
374 930 markom
    f->bb[i].insn[0].type = IT_UNUSED | IT_COND;
375 879 markom
    f->bb[i].insn[0].dep = NULL;
376
    f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST;
377
    f->bb[i].insn[0].opt[1] = OPT_LRBB;
378
    f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE;
379
    for (j = 1; j < MAX_REGS - 1; j++) {
380
      change_insn_type (&f->bb[i].insn[j], II_CMOV);
381
      strcpy (f->bb[i].insn[j].disasm, "cmov");
382 930 markom
      f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? IT_COND : 0;
383 879 markom
      f->bb[i].insn[j].dep = NULL;
384
      f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER;
385
      f->bb[i].insn[j].opt[0] |= OPT_DEST;
386
      f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j;
387
      f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER;
388
    }
389 897 markom
 
390
    /* Relocate instructions */
391 879 markom
    for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) {
392
      f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)];
393
      for (k = 0; k < MAX_OPERANDS; k++)
394
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
395
          int b1;
396
          for (b1 = 0; b1 < i; b1++)
397 898 markom
            if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k]
398
              && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break;
399 879 markom
          assert (b1 < f->num_bb);
400
          f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1);
401
        }
402
      if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++;
403
    }
404
  }
405 905 markom
  cuc_check (f);
406 879 markom
}
407
 
408
/* type == 0; keep predecessor condition
409
 * type == 1; keep successor condition
410
 * type == 2; join loop unrolled blocks */
411
static void join_bb (cuc_func *f, int pred, int succ, int type)
412
{
413 905 markom
  int i, j, k, n1, n2, ninsn, add_cond = 0;
414 879 markom
  unsigned long cond_op, cond_opt;
415
  cuc_insn *insn;
416
 
417 905 markom
  if (cuc_debug) cuc_check (f);
418 897 markom
  cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type);
419
  cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn);
420
  if (cuc_debug >= 3) fflush (stdout);
421 879 markom
 
422 905 markom
  n1 = f->bb[pred].ninsn;
423
  n2 = f->bb[succ].ninsn;
424
  if (n1 <= 0
425
   || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1;
426 879 markom
  if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1;
427 905 markom
  if (type == 2) add_cond = 1;
428
 
429
  assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */
430 879 markom
 
431 905 markom
  ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? MAX_REGS : 0);
432 879 markom
 
433
  insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn));
434 905 markom
  for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i];
435
  /* when type == 0, we move the last (jump) instruction to the end */
436 879 markom
  if (type == 0 || type == 2) {
437 905 markom
    /* Move first branch instruction to the end */
438
    assert (insn[n1 - 1].type & IT_BRANCH);
439
    insn[ninsn - 1] = insn[n1 - 1];
440
    cond_op = insn[n1 - 1].op[1];
441
    cond_opt = insn[n1 - 1].opt[1];
442
 
443
    /* Remove old branch */
444
    change_insn_type (&insn[n1 - 1], II_NOP);
445 879 markom
  }
446
  /* Copy second block */
447 905 markom
  for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i];
448 902 markom
 
449
  /* and when type == 2, we may need to add sfor instruction, to quit when either is true */
450
  if (type == 2) {
451 905 markom
    /* Move second branch instruction to the end */
452
    if (insn[n1 + n2 - 1].type & IT_BRANCH) {
453
      insn[ninsn - 1] = insn[n1 + n2 - 1];
454
 
455
      /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */
456
      insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF;
457
 
458
      /* Remove old one */
459
      change_insn_type (&insn[n1 + n2 - 1], II_NOP);
460
    } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */
461 902 markom
  }
462
 
463 905 markom
#if 1
464 927 markom
  /* LRBB at start of succ BB is not valid anymore */
465 933 markom
  if (n1 > 0 && insn[n1].index == II_LRBB) {
466 925 markom
    if (type == 1) {
467
      /* We have two possibilities, how this could have happened:
468
         1. we just moved second predecessor of succ to pred,
469
            pred now having two predecessors => everything is ok
470
         2. we just moved second predecessor of succ to pred,
471
            now, having just one predecessor => LRBB is not needed anymore */
472
      if (f->bb[pred].prev[1] < 0) { /* handle second option */
473
        change_insn_type (&insn[n1], II_ADD);
474
        insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST;
475
        insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST;
476
        insn[n1].opt[3] = OPT_NONE;
477
      }
478
    } else {
479
      assert (0); /* not tested yet */
480
      change_insn_type (&insn[n1], II_NOP);
481
      for (i = n1; i < ninsn; i++)
482
        if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) {
483
          assert (insn[i].opt[3] == OPT_REF);
484
          insn[i].op[3] = cond_op;
485
          insn[i].opt[3] = cond_opt;
486
          if (f->bb[pred].next[0] != succ) {
487
            unsigned long t; /* negate conditional -- exchange */
488
            assert (f->bb[pred].next[1] == succ);
489
            t = insn[i].op[1];
490
            insn[i].op[1] = insn[i].op[2];
491
            insn[i].op[2] = t;
492
            t = insn[i].opt[1];
493
            insn[i].opt[1] = insn[i].opt[2];
494
            insn[i].opt[2] = t;
495
          }
496 902 markom
        }
497 925 markom
    }
498 902 markom
  }
499 905 markom
#endif
500 879 markom
 
501
  for (i = 0; i < ninsn; i++) reloc[i] = -1;
502
 
503
  /* Add conditional instructions if required */
504
  if (add_cond) {
505
    recalc_last_used_reg (f, pred);
506
    recalc_last_used_reg (f, succ);
507
 
508
    /* r0 -- add nop for it */
509 905 markom
    change_insn_type (&insn[n1 + n2], II_NOP);
510 879 markom
    for (i = 1; i < MAX_REGS; i++) {
511 905 markom
      cuc_insn *ii = &insn[n1 + n2 + i];
512 879 markom
      int a = f->bb[pred].last_used_reg[i];
513
      int b = f->bb[succ].last_used_reg[i];
514
 
515
      if (b < 0) change_insn_type (ii, II_NOP);
516
      else if (a < 0) {
517
        change_insn_type (ii, II_ADD);
518 930 markom
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
519 879 markom
        ii->dep = NULL;
520
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
521
        ii->op[1] = b; ii->opt[1] = OPT_REF;
522
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
523 905 markom
        ii->opt[3] = OPT_NONE;
524 879 markom
      } else if (b >= 0) {
525
        change_insn_type (ii, II_CMOV);
526 930 markom
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
527 879 markom
        ii->dep = NULL;
528
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
529
        ii->op[1] = a; ii->opt[1] = OPT_REF;
530
        ii->op[2] = b; ii->opt[2] = OPT_REF;
531
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
532 905 markom
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
533 879 markom
      }
534
      sprintf (ii->disasm, "cmov (join BB)");
535
    }
536
  }
537
 
538 905 markom
  if (cuc_debug) cuc_check (f);
539 925 markom
  if (f->bb[succ].type & BB_END) {
540
    f->bb[pred].type |= BB_END;
541 933 markom
    if (ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH && insn[ninsn - 1].op[0] == succ) {
542 925 markom
      assert (insn[ninsn - 1].opt[0] & OPT_BB);
543
      insn[ninsn - 1].op[0] = BBID_END;
544
    }
545
  }
546 879 markom
  i = 0;
547 925 markom
  assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
548 879 markom
  switch (type) {
549
  case 0:
550
    if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0];
551
    if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0];
552
    assert (f->bb[succ].next[1] < 0);
553
    break;
554
  case 1:
555
    f->bb[pred].next[0] = f->bb[succ].next[0];
556
    f->bb[pred].next[1] = f->bb[succ].next[1];
557
    break;
558 905 markom
  case 2:
559
    f->bb[pred].next[0] = f->bb[succ].next[0];
560
    f->bb[pred].next[1] = f->bb[succ].next[1];
561
    break;
562 879 markom
  }
563 924 markom
  if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1];
564 879 markom
  if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1;
565 927 markom
 
566
  /* We just did something stupid -- we joined two predecessors into one;
567
     succ may need the information from which block we came.  We will repair
568
     this by converting LRBB to CMOV */
569 930 markom
  for (j = 0; j < 2; j++) {
570
    int nb = f->bb[pred].next[j];
571 927 markom
    int t;
572
 
573
    /* check just valid connections */
574
    if (nb < 0 || nb == BBID_END) continue;
575
 
576
    /* check type */
577
    if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1;
578
    else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0;
579
    else continue;
580
 
581 930 markom
    /* check all LRBB instructions.  */
582
    for (i = 0; i < f->bb[nb].ninsn; i++)
583
      if (f->bb[nb].insn[i].index == II_LRBB) {
584
        cuc_insn *lrbb =&f->bb[nb].insn[i];
585
        change_insn_type (lrbb, II_CMOV);
586
        lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST;
587
        lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST;
588
        lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt;
589
        lrbb->type |= IT_COND;
590
      }
591 927 markom
  }
592
 
593 879 markom
  f->bb[succ].type = BB_DEAD;
594 924 markom
  //printf (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type);
595 905 markom
  /* remove branch instruction, if there is only one successor */
596 933 markom
  if (f->bb[pred].next[1] < 0 && ninsn > 0 && insn[ninsn - 1].type & IT_BRANCH) {
597 925 markom
    assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */
598 905 markom
    change_insn_type (&insn[ninsn - 1], II_NOP);
599 925 markom
  }
600 879 markom
 
601
  /* Set max count */
602
  if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt;
603
  f->bb[pred].ninsn = ninsn;
604 905 markom
  f->bb[succ].ninsn = 0;
605 879 markom
  free (f->bb[pred].insn); f->bb[pred].insn = NULL;
606
  free (f->bb[succ].insn); f->bb[succ].insn = NULL;
607
  f->bb[pred].insn = insn;
608
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
609
    if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred;
610
    if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred;
611
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
612
    for (j = 0; j < f->bb[i].ninsn; j++)
613
      for (k = 0; k < MAX_OPERANDS; k++)
614
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
615
          /* Check if we are referencing successor BB -> relocate to second part of
616
             the new block */
617
          if (REF_BB (f->bb[i].insn[j].op[k]) == succ) {
618 905 markom
            int t = f->bb[i].insn[j].op[k];
619
            int ndest = REF (pred, REF_I (t) + n1);
620
            //printf ("%x: %x %x\n", REF(i, j), t, ndest);
621 879 markom
 
622 905 markom
            /* We've found a reference to succ. block, being removed, relocate */
623
            f->bb[i].insn[j].op[k] = ndest;
624 879 markom
          } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) {
625
            if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) {
626
              f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])];
627
            }
628
          }
629
        }
630
  }
631
 
632 905 markom
  if (cuc_debug) cuc_check (f);
633 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "join");
634 879 markom
}
635
 
636
/* Optimize basic blocks */
637 931 markom
int optimize_bb (cuc_func *f)
638 879 markom
{
639 931 markom
  int modified = 0;
640 879 markom
  int i, j;
641
remove_lrbb:
642
  /* we can remove lrbb instructions from blocks with just one predecessor */
643
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
644
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */
645
      for (j = 0; j < f->bb[i].ninsn; j++)
646
        if (f->bb[i].insn[j].index == II_LRBB) {
647
          cuc_insn *t;
648 925 markom
          cucdebug (4, "-lrbb %x.%x\n", i, j);
649 879 markom
 
650
          /* Change to add LRBB, 0, 0 */
651
          change_insn_type (&f->bb[i].insn[j], II_ADD);
652
          f->bb[i].insn[j].type &= ~IT_VOLATILE;
653
          f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST;
654
          f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */
655
          f->bb[i].insn[j].opt[3] = OPT_NONE;
656 931 markom
          modified = 1;
657 934 markom
          if (f->bb[i].prev[0] != BBID_START && f->bb[f->bb[i].prev[0]].ninsn > 0) {
658 932 markom
            t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1];
659 879 markom
 
660 932 markom
            /* If the predecessor still has a conditional jump instruction, we must be careful.
661
               If next[0] == next[1] join them. Now we will link lrbb and correct the situation */
662
            if (t->type & IT_BRANCH) { /* We must set a reference to branch result */
663
              f->bb[i].insn[j].opt[1] = t->opt[1];
664
              f->bb[i].insn[j].op[1] = t->op[1];
665
              /* sometimes branch is not needed anymore */
666
              if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP);
667
            }
668 879 markom
          }
669
        }
670
    }
671
  }
672
 
673 927 markom
  /* Ordering of joining types is cruical -- we should concat all directly connected BBs
674
     together first, so when we do a type != 1 joining, we can remove LRBB, directly by
675
     looking at number of its predeccessors */
676 879 markom
 
677
  /* Type 1 joining
678
     1. link between pred & succ
679
     2. no other pred's successors
680 925 markom
     3. no other succ's predecessors, except if pred has max one */
681 932 markom
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
682
    int p = f->bb[i].prev[0];
683
    if (p < 0 || p == BBID_START) continue;
684 925 markom
    /* one successor and max sum of 3 predecessors */
685 932 markom
    if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0
686
     && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) {
687 925 markom
      /* First we will move all predecessors from succ to pred, and then we will do
688
         real type 1 joining */
689 932 markom
      if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
690
        int p1 = f->bb[i].prev[1];
691 925 markom
        /* joining is surely not worth another extra memory access */
692 932 markom
        if (f->bb[p].nmemory) continue;
693
        if (f->bb[p].prev[0] >= 0) {
694
           assert (f->bb[p].prev[1] < 0);
695
           f->bb[p].prev[1] = p1;
696
        } else f->bb[p].prev[0] = p1;
697
        if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p;
698
        else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p;
699 925 markom
        else assert (0);
700
        f->bb[i].prev[1] = -1;
701
      }
702 932 markom
      assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */
703
      join_bb (f, p, i, 1);
704 931 markom
      modified = 1;
705 879 markom
      goto remove_lrbb;
706
    }
707 932 markom
  }
708 927 markom
 
709
  /* Type 0 joining
710
     1. link between pred & succ
711
     2. no memory accesses in succ
712
     3. optional pred's second successors
713
     4. max. one succ's successors */
714
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
715 932 markom
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START
716
     && f->bb[i].prev[1] < 0 /* one predecessor */
717 927 markom
     && f->bb[i].next[1] < 0 /* max. one successor */
718
     && f->bb[i].nmemory == 0) {                  /* and no memory acceses */
719
      join_bb (f, f->bb[i].prev[0], i, 0);
720 931 markom
      modified = 1;
721 927 markom
      goto remove_lrbb;
722
    }
723 879 markom
 
724
  /* Type 2 joining
725
     1. link between pred & succ
726
     2. succ has exactly one predeccessor
727
     3. pred & succ share common successor
728
     4. optional succ's second successor */
729
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
730
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */
731
      int p = f->bb[i].prev[0];
732 932 markom
      if (p == BBID_START) continue;
733 905 markom
#if 0 /* not yet supported */
734
      if (f->bb[p].next[0] == i
735
       && (f->bb[i].next[1] == f->bb[p].next[1]
736
        || f->bb[i].next[1] == f->bb[p].next[0])) {
737
        join_bb (f, p, i, 2);
738 897 markom
        goto remove_lrbb;
739
      }
740 905 markom
#endif
741
      if (f->bb[p].next[1] == i
742
       && (f->bb[i].next[0] == f->bb[p].next[1]
743
        || f->bb[i].next[0] == f->bb[p].next[0])) {
744
        join_bb (f, p, i, 2);
745 931 markom
        modified = 1;
746 905 markom
        goto remove_lrbb;
747
      }
748 879 markom
    }
749 931 markom
  return modified;
750 879 markom
}
751
 
752
/* Removes BBs marked as dead */
753 931 markom
int remove_dead_bb (cuc_func *f)
754 879 markom
{
755
  int i, j, k, d = 0;
756
 
757
  for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) {
758
    if (f->bb[i].insn) free (f->bb[i].insn);
759
    f->bb[i].insn = NULL;
760
    reloc[i] = -1;
761
  } else {
762
    reloc[i] = d;
763
    f->bb[d++] = f->bb[i];
764
  }
765 931 markom
  if (f->num_bb == d) return 0;
766 879 markom
  f->num_bb = d;
767
 
768
  /* relocate initial blocks */
769
  for (i = 0; i < f->num_init_bb; i++)
770
    f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]];
771
 
772
  /* repair references */
773
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
774 925 markom
          printf ("%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]);
775
          fflush (stdout);
776 932 markom
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START)
777
      assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0);
778
    if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START)
779
      assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0);
780 925 markom
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END)
781
      assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0);
782
    if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END)
783
      assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0);
784 879 markom
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
785
    if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1;
786
 
787
    for (j = 0; j < f->bb[i].ninsn; j++)
788
      for (k = 0; k < MAX_OPERANDS; k++)
789 925 markom
        if (f->bb[i].insn[j].opt[k] & OPT_BB && (signed)f->bb[i].insn[j].op[k] >= 0) {
790
          if (f->bb[i].insn[j].op[k] != BBID_END)
791
            assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0);
792
        } else if (f->bb[i].insn[j].opt[k] & OPT_REF) {
793 879 markom
          int t = f->bb[i].insn[j].op[k];
794
          assert (reloc[REF_BB(t)] >= 0);
795
          f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t));
796
        }
797
  }
798 931 markom
  return 1;
799 879 markom
}
800
 
801
/* Recursive calculation of dependencies */
802
static int reg_dep_rec (cuc_func *f, int cur)
803
{
804
  int i, j;
805
  cuc_insn *insn = f->bb[cur].insn;
806
 
807
  //printf ("\n %i", cur); 
808
  /* Spread only, do not loop */
809
  if (f->bb[cur].tmp) return;
810
  f->bb[cur].tmp = 1;
811
  //printf ("!   ");
812
 
813
  for (i = 0; i < f->bb[cur].ninsn; i++) {
814
    /* Check for destination operand(s) */
815
    for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST)
816
      if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) {
817
        //printf ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i));
818
        assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */
819
        f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i);
820
      }
821
  }
822
 
823 925 markom
  if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END)
824
    reg_dep_rec (f, f->bb[cur].next[0]);
825
  if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END)
826
    reg_dep_rec (f, f->bb[cur].next[1]);
827 879 markom
}
828
 
829
/* Detect register dependencies */
830
void reg_dep (cuc_func *f)
831
{
832
  int i, b, c;
833
 
834
  /* Set dead blocks */
835
  for (b = 0; b < f->num_bb; b++) {
836
    f->bb[b].tmp = 0;
837
    for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1;
838
  }
839
 
840
  /* Start with first block and set dependecies of all reachable blocks */
841
  /* At the same time set last_used_regs */
842
  reg_dep_rec (f, 0);
843
 
844
  for (i = 0; i < f->num_bb; i++)
845
    if (f->bb[i].tmp) f->bb[i].tmp = 0;
846
    else f->bb[i].type |= BB_DEAD;
847
 
848
  /* Detect loops; mark BBs where loops must be broken */
849
  for (c = 0; c < f->num_bb; c++) {
850
    int min = 3, minb;
851
 
852
    /* search though all non-visited for minimum number of unvisited predecessors */
853
    for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) {
854
      int tmp = 0;
855 932 markom
      if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START
856
       && !f->bb[f->bb[b].prev[0]].tmp) tmp++;
857
      if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != BBID_START
858
       && !f->bb[f->bb[b].prev[1]].tmp) tmp++;
859 879 markom
      if (tmp < min) {
860
        minb = b;
861
        min = tmp;
862
        if (tmp == 0) break; /* We already have the best one */
863
      }
864
    }
865
    b = minb;
866
    f->bb[b].tmp = 1; /* Mark visited */
867 883 markom
    cucdebug (3, "minb %i min %i\n", minb, min);
868 879 markom
    if (min) { /* We just broke the loop */
869
      f->bb[b].type |= BB_INLOOP;
870
    }
871
  }
872
 
873
  /* Set real predecessors in cmov instructions to previous blocks */
874
  for (b = 0; b < f->num_bb; b++)
875
    for (i = 1; i < MAX_REGS - 1; i++) {
876
      int pa, pb;
877
      assert (f->bb[b].insn[i].index ==  II_CMOV);
878
      assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST);
879
      assert (f->bb[b].insn[i].op[0] == i);
880 932 markom
      if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1;
881 879 markom
      else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i];
882 932 markom
      if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1;
883 879 markom
      else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i];
884
 
885
      /* We do some very simple optimizations right away to make things more readable */
886
      if (pa < 0 && pb < 0) {
887
        /* Was not used at all */
888
        change_insn_type (&f->bb[b].insn[i], II_ADD);
889
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
890
        f->bb[b].insn[i].opt[3] = OPT_NONE;
891
      } else if (pa < 0) {
892
        change_insn_type (&f->bb[b].insn[i], II_ADD);
893
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
894
        f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF;
895
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
896
        f->bb[b].insn[i].opt[3] = OPT_NONE;
897
      } else if (pb < 0) {
898
        change_insn_type (&f->bb[b].insn[i], II_ADD);
899
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
900
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
901
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
902
        f->bb[b].insn[i].opt[3] = OPT_NONE;
903
      } else {
904
        int t = REF (b, 0); /* lrbb should be first instruction */
905
        assert (f->INSN(t).index == II_LRBB);
906
 
907
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
908
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
909
 
910
        f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF;
911
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
912
 
913
        /* Update op[3] -- flag register */
914
        assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER);
915
        assert (f->bb[b].insn[i].op[3] == LRBB_REG);
916
        assert (t >= 0);
917
        f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */
918
        f->bb[b].insn[i].op[3] = t;
919
        assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
920
      }
921
    }
922
 
923
  /* assign register references */
924
  for (b = 0; b < f->num_bb; b++) {
925
    /* rebuild last used reg array */
926
    f->bb[b].last_used_reg[0] = -1;
927
    if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0;
928
    else f->bb[b].last_used_reg[LRBB_REG] = -1;
929
 
930
    for (i = 1; i < MAX_REGS - 1; i++)
931
      f->bb[b].last_used_reg[i] = -1;
932
 
933
    /* Create references */
934
    for (i = 0; i < f->bb[b].ninsn; i++) {
935
      int k;
936
      /* Check for source operands first */
937
      for (k = 0; k < MAX_OPERANDS; k++) {
938
        if (!(f->bb[b].insn[i].opt[k] & OPT_DEST))
939
        if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) {
940
          int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
941
 
942
          if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */
943
            f->bb[b].insn[i].opt[k] = OPT_CONST;
944
            f->bb[b].insn[i].op[k] = 0;
945
          } else if (t >= 0) {
946
            f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */
947
            f->bb[b].insn[i].op[k] = t;
948
            assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
949
            //f->INSN(t).op[0] = -1;
950
          }
951
        } else if (f->bb[b].insn[i].opt[k] & OPT_REF) {
952
          //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */
953
          f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED;
954
        }
955
      }
956
 
957
      /* Now check for destination operand(s) */
958
      for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST)
959
        if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
960
          && (int)f->bb[b].insn[i].op[k] >= 0) {
961
          int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
962
          assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */
963
          f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i);
964
        }
965
    }
966
  }
967
 
968
  /* Remove all unused lrbb */
969
  for (b = 0; b < f->num_bb; b++)
970
    for (i = 0; i < f->bb[b].ninsn; i++)
971
      if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP);
972
 
973
  /* SSAs with final register value are marked as outputs */
974
  assert (f->bb[f->num_bb - 1].type & BB_END);
975 939 markom
  for (i = 0; i < MAX_REGS; i++) if (!caller_saved[i]) {
976 879 markom
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
977
    /* Mark them volatile, so optimizer does not remove them */
978
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
979
  }
980 939 markom
  {
981
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
982
    /* Mark them volatile, so optimizer does not remove them */
983
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
984
  }
985 879 markom
}
986
 
987 897 markom
/* split the BB, based on the group numbers in .tmp */
988
void expand_bb (cuc_func *f, int b)
989
{
990
  int n = f->num_bb;
991
  int mg = 0;
992
  int b1, i, j;
993
 
994
  for (i = 0; i < f->bb[b].ninsn; i++)
995
    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
996
 
997
  /* Create copies */
998
  for (b1 = 1; b1 <= mg; b1++) {
999
    assert (f->num_bb < MAX_BB);
1000
    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
1001
    f->num_bb++;
1002
  }
1003
 
1004
  /* Relocate */
1005
  for (b1 = 0; b1 < f->num_bb; b1++)
1006
    for (i = 0; i < f->bb[b1].ninsn; i++) {
1007
      dep_list *d = f->bb[b1].insn[i].dep;
1008
      for (j = 0; j < MAX_OPERANDS; j++)
1009
        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
1010
          int t = f->bb[b1].insn[i].op[j];
1011
          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
1012
            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
1013
        }
1014
      while (d) {
1015
        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
1016
          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
1017
        d = d->next;
1018
      }
1019
    }
1020
 
1021
  /* Delete unused instructions */
1022
  for (j = 0; j <= mg; j++) {
1023
    if (j == 0) b1 = b;
1024
    else b1 = n + j - 1;
1025
    for (i = 0; i < f->bb[b1].ninsn; i++) {
1026
      if (f->bb[b1].insn[i].tmp != j)
1027
        change_insn_type (&f->bb[b1].insn[i], II_NOP);
1028
      f->bb[b1].insn[i].tmp = 0;
1029
    }
1030
    if (j < mg) {
1031
      f->bb[b1].next[0] = n + j;
1032
      f->bb[b1].next[1] = -1;
1033
      f->bb[n + j].prev[0] = b1;
1034
      f->bb[n + j].prev[1] = -1;
1035
    } else {
1036
      i = f->bb[b1].next[0];
1037
      f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
1038
      f->bb[n + j].prev[1] = -1;
1039 925 markom
      if (i >= 0 && i != BBID_END) {
1040 897 markom
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
1041
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
1042
      }
1043
      i = f->bb[b1].next[1];
1044 925 markom
      if (i >= 0 && i != BBID_END) {
1045 897 markom
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
1046
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
1047
      }
1048
    }
1049
  }
1050
}
1051
 
1052 879 markom
/* Scans sequence of BBs and set bb[].cnt */
1053
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename)
1054
{
1055
  FILE *fi, *fo;
1056
  struct mprofentry_struct *buf;
1057
  const int bufsize = 256;
1058
  unsigned long *bb_start;
1059
  unsigned long *bb_end;
1060
  int b, i, r;
1061
  int curbb, prevbb = -1;
1062
  unsigned long addr = -1;
1063
  unsigned long prevaddr = -1;
1064 897 markom
  int mssum = 0;
1065
  int mlsum = 0;
1066
  int mscnt = 0;
1067
  int mlcnt = 0;
1068 879 markom
 
1069
  assert (fi = fopen (mp_filename, "rb"));
1070
  assert (fo = fopen (bb_filename, "wb+"));
1071
 
1072
  assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
1073
  assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
1074
  for (b = 0; b < f->num_bb; b++) {
1075
    bb_start[b] = f->start_addr + f->bb[b].first * 4;
1076
    bb_end[b] = f->start_addr + f->bb[b].last * 4;
1077
    //printf ("%i %x %x\n", b, bb_start[b], bb_end[b]);
1078
    f->bb[0].cnt = 0;
1079
  }
1080
 
1081
  buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize);
1082
  assert (buf);
1083
 
1084
  //printf ("BBSEQ:\n");
1085
  do {
1086
    r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi);
1087
    //printf ("r%i : ", r);
1088
    for (i = 0; i < r; i++) {
1089
      if (buf[i].type & MPROF_FETCH) {
1090
        //printf ("%x, ", buf[i].addr);
1091
        if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) {
1092
          assert (buf[i].type & MPROF_32);
1093
          prevaddr = addr;
1094
          addr = buf[i].addr;
1095
          for (b = 0; b < f->num_bb; b++)
1096
            if (bb_start[b] <= addr && addr <= bb_end[b]) break;
1097
          assert (b < f->num_bb);
1098
          curbb = b;
1099
          if (prevaddr + 4 != addr) prevbb = -1;
1100
        } else curbb = -1;
1101
 
1102
#warning TODO: do not count interrupts
1103
        if (curbb != prevbb && curbb >= 0) {
1104
          fwrite (&curbb, sizeof (unsigned long), 1, fo);
1105
          //printf (" [%i] ", curbb);
1106
          f->bb[curbb].cnt++;
1107
          prevbb = curbb;
1108
        }
1109 897 markom
      } else {
1110
        if (verify_memoryarea(buf[i].addr))
1111
          if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw;
1112
          else mlcnt++, mlsum += cur_area->delayw;
1113 879 markom
      }
1114
    }
1115
    //printf ("\n");
1116
  } while (r == bufsize);
1117
  //printf ("\n");
1118
 
1119 897 markom
  runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt;
1120
  runtime.cuc.mdelay[1] = (1. * mlsum) / mlcnt;
1121
  runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1;
1122 883 markom
  f->num_runs = f->bb[0].cnt;
1123 879 markom
  fclose (fi);
1124
  fclose (fo);
1125
  free (buf);
1126
  free (bb_end);
1127
  free (bb_start);
1128
 
1129
  /* Initialize basic block relocations */
1130
  f->num_init_bb = f->num_bb;
1131
  //printf ("num_init_bb = %i\n", f->num_init_bb);
1132
  assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
1133
  for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b;
1134
}
1135
 
1136
/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */
1137
void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll)
1138
{
1139
  FILE *fi;
1140
  const int bufsize = 256;
1141
  int i, r;
1142
  int *buf;
1143
  int cnt = 0;
1144
  int times = preroll - 1 + unroll;
1145
 
1146
  assert (fi = fopen (bb_filename, "rb"));
1147
  for (i = 0; i < times; i++) counts[i] = 0;
1148
  assert (buf = (int *) malloc (sizeof (int) * bufsize));
1149
 
1150
  do {
1151
    r = fread (buf, sizeof (int), bufsize, fi);
1152
    for (i = 0; i < r; i++) {
1153
      /* count consecutive acesses */
1154
      if (f->init_bb_reloc[buf[i]] == b) {
1155
        counts[cnt]++;
1156
        if (++cnt >= times) cnt = preroll - 1;
1157
      } else cnt = 0;
1158
    }
1159
  } while (r == bufsize);
1160
 
1161
  log ("Counts %i,%i :", preroll, unroll);
1162
  for (i = 0; i < times; i++) log ("%x ", counts[i]);
1163
  log ("\n");
1164
 
1165
  fclose (fi);
1166
  free (buf);
1167
}
1168
 
1169
/* relocate all accesses inside of BB b to back/fwd */
1170
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
1171
{
1172
  int i, j;
1173
  for (i = 0; i < bb->ninsn; i++)
1174
    for (j = 0; j < MAX_OPERANDS; j++)
1175
      if (bb->insn[i].opt[j] & OPT_REF
1176
       && REF_BB (bb->insn[i].op[j]) == b) {
1177
        int t = REF_I (bb->insn[i].op[j]);
1178
        if (t < i) bb->insn[i].op[j] = REF (back, t);
1179
        else bb->insn[i].op[j] = REF (fwd, t);
1180
      }
1181
}
1182
 
1183
/* Unroll loop b unroll times and return new function. Original
1184
   function is unmodified. */
1185
static cuc_func *unroll_loop (cuc_func *f, int b, int unroll)
1186
{
1187
  int b1, t, i, j, prevb, prevart_b;
1188
  cuc_func *n = dup_func (f);
1189
  cuc_bb *ob = &f->bb[b];
1190
  cuc_insn *ii;
1191
 
1192
  assert (unroll > 1);
1193
  //printf ("unroll BB%i x %i (num_bb %i)\n", b, unroll, n->num_bb);
1194
  unroll--;
1195
  assert (n->num_bb + unroll * 2 < MAX_BB);
1196
 
1197
  prevb = b;
1198
  prevart_b = b;
1199
  /* Duplicate the BB */
1200
  for (t = 0; t < unroll; t++) {
1201
    cuc_bb *pb = &n->bb[prevart_b];
1202
    /* Add new block and set links */
1203
    b1 = n->num_bb++;
1204
    cpy_bb (&n->bb[b1], ob);
1205
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1206
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1207
 
1208
    /* Set predecessor's successor */
1209
    if (n->bb[prevb].next[0] == b) {
1210
      n->bb[prevb].next[0] = b1;
1211
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1212
      else pb->next[1] = b1 + 1;
1213
      n->bb[b1].next[1] = b1 + 1;
1214
    } else if (n->bb[prevb].next[1] == b) {
1215
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1216
      else pb->next[1] = b1 + 1;
1217
      n->bb[b1].next[0] = b1 + 1;
1218
      n->bb[prevb].next[1] = b1;
1219
    } else assert (0);
1220
 
1221
    /* Set predecessor */
1222
    n->bb[b1].prev[0] = prevb;
1223
    n->bb[b1].prev[1] = -1;
1224
 
1225
    /* Relocate backward references to current instance and forward references
1226
       to previous one */
1227
    relocate_bb (&n->bb[b1], b, b1, prevb);
1228
 
1229
    /* add artificial block, just to join accesses */
1230
    b1 = n->num_bb++;
1231
    cpy_bb (&n->bb[b1], ob);
1232
    n->bb[b1].cnt = 0;
1233
 
1234
    for (i = 0; i < ob->ninsn - 1; i++) {
1235
      ii = &n->bb[b1].insn[i];
1236
      if (ob->insn[i].opt[0] & OPT_DEST) {
1237
        change_insn_type (ii, II_CMOV);
1238
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1239
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
1240
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
1241
 
1242
        /* Take left one, if we should have finished the first iteration*/
1243
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1244
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
1245
        } else {
1246
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
1247
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
1248
        }
1249
        ii->dep = NULL;
1250 930 markom
        ii->type = ob->insn[i].type & IT_COND;
1251 879 markom
      } else {
1252
        change_insn_type (ii, II_NOP);
1253
      }
1254
    }
1255
 
1256 928 markom
    /* Add conditional or instruction at the end, prioritizing flags */
1257 879 markom
    ii = &n->bb[b1].insn[ob->ninsn - 1];
1258 928 markom
    change_insn_type (ii, II_CMOV);
1259 879 markom
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1260
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1261
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
1262
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
1263
    } else {
1264
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
1265
      ii->opt[1] = OPT_REF;
1266
    }
1267
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
1268
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
1269
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
1270
    } else {
1271
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
1272
      ii->opt[2] = OPT_REF;
1273
    }
1274 928 markom
    /* {z = x || y;} is same as {z = x ? x : y;} */
1275
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
1276 933 markom
    ii->type = IT_COND;
1277 879 markom
 
1278
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1279
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1280
    n->bb[b1].prev[0] = prevart_b;
1281
    n->bb[b1].prev[1] = b1 - 1;
1282 925 markom
    n->bb[b1].next[0] = -1;
1283 879 markom
    n->bb[b1].next[1] = -1;
1284
 
1285
    prevb = b1 - 1;
1286
    prevart_b = b1;
1287
  }
1288
  if (ob->type & BB_END) {
1289
    n->bb[prevart_b].type |= BB_END;
1290
    n->bb[b].type &= ~BB_END;
1291
  }
1292
 
1293 925 markom
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
1294 879 markom
  //print_cuc_bb (n, "unroll1");
1295
  /* repair BB after loop, to point back to latest artificial BB */
1296
  b1 = n->bb[prevart_b].next[0];
1297 925 markom
  if (b1 >= 0 && b1 != BBID_END) {
1298 897 markom
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
1299
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
1300 879 markom
    else assert (0);
1301
  }
1302
 
1303
  /* Relink back to start of the loop */
1304
  /* Set predecessor's successor */
1305
  if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b;
1306
  else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b;
1307
  else assert (0);
1308
 
1309
  /* Set predecessor */
1310
  if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb;
1311
  else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb;
1312
  else assert (0);
1313
 
1314
  //print_cuc_bb (n, "unroll2");
1315
 
1316
  /* Relocate backward references to current instance and forward references
1317
     to previous one */
1318
  relocate_bb (&n->bb[b], b, b, prevb);
1319
 
1320
  /* Relocate all other blocks to point to latest prevart_b */
1321
  for (i = 0; i < f->num_bb; i++)
1322
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
1323
 
1324
  return n;
1325
}
1326
 
1327
/* Preroll loop b preroll times and return new function. Original
1328
   function is unmodified. */
1329
static cuc_func *preroll_loop (cuc_func *f, int b, int preroll)
1330
{
1331
  int b1, t, i, j, prevb, prevart_b;
1332
  cuc_func *n = dup_func (f);
1333
  cuc_bb *ob = &f->bb[b];
1334
  cuc_insn *ii;
1335
 
1336
  assert (preroll > 1);
1337
  //printf ("preroll BB%i x %i (num_bb %i)\n", b, preroll, n->num_bb);
1338
  preroll--;
1339
  assert (n->num_bb + preroll * 2 < MAX_BB);
1340
 
1341
  prevb = b;
1342
  prevart_b = b;
1343
  /* Duplicate the BB */
1344
  for (t = 0; t < preroll; t++) {
1345
    cuc_bb *pb = &n->bb[prevart_b];
1346
    /* Add new block and set links */
1347
    b1 = n->num_bb++;
1348
    cpy_bb (&n->bb[b1], ob);
1349
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1350
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1351
 
1352
    /* Set predecessor's successor */
1353
    if (n->bb[prevb].next[0] == b) {
1354
      n->bb[prevb].next[0] = b1;
1355
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1356
      else pb->next[1] = b1 + 1;
1357
      n->bb[b1].next[1] = b1 + 1;
1358
    } else if (n->bb[prevb].next[1] == b) {
1359
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1360
      else pb->next[1] = b1 + 1;
1361
      n->bb[b1].next[0] = b1 + 1;
1362
      n->bb[prevb].next[1] = b1;
1363
    } else assert (0);
1364
 
1365
    /* Set predecessor */
1366
    n->bb[b1].prev[0] = prevb;
1367
    n->bb[b1].prev[1] = -1;
1368
 
1369
    /* Relocate backward references to current instance and forward references
1370
       to previous one */
1371
    relocate_bb (&n->bb[b1], b, b1, prevb);
1372
 
1373
    /* add artificial block, just to join accesses */
1374
    b1 = n->num_bb++;
1375
    cpy_bb (&n->bb[b1], ob);
1376
    n->bb[b1].cnt = 0;
1377
 
1378
    for (i = 0; i < ob->ninsn - 1; i++) {
1379
      ii = &n->bb[b1].insn[i];
1380
      if (ob->insn[i].opt[0] & OPT_DEST) {
1381
        change_insn_type (ii, II_CMOV);
1382
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1383
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
1384
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
1385
 
1386
        /* Take left one, if we should have finished the first iteration*/
1387
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1388
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
1389
        } else {
1390
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
1391
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
1392
        }
1393
        ii->dep = NULL;
1394 930 markom
        ii->type = ob->insn[i].type & IT_COND;
1395 879 markom
      } else {
1396
        change_insn_type (ii, II_NOP);
1397
      }
1398
    }
1399
 
1400 928 markom
    /* Add conditional or instruction at the end, prioritizing flags */
1401 879 markom
    ii = &n->bb[b1].insn[ob->ninsn - 1];
1402 928 markom
    change_insn_type (ii, II_CMOV);
1403 879 markom
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1404
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1405
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
1406
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
1407
    } else {
1408
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
1409
      ii->opt[1] = OPT_REF;
1410
    }
1411
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
1412
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
1413
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
1414
    } else {
1415
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
1416
      ii->opt[2] = OPT_REF;
1417
    }
1418 928 markom
    /* {z = x || y;} is same as {z = x ? x : y;} */
1419
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
1420 933 markom
    ii->type = IT_COND;
1421 879 markom
 
1422
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1423
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1424
    n->bb[b1].prev[0] = prevart_b;
1425
    n->bb[b1].prev[1] = b1 - 1;
1426 925 markom
    n->bb[b1].next[0] = -1;
1427 879 markom
    n->bb[b1].next[1] = -1;
1428
 
1429
    prevb = b1 - 1;
1430
    prevart_b = b1;
1431
  }
1432
  if (ob->type & BB_END) {
1433
    n->bb[prevart_b].type |= BB_END;
1434
    n->bb[b].type &= ~BB_END;
1435
  }
1436 925 markom
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
1437 879 markom
 
1438
  //print_cuc_bb (n, "preroll1");
1439
  /* repair BB after loop, to point back to latest artificial BB */
1440
  b1 = n->bb[prevart_b].next[0];
1441 925 markom
  if (b1 >= 0 && b1 != BBID_END) {
1442 897 markom
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
1443
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
1444 879 markom
    else assert (0);
1445
  }
1446
 
1447
  /* Relink to itself */
1448
  /* Set predecessor's successor */
1449
  if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb;
1450
  else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb;
1451
  else assert (0);
1452
  n->bb[prevb].prev[1] = prevb;
1453
 
1454
  if (n->bb[b].prev[0] == b) {
1455
    n->bb[b].prev[0] = n->bb[b].prev[1];
1456
    n->bb[b].prev[1] = -1;
1457
  } else if (n->bb[b].prev[1] == b) {
1458
    n->bb[b].prev[1] = -1;
1459
  }
1460
 
1461
  //print_cuc_bb (n, "preroll2");
1462
 
1463
  /* Relocate backward references to current instance and forward references
1464
     to previous one */
1465
  relocate_bb (&n->bb[b], b, b, prevb);
1466
 
1467
  /* Relocate all other blocks to point to latest prevart_b */
1468
  for (i = 0; i < f->num_bb; i++)
1469
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
1470
 
1471
  return n;
1472
}
1473
 
1474
/* Unroll loop b unroll times and return new function. Original
1475
   function is unmodified. */
1476
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
1477
{
1478
  int b1, i;
1479
  cuc_func *n, *t;
1480
  int *counts;
1481
  int *bb_reloc;
1482
 
1483
  if (preroll > 1) {
1484
    t = preroll_loop (f, b, preroll);
1485
    b1 = t->num_bb - 2;
1486
    if (unroll > 1) {
1487
      //print_cuc_bb (t, "preunroll1");
1488
      n = unroll_loop (t, b1, unroll);
1489
      free_func (t);
1490
    } else n = t;
1491
  } else {
1492
    b1 = b;
1493 897 markom
    if (unroll > 1) n = unroll_loop (f, b1, unroll);
1494
    else return dup_func (f);
1495 879 markom
  }
1496
 
1497 897 markom
  /* Assign new counts to functions */
1498 879 markom
  assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll)));
1499
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
1500
  for (i = 0; i < preroll - 1 + unroll; i++) {
1501
    if (i == 0) b1 = b;
1502
    else b1 = f->num_bb + (i - 1) * 2;
1503
    n->bb[b1].cnt = counts[i];
1504
  }
1505
 
1506
  //print_cuc_bb (n, "preunroll");
1507
  free (counts);
1508
  return n;
1509
}
1510 933 markom
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.