OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [nog_patch_69/] [or1ksim/] [cuc/] [bb.c] - Blame information for rev 932

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 879 markom
/* bb.c -- OpenRISC Custom Unit Compiler, Basic Block handling
2
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
3
 *
4
 *    This file is part of OpenRISC 1000 Architectural Simulator.
5
 *
6
 *    This program is free software; you can redistribute it and/or modify
7
 *    it under the terms of the GNU General Public License as published by
8
 *    the Free Software Foundation; either version 2 of the License, or
9
 *    (at your option) any later version.
10
 *
11
 *    This program is distributed in the hope that it will be useful,
12
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *    GNU General Public License for more details.
15
 *
16
 *    You should have received a copy of the GNU General Public License
17
 *    along with this program; if not, write to the Free Software
18
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
 
20
#include <stdio.h>
21
#include <stdlib.h>
22
#include <stdarg.h>
23
#include <assert.h>
24 897 markom
#include "sim-config.h"
25
#include "abstract.h"
26 879 markom
#include "cuc.h"
27
#include "insn.h"
28
#include "support/profile.h"
29
 
30 925 markom
/* prints out bb string */
31
void print_bb_num (int num)
32
{
33
  if (num < 0) printf ("*");
34
  else if (num == BBID_END) printf ("END");
35 932 markom
  else if (num == BBID_START) printf ("START");
36 925 markom
  else printf ("%2x", num);
37
}
38
 
39 879 markom
/* Print out basic blocks */
40
void print_cuc_bb (cuc_func *f, char *s)
41
{
42
  int i;
43
  printf ("------- %s -------\n", s);
44
  for (i = 0; i < f->num_bb; i++) {
45
    if (f->bb[i].insn) printf ("\n---- BB%-2x * %x ---- ", i, f->bb[i].cnt);
46
    else printf ("BB%-2x: %4x-%-4x", i, f->bb[i].first, f->bb[i].last);
47
    printf (" type %02x tmp %i ", f->bb[i].type, f->bb[i].tmp);
48 925 markom
    printf ("next "); print_bb_num (f->bb[i].next[0]);
49
    printf (" "); print_bb_num (f->bb[i].next[1]);
50
    printf (" prev "); print_bb_num (f->bb[i].prev[0]);
51
    printf (" "); print_bb_num (f->bb[i].prev[1]);
52
    printf ("\n");
53 879 markom
 
54
    if (f->bb[i].insn) print_insns (f->bb[i].insn, f->bb[i].ninsn, 0);
55
  }
56
  printf ("\n");
57 897 markom
  fflush (stdout);
58 879 markom
}
59
 
60
/* Copies src basic block into destination */
61
cuc_bb *cpy_bb (cuc_bb *dest, cuc_bb *src)
62
{
63 897 markom
  int i, j;
64
  assert (dest != src);
65 879 markom
  *dest = *src;
66
  assert (dest->insn = malloc (sizeof (cuc_insn) * src->ninsn));
67
  for (i = 0; i < src->ninsn; i++)
68
    dest->insn[i] = src->insn[i];
69
  if (src->ntim) {
70
    assert (dest->tim = malloc (sizeof (cuc_timings) * src->ntim));
71 897 markom
    for (i = 0; i < src->ntim; i++) {
72
      dest->tim[i] = src->tim[i];
73
      if (src->tim[i].nshared) {
74
        assert (dest->tim[i].shared = malloc (sizeof (int) * src->tim[i].nshared));
75
        for (j = 0; j < src->tim[i].nshared; j++)
76
          dest->tim[i].shared[j] = src->tim[i].shared[j];
77
      }
78
    }
79 879 markom
  }
80
}
81
 
82
/* Duplicates function */
83
cuc_func *dup_func (cuc_func *f)
84
{
85
  cuc_func *n = (cuc_func *) malloc (sizeof (cuc_func));
86
  int b, i;
87
  for (b = 0; b < f->num_bb; b++) cpy_bb (&n->bb[b], &f->bb[b]);
88
  n->num_bb = f->num_bb;
89
  assert (n->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
90
  for (b = 0; b < f->num_init_bb; b++) n->init_bb_reloc[b] = f->init_bb_reloc[b];
91
  n->num_init_bb = f->num_init_bb;
92 915 markom
  for (i = 0; i < MAX_REGS; i++) {
93
    n->saved_regs[i] = f->saved_regs[i];
94
    n->lur[i] = f->lur[i];
95
    n->used_regs[i] = f->used_regs[i];
96
  }
97 879 markom
  n->start_addr = f->start_addr;
98
  n->end_addr = f->end_addr;
99
  n->orig_time = f->orig_time;
100
  n->nmsched = f->nmsched;
101
  for (i = 0; i < f->nmsched; i++) {
102
    n->msched[i] = f->msched[i];
103
    n->mtype[i] = f->mtype[i];
104
  }
105 906 markom
  n->nfdeps = f->nfdeps;
106
  if (f->nfdeps) {
107
    f->fdeps = (cuc_func **) malloc (sizeof (cuc_func *) * f->nfdeps);
108
    for (i = 0; i < f->nfdeps; i++) n->fdeps[i] = f->fdeps[i];
109
  }
110 879 markom
  return n;
111
}
112
 
113
/* Releases memory allocated by function */
114
void free_func (cuc_func *f)
115
{
116
  int b, i;
117
  for (b = 0; b < f->num_bb; b++) {
118
    for (i = 0; i < f->bb[b].ninsn; i++)
119
      dispose_list (&f->bb[b].insn[i].dep);
120
    if (f->bb[b].insn) free (f->bb[b].insn);
121 897 markom
    for (i = 0; i < f->bb[b].ntim; i++)
122
      if (f->bb[b].tim[i].nshared && f->bb[b].tim[i].shared)
123
        free (f->bb[b].tim[i].shared);
124 879 markom
    if (f->bb[b].tim && f->bb[b].ntim) free (f->bb[b].tim);
125
  }
126
  free (f);
127
}
128
 
129
 
130
/* Recalculates last_used_reg */
131
void recalc_last_used_reg (cuc_func *f, int b)
132
{
133
  int i;
134
  cuc_bb *bb = &f->bb[b];
135
 
136
  /* rebuild last used reg array */
137
  if (bb->insn[0].index == II_LRBB) bb->last_used_reg[LRBB_REG] = 0;
138
  else bb->last_used_reg[LRBB_REG] = -1;
139
 
140
  for (i = 1; i < MAX_REGS - 1; i++) bb->last_used_reg[i] = -1;
141
 
142
    /* Create references */
143
  for (i = 0; i < bb->ninsn; i++) {
144
    int k;
145
    /* Now check for destination operand(s) */
146
    for (k = 0; k < MAX_OPERANDS; k++) if (bb->insn[i].opt[k] & OPT_DEST)
147
      if ((bb->insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
148
        && (int)bb->insn[i].op[k] >= 0) {
149
        bb->last_used_reg[bb->insn[i].op[k]] = REF (b, i);
150
      }
151
  }
152
}
153
 
154
/* Set the BB limits */
155
void detect_bb (cuc_func *f)
156
{
157
  int i, j, end_bb = 0, eb = 0;
158
 
159
  /* Mark block starts/ends */
160
  for (i = 0; i < num_insn; i++) {
161
    if (end_bb) insn[i].type |= IT_BBSTART;
162
    end_bb = 0;
163
    if (insn[i].type & IT_BRANCH) {
164
      int jt = insn[i].op[0];
165
      insn[i].type |= IT_BBEND;
166
      end_bb = 1;
167
      if (jt < 0 || jt >= num_insn) {
168
        fprintf (stderr, "Instruction #%i:Jump out of function '%s'.\n", i, insn[i].disasm);
169
        exit (1);
170
      }
171
      if (jt > 0) insn[jt - 1].type |= IT_BBEND;
172
      insn[jt].type |= IT_BBSTART;
173
    }
174
  }
175
 
176
  /* Initialize bb array */
177
  insn[0].type |= IT_BBSTART;
178
  insn[num_insn - 1].type |= IT_BBEND;
179
  f->num_bb = 0;
180
  for (i = 0; i < num_insn; i++) {
181
    if (insn[i].type & IT_BBSTART) {
182
      f->bb[f->num_bb].first = i;
183
      f->bb[f->num_bb].cnt = 0;
184
    }
185
    /* Determine repetitions of a loop */
186
    if (insn[i].type & IT_BBEND) {
187
      f->bb[f->num_bb].type = 0;
188
      f->bb[f->num_bb].last = i;
189
      f->bb[f->num_bb].next[0] = f->bb[f->num_bb].next[1] = -1;
190
      f->bb[f->num_bb].tmp = 0;
191
      f->bb[f->num_bb].ntim = 0;
192
      f->num_bb++;
193
      assert (f->num_bb < MAX_BB);
194
    }
195
  }
196 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INIT");
197 879 markom
 
198
  /* Build forward connections between BBs */
199
  for (i = 0; i < f->num_bb; i++)
200
    if (insn[f->bb[i].last].type & IT_BRANCH) {
201
      int j;
202
      assert (insn[f->bb[i].last].index == II_BF);
203
      /* Find block this instruction jumps to */
204
      for (j = 0; j < f->num_bb; j++)
205
        if (f->bb[j].first == insn[f->bb[i].last].op[0]) break;
206
      assert (j < f->num_bb);
207
 
208
      /* Convert the jump address to BB link */
209
      insn[f->bb[i].last].op[0] = j; insn[f->bb[i].last].opt[0] = OPT_BB;
210
 
211
      /* Make a link */
212
      f->bb[i].next[0] = j;
213
      if (++f->bb[j].tmp > 2) eb++;
214
      f->bb[i].next[1] = i + 1;
215
      if (++f->bb[i + 1].tmp > 2) eb++;
216
    } else if (f->bb[i].last == num_insn - 1) { /* Last instruction doesn't have to do anything */
217
      f->bb[i].type |= BB_END;
218
    } else {
219
      f->bb[i].next[0] = i + 1;
220
      if (++f->bb[i + 1].tmp > 2) eb++;
221
    }
222
 
223 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_NEXT");
224 879 markom
 
225
  /* Build backward connections, but first insert artificial blocks
226
   * to handle more than 2 connections */
227 883 markom
  cucdebug (6, "artificial %i %i\n", f->num_bb, eb);
228 879 markom
  end_bb = f->num_bb + eb;
229
  for (i = f->num_bb - 1; i >= 0; i--) {
230
    j = f->bb[i].tmp;
231
    if (f->bb[i].tmp > 2) f->bb[i].tmp = -f->bb[i].tmp;
232
    f->bb[--end_bb] = f->bb[i];
233
    reloc[i] = end_bb;
234
    while (j-- > 2) {
235
      f->bb[--end_bb].first = f->bb[i].first;
236
      f->bb[end_bb].last = -1;
237 925 markom
      f->bb[end_bb].type &= ~BB_END;
238 879 markom
      f->bb[end_bb].next[0] = -1;
239
      f->bb[end_bb].next[1] = -1;
240
      f->bb[end_bb].tmp = 0;
241
      f->bb[end_bb].cnt = f->bb[i].cnt;
242
      f->bb[end_bb].ntim = 0;
243
    }
244
  }
245
  f->num_bb += eb;
246
 
247
  /* relocate jump instructions */
248
  for (i = 0; i < num_insn; i++)
249
    for (j = 0; j < MAX_OPERANDS; j++)
250
      if (insn[i].opt[j] & OPT_BB)
251
        insn[i].op[j] = reloc[insn[i].op[j]];
252 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT-reloc");
253 879 markom
  for (i = 0; i < f->num_bb; i++) {
254
    if (f->bb[i].next[0] >= 0) {
255
      int t = reloc[f->bb[i].next[0]];
256
      if (f->bb[t].tmp < 0) {
257
        f->bb[t].tmp = -f->bb[t].tmp;
258
        t -= f->bb[t].tmp - 2;
259
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
260
      f->bb[i].next[0] = t;
261
    }
262
    if (f->bb[i].next[1] >= 0) {
263
      int t = reloc[f->bb[i].next[1]];
264
      if (f->bb[t].tmp < 0) {
265
        f->bb[t].tmp = -f->bb[t].tmp;
266
        t -= f->bb[t].tmp - 2;
267
      } else if (f->bb[t].tmp > 2) t -= f->bb[t].tmp-- - 2;
268
      f->bb[i].next[1] = t;
269
    }
270
    /* artificial blocks do not have relocations, hardcode them */
271
    if (f->bb[i].last < 0) f->bb[i].next[0] = i + 1;
272
  }
273 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_INSERT");
274 879 markom
 
275
  /* Uncoditional branched do not continue to next block */
276
  for (i = 0; i < f->num_bb; i++) {
277
    cuc_insn *ii;
278
    if (f->bb[i].last < 0) continue;
279
    ii = &insn[f->bb[i].last];
280
    /* Unconditional branch? */
281
    if (ii->type & IT_BRANCH && ii->opt[1] & OPT_CONST) {
282
      change_insn_type (ii, II_NOP);
283
      if (f->bb[i].next[1] == i + 1) f->bb[i].next[0] = f->bb[i].next[1];
284
      f->bb[i].next[1] = -1;
285
    }
286
  }
287 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_UNCOND_JUMP");
288 879 markom
 
289
  /* Add backward connections */
290
  for (i = 0; i < f->num_bb; i++)
291
    f->bb[i].prev[0] = f->bb[i].prev[1] = -1;
292
 
293
  for (i = 0; i < f->num_bb; i++) {
294
    if (f->bb[i].next[0] >= 0) {
295
      int t = f->bb[i].next[0];
296
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
297
      else {
298
        assert (f->bb[t].prev[1] < 0);
299
        f->bb[t].prev[1] = i;
300
      }
301
    }
302
    if (f->bb[i].next[1] >= 0) {
303
      int t = f->bb[i].next[1];
304
      if (f->bb[t].prev[0] < 0) f->bb[t].prev[0] = i;
305
      else {
306
        assert (f->bb[t].prev[1] < 0);
307
        f->bb[t].prev[1] = i;
308
      }
309
    }
310
  }
311 932 markom
  /* Add START marker */
312
  assert (f->bb[0].prev[0] < 0);
313
  f->bb[0].prev[0]= BBID_START;
314
 
315 925 markom
  /* Add END marker */
316
  for (i = 0; i < f->num_bb; i++)
317
    if (f->bb[i].type & BB_END) {
318
      assert (f->bb[i].next[0] < 0);
319
      f->bb[i].next[0] = BBID_END;
320
    }
321 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "AFTER_PREV");
322 879 markom
}
323
 
324 905 markom
/* We do a quick check if there are some anomalies with references */
325
void cuc_check (cuc_func *f)
326
{
327
  int i, j, k;
328 930 markom
  if (cuc_debug) printf ("cuc_check\n");
329 905 markom
  for (i = 0; i < f->num_bb; i++) {
330 930 markom
    if (!f->bb[i].insn && f->bb[i].ninsn) goto err;
331
    for (j = 0; j < f->bb[i].ninsn; j++) {
332
      cuc_insn *ii = &f->bb[i].insn[j];
333
      if (ii->index == II_CMOV && ii->type & IT_COND) {
334
        k = 0;
335
        assert (ii->opt[k] & OPT_REGISTER);
336
        if ((signed)ii->op[k] >= 0 && ii->op[k] != FLAG_REG && ii->op[k] != LRBB_REG) {
337
          printf ("%x %x\n", ii->opt[0], ii->op[0]);
338
          goto err;
339
        }
340
      }
341 905 markom
      for (k = 0; k < MAX_OPERANDS; k++)
342 930 markom
        if (ii->opt[k] & OPT_REF) {
343
          int t = ii->op[k];
344
          if (REF_BB(t) >= f->num_bb || REF_I (t) >= f->bb[REF_BB(t)].ninsn
345
           || ii->index == II_CMOV && (
346
                (f->INSN(t).type & IT_COND) != (ii->type & IT_COND) && k < 3
347
              || !(f->INSN(t).type & IT_COND) && k == 3)) goto err;
348 905 markom
        }
349 930 markom
    }
350 905 markom
  }
351 930 markom
  return;
352
err:
353
  printf ("Anomaly detected at %x.%x[%i]\n", i, j, k);
354
  print_cuc_bb (f, "ANOMALY");
355
  exit (1);
356 905 markom
}
357
 
358 879 markom
/* Build basic blocks */
359
void build_bb (cuc_func *f)
360
{
361
  int i, j, k;
362
  for (i = 0; i < f->num_bb; i++) {
363
    if (f->bb[i].last < 0) f->bb[i].ninsn = MAX_REGS - 1;
364
    else f->bb[i].ninsn = f->bb[i].last - f->bb[i].first + 1 + MAX_REGS - 1;
365
    assert (f->bb[i].ninsn >= MAX_REGS - 1);
366
    f->bb[i].insn = (cuc_insn *) malloc (sizeof (cuc_insn) * f->bb[i].ninsn);
367
    assert (f->bb[i].insn);
368
    f->bb[i].nmemory = 0;
369
    f->bb[i].unrolled = 1;
370
 
371
    /* Save space for conditional moves, exclude r0, place lrbb instead */
372
    change_insn_type (&f->bb[i].insn[0], II_LRBB);
373
    strcpy (f->bb[i].insn[0].disasm, "lrbb");
374 930 markom
    f->bb[i].insn[0].type = IT_UNUSED | IT_COND;
375 879 markom
    f->bb[i].insn[0].dep = NULL;
376
    f->bb[i].insn[0].op[0] = LRBB_REG; f->bb[i].insn[0].opt[0] = OPT_REGISTER | OPT_DEST;
377
    f->bb[i].insn[0].opt[1] = OPT_LRBB;
378
    f->bb[i].insn[0].opt[2] = f->bb[i].insn[0].opt[3] = OPT_NONE;
379
    for (j = 1; j < MAX_REGS - 1; j++) {
380
      change_insn_type (&f->bb[i].insn[j], II_CMOV);
381
      strcpy (f->bb[i].insn[j].disasm, "cmov");
382 930 markom
      f->bb[i].insn[j].type = j == FLAG_REG || j == LRBB_REG ? IT_COND : 0;
383 879 markom
      f->bb[i].insn[j].dep = NULL;
384
      f->bb[i].insn[j].opt[0] = f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_REGISTER;
385
      f->bb[i].insn[j].opt[0] |= OPT_DEST;
386
      f->bb[i].insn[j].op[0] = f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = j;
387
      f->bb[i].insn[j].op[3] = LRBB_REG; f->bb[i].insn[j].opt[3] = OPT_REGISTER;
388
    }
389 897 markom
 
390
    /* Relocate instructions */
391 879 markom
    for (j = MAX_REGS - 1; j < f->bb[i].ninsn; j++) {
392
      f->bb[i].insn[j] = insn[f->bb[i].first + j - (MAX_REGS - 1)];
393
      for (k = 0; k < MAX_OPERANDS; k++)
394
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
395
          int b1;
396
          for (b1 = 0; b1 < i; b1++)
397 898 markom
            if (f->bb[b1].first <= (signed) f->bb[i].insn[j].op[k]
398
              && (signed)f->bb[i].insn[j].op[k] <= f->bb[b1].last) break;
399 879 markom
          assert (b1 < f->num_bb);
400
          f->bb[i].insn[j].op[k] = REF (b1, f->bb[i].insn[j].op[k] - f->bb[b1].first + MAX_REGS - 1);
401
        }
402
      if (f->bb[i].insn[j].type & IT_MEMORY) f->bb[i].nmemory++;
403
    }
404
  }
405 905 markom
  cuc_check (f);
406 879 markom
}
407
 
408
/* type == 0; keep predecessor condition
409
 * type == 1; keep successor condition
410
 * type == 2; join loop unrolled blocks */
411
static void join_bb (cuc_func *f, int pred, int succ, int type)
412
{
413 905 markom
  int i, j, k, n1, n2, ninsn, add_cond = 0;
414 879 markom
  unsigned long cond_op, cond_opt;
415
  cuc_insn *insn;
416
 
417 905 markom
  if (cuc_debug) cuc_check (f);
418 897 markom
  cucdebug (3, "%x <= %x+%x (%i)\n", pred, pred, succ, type);
419
  cucdebug (3, "%x %x\n", f->bb[pred].ninsn, f->bb[succ].ninsn);
420
  if (cuc_debug >= 3) fflush (stdout);
421 879 markom
 
422 905 markom
  n1 = f->bb[pred].ninsn;
423
  n2 = f->bb[succ].ninsn;
424
  if (n1 <= 0
425
   || !(f->bb[pred].insn[n1 - 1].type & IT_BRANCH)) type = 1;
426 879 markom
  if (type == 0 && f->bb[succ].prev[0] == f->bb[succ].next[0]) add_cond = 1;
427 905 markom
  if (type == 2) add_cond = 1;
428
 
429
  assert (f->bb[pred].next[0] == f->bb[succ].next[0] || type != 2); /* not supported */
430 879 markom
 
431 905 markom
  ninsn = n1 + n2 + (type == 1 ? 0 : 1) + (add_cond ? MAX_REGS : 0);
432 879 markom
 
433
  insn = (cuc_insn *) malloc (ninsn * sizeof (cuc_insn));
434 905 markom
  for (i = 0; i < n1; i++) insn[i] = f->bb[pred].insn[i];
435
  /* when type == 0, we move the last (jump) instruction to the end */
436 879 markom
  if (type == 0 || type == 2) {
437 905 markom
    /* Move first branch instruction to the end */
438
    assert (insn[n1 - 1].type & IT_BRANCH);
439
    insn[ninsn - 1] = insn[n1 - 1];
440
    cond_op = insn[n1 - 1].op[1];
441
    cond_opt = insn[n1 - 1].opt[1];
442
 
443
    /* Remove old branch */
444
    change_insn_type (&insn[n1 - 1], II_NOP);
445 879 markom
  }
446
  /* Copy second block */
447 905 markom
  for (i = 0; i < n2; i++) insn[i + n1] = f->bb[succ].insn[i];
448 902 markom
 
449
  /* and when type == 2, we may need to add sfor instruction, to quit when either is true */
450
  if (type == 2) {
451 905 markom
    /* Move second branch instruction to the end */
452
    if (insn[n1 + n2 - 1].type & IT_BRANCH) {
453
      insn[ninsn - 1] = insn[n1 + n2 - 1];
454
 
455
      /* Use conditional from cmov FLAG_REG, c_p, c_s, c_p */
456
      insn[ninsn - 1].op[1] = REF (pred, n1 + n2 + FLAG_REG); insn[ninsn - 1].opt[1] = OPT_REF;
457
 
458
      /* Remove old one */
459
      change_insn_type (&insn[n1 + n2 - 1], II_NOP);
460
    } else change_insn_type (&insn[ninsn - 1], II_NOP); /* do not use branch slot */
461 902 markom
  }
462
 
463 905 markom
#if 1
464 927 markom
  /* LRBB at start of succ BB is not valid anymore */
465 905 markom
  if (insn[n1].index == II_LRBB) {
466 925 markom
    if (type == 1) {
467
      /* We have two possibilities, how this could have happened:
468
         1. we just moved second predecessor of succ to pred,
469
            pred now having two predecessors => everything is ok
470
         2. we just moved second predecessor of succ to pred,
471
            now, having just one predecessor => LRBB is not needed anymore */
472
      if (f->bb[pred].prev[1] < 0) { /* handle second option */
473
        change_insn_type (&insn[n1], II_ADD);
474
        insn[n1].op[1] = 1; insn[n1].opt[1] = OPT_CONST;
475
        insn[n1].op[2] = 0; insn[n1].opt[2] = OPT_CONST;
476
        insn[n1].opt[3] = OPT_NONE;
477
      }
478
    } else {
479
      assert (0); /* not tested yet */
480
      change_insn_type (&insn[n1], II_NOP);
481
      for (i = n1; i < ninsn; i++)
482
        if (insn[i].index == II_CMOV && insn[i].op[3] == REF (pred, n1)) {
483
          assert (insn[i].opt[3] == OPT_REF);
484
          insn[i].op[3] = cond_op;
485
          insn[i].opt[3] = cond_opt;
486
          if (f->bb[pred].next[0] != succ) {
487
            unsigned long t; /* negate conditional -- exchange */
488
            assert (f->bb[pred].next[1] == succ);
489
            t = insn[i].op[1];
490
            insn[i].op[1] = insn[i].op[2];
491
            insn[i].op[2] = t;
492
            t = insn[i].opt[1];
493
            insn[i].opt[1] = insn[i].opt[2];
494
            insn[i].opt[2] = t;
495
          }
496 902 markom
        }
497 925 markom
    }
498 902 markom
  }
499 905 markom
#endif
500 879 markom
 
501
  for (i = 0; i < ninsn; i++) reloc[i] = -1;
502
 
503
  /* Add conditional instructions if required */
504
  if (add_cond) {
505
    recalc_last_used_reg (f, pred);
506
    recalc_last_used_reg (f, succ);
507
 
508
    /* r0 -- add nop for it */
509 905 markom
    change_insn_type (&insn[n1 + n2], II_NOP);
510 879 markom
    for (i = 1; i < MAX_REGS; i++) {
511 905 markom
      cuc_insn *ii = &insn[n1 + n2 + i];
512 879 markom
      int a = f->bb[pred].last_used_reg[i];
513
      int b = f->bb[succ].last_used_reg[i];
514
 
515
      if (b < 0) change_insn_type (ii, II_NOP);
516
      else if (a < 0) {
517
        change_insn_type (ii, II_ADD);
518 930 markom
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
519 879 markom
        ii->dep = NULL;
520
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
521
        ii->op[1] = b; ii->opt[1] = OPT_REF;
522
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
523 905 markom
        ii->opt[3] = OPT_NONE;
524 879 markom
      } else if (b >= 0) {
525
        change_insn_type (ii, II_CMOV);
526 930 markom
        ii->type = i == FLAG_REG || i == LRBB_REG ? IT_COND : 0;
527 879 markom
        ii->dep = NULL;
528
        ii->op[0] = i; ii->opt[0] = OPT_REGISTER | OPT_DEST;
529
        ii->op[1] = a; ii->opt[1] = OPT_REF;
530
        ii->op[2] = b; ii->opt[2] = OPT_REF;
531
        ii->op[3] = cond_op; ii->opt[3] = cond_opt;
532 905 markom
        reloc[REF_I(a)] = REF (pred, n1 + n2 + i);
533 879 markom
      }
534
      sprintf (ii->disasm, "cmov (join BB)");
535
    }
536
  }
537
 
538 905 markom
  if (cuc_debug) cuc_check (f);
539 925 markom
  if (f->bb[succ].type & BB_END) {
540
    f->bb[pred].type |= BB_END;
541
    if (insn[ninsn - 1].type & IT_BRANCH && insn[ninsn - 1].op[0] == succ) {
542
      assert (insn[ninsn - 1].opt[0] & OPT_BB);
543
      insn[ninsn - 1].op[0] = BBID_END;
544
    }
545
  }
546 879 markom
  i = 0;
547 925 markom
  assert (f->bb[pred].next[0] >= 0 && f->bb[pred].next[0] != BBID_END);
548 879 markom
  switch (type) {
549
  case 0:
550
    if (f->bb[pred].next[0] == succ) f->bb[pred].next[0] = f->bb[succ].next[0];
551
    if (f->bb[pred].next[1] == succ) f->bb[pred].next[1] = f->bb[succ].next[0];
552
    assert (f->bb[succ].next[1] < 0);
553
    break;
554
  case 1:
555
    f->bb[pred].next[0] = f->bb[succ].next[0];
556
    f->bb[pred].next[1] = f->bb[succ].next[1];
557
    break;
558 905 markom
  case 2:
559
    f->bb[pred].next[0] = f->bb[succ].next[0];
560
    f->bb[pred].next[1] = f->bb[succ].next[1];
561
    break;
562 879 markom
  }
563 924 markom
  if (f->bb[pred].next[0] < 0) f->bb[pred].next[0] = f->bb[pred].next[1];
564 879 markom
  if (f->bb[pred].next[0] == f->bb[pred].next[1]) f->bb[pred].next[1] = -1;
565 927 markom
 
566
  /* We just did something stupid -- we joined two predecessors into one;
567
     succ may need the information from which block we came.  We will repair
568
     this by converting LRBB to CMOV */
569 930 markom
  for (j = 0; j < 2; j++) {
570
    int nb = f->bb[pred].next[j];
571 927 markom
    int t;
572
 
573
    /* check just valid connections */
574
    if (nb < 0 || nb == BBID_END) continue;
575
 
576
    /* check type */
577
    if (f->bb[nb].prev[0] == pred && f->bb[nb].prev[1] == succ) t = 1;
578
    else if (f->bb[nb].prev[1] == pred && f->bb[nb].prev[0] == succ) t = 0;
579
    else continue;
580
 
581 930 markom
    /* check all LRBB instructions.  */
582
    for (i = 0; i < f->bb[nb].ninsn; i++)
583
      if (f->bb[nb].insn[i].index == II_LRBB) {
584
        cuc_insn *lrbb =&f->bb[nb].insn[i];
585
        change_insn_type (lrbb, II_CMOV);
586
        lrbb->op[1] = t; lrbb->opt[1] = OPT_CONST;
587
        lrbb->op[2] = 1 - t; lrbb->opt[2] = OPT_CONST;
588
        lrbb->op[3] = cond_op; lrbb->opt[3] = cond_opt;
589
        lrbb->type |= IT_COND;
590
      }
591 927 markom
  }
592
 
593 879 markom
  f->bb[succ].type = BB_DEAD;
594 924 markom
  //printf (" %x %x %x %x %x\n", f->bb[pred].next[0], f->bb[pred].next[1], f->bb[succ].next[0], f->bb[succ].next[1], insn[ninsn - 1].type);
595 905 markom
  /* remove branch instruction, if there is only one successor */
596 925 markom
  if (f->bb[pred].next[1] < 0 && insn[ninsn - 1].type & IT_BRANCH) {
597
    assert (f->bb[pred].next[0] != pred); /* end BB, loop should not be possible */
598 905 markom
    change_insn_type (&insn[ninsn - 1], II_NOP);
599 925 markom
  }
600 879 markom
 
601
  /* Set max count */
602
  if (f->bb[pred].cnt < f->bb[succ].cnt) f->bb[pred].cnt = f->bb[succ].cnt;
603
  f->bb[pred].ninsn = ninsn;
604 905 markom
  f->bb[succ].ninsn = 0;
605 879 markom
  free (f->bb[pred].insn); f->bb[pred].insn = NULL;
606
  free (f->bb[succ].insn); f->bb[succ].insn = NULL;
607
  f->bb[pred].insn = insn;
608
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
609
    if (f->bb[i].prev[0] == succ) f->bb[i].prev[0] = pred;
610
    if (f->bb[i].prev[1] == succ) f->bb[i].prev[1] = pred;
611
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
612
    for (j = 0; j < f->bb[i].ninsn; j++)
613
      for (k = 0; k < MAX_OPERANDS; k++)
614
        if (f->bb[i].insn[j].opt[k] & OPT_REF) {
615
          /* Check if we are referencing successor BB -> relocate to second part of
616
             the new block */
617
          if (REF_BB (f->bb[i].insn[j].op[k]) == succ) {
618 905 markom
            int t = f->bb[i].insn[j].op[k];
619
            int ndest = REF (pred, REF_I (t) + n1);
620
            //printf ("%x: %x %x\n", REF(i, j), t, ndest);
621 879 markom
 
622 905 markom
            /* We've found a reference to succ. block, being removed, relocate */
623
            f->bb[i].insn[j].op[k] = ndest;
624 879 markom
          } else if (REF_BB(f->bb[i].insn[j].op[k]) == pred) {
625
            if (i != pred && reloc[REF_I(f->bb[i].insn[j].op[k])] >= 0) {
626
              f->bb[i].insn[j].op[k] = reloc[REF_I(f->bb[i].insn[j].op[k])];
627
            }
628
          }
629
        }
630
  }
631
 
632 905 markom
  if (cuc_debug) cuc_check (f);
633 883 markom
  if (cuc_debug >= 3) print_cuc_bb (f, "join");
634 879 markom
}
635
 
636
/* Optimize basic blocks */
637 931 markom
int optimize_bb (cuc_func *f)
638 879 markom
{
639 931 markom
  int modified = 0;
640 879 markom
  int i, j;
641
remove_lrbb:
642
  /* we can remove lrbb instructions from blocks with just one predecessor */
643
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
644
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* exactly one predecessor */
645
      for (j = 0; j < f->bb[i].ninsn; j++)
646
        if (f->bb[i].insn[j].index == II_LRBB) {
647
          cuc_insn *t;
648 925 markom
          cucdebug (4, "-lrbb %x.%x\n", i, j);
649 879 markom
 
650
          /* Change to add LRBB, 0, 0 */
651
          change_insn_type (&f->bb[i].insn[j], II_ADD);
652
          f->bb[i].insn[j].type &= ~IT_VOLATILE;
653
          f->bb[i].insn[j].opt[1] = f->bb[i].insn[j].opt[2] = OPT_CONST;
654
          f->bb[i].insn[j].op[1] = f->bb[i].insn[j].op[2] = 0; /* always use left block */
655
          f->bb[i].insn[j].opt[3] = OPT_NONE;
656 931 markom
          modified = 1;
657 932 markom
          if (f->bb[i].prev[0] != BBID_START) {
658
            t = &f->bb[f->bb[i].prev[0]].insn[f->bb[f->bb[i].prev[0]].ninsn - 1];
659 879 markom
 
660 932 markom
            /* If the predecessor still has a conditional jump instruction, we must be careful.
661
               If next[0] == next[1] join them. Now we will link lrbb and correct the situation */
662
            if (t->type & IT_BRANCH) { /* We must set a reference to branch result */
663
              f->bb[i].insn[j].opt[1] = t->opt[1];
664
              f->bb[i].insn[j].op[1] = t->op[1];
665
              /* sometimes branch is not needed anymore */
666
              if (f->bb[f->bb[i].prev[0]].next[1] < 0) change_insn_type (t, II_NOP);
667
            }
668 879 markom
          }
669
        }
670
    }
671
  }
672
 
673 927 markom
  /* Ordering of joining types is cruical -- we should concat all directly connected BBs
674
     together first, so when we do a type != 1 joining, we can remove LRBB, directly by
675
     looking at number of its predeccessors */
676 879 markom
 
677
  /* Type 1 joining
678
     1. link between pred & succ
679
     2. no other pred's successors
680 925 markom
     3. no other succ's predecessors, except if pred has max one */
681 932 markom
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
682
    int p = f->bb[i].prev[0];
683
    if (p < 0 || p == BBID_START) continue;
684 925 markom
    /* one successor and max sum of 3 predecessors */
685 932 markom
    if (f->bb[p].next[0] >= 0 && f->bb[p].next[1] < 0
686
     && (f->bb[p].prev[1] < 0 || f->bb[i].prev[1] < 0)) {
687 925 markom
      /* First we will move all predecessors from succ to pred, and then we will do
688
         real type 1 joining */
689 932 markom
      if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START) {
690
        int p1 = f->bb[i].prev[1];
691 925 markom
        /* joining is surely not worth another extra memory access */
692 932 markom
        if (f->bb[p].nmemory) continue;
693
        if (f->bb[p].prev[0] >= 0) {
694
           assert (f->bb[p].prev[1] < 0);
695
           f->bb[p].prev[1] = p1;
696
        } else f->bb[p].prev[0] = p1;
697
        if (f->bb[p1].next[0] == i) f->bb[p1].next[0] = p;
698
        else if (f->bb[p1].next[1] == i) f->bb[p1].next[1] = p;
699 925 markom
        else assert (0);
700
        f->bb[i].prev[1] = -1;
701
      }
702 932 markom
      assert (p >= 0 && f->bb[i].prev[1] < 0); /* one predecessor */
703
      join_bb (f, p, i, 1);
704 931 markom
      modified = 1;
705 879 markom
      goto remove_lrbb;
706
    }
707 932 markom
  }
708 927 markom
 
709
  /* Type 0 joining
710
     1. link between pred & succ
711
     2. no memory accesses in succ
712
     3. optional pred's second successors
713
     4. max. one succ's successors */
714
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
715 932 markom
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START
716
     && f->bb[i].prev[1] < 0 /* one predecessor */
717 927 markom
     && f->bb[i].next[1] < 0 /* max. one successor */
718
     && f->bb[i].nmemory == 0) {                  /* and no memory acceses */
719
      join_bb (f, f->bb[i].prev[0], i, 0);
720 931 markom
      modified = 1;
721 927 markom
      goto remove_lrbb;
722
    }
723 879 markom
 
724
  /* Type 2 joining
725
     1. link between pred & succ
726
     2. succ has exactly one predeccessor
727
     3. pred & succ share common successor
728
     4. optional succ's second successor */
729
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD))
730
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[1] < 0) { /* one predecessor */
731
      int p = f->bb[i].prev[0];
732 932 markom
      if (p == BBID_START) continue;
733 905 markom
#if 0 /* not yet supported */
734
      if (f->bb[p].next[0] == i
735
       && (f->bb[i].next[1] == f->bb[p].next[1]
736
        || f->bb[i].next[1] == f->bb[p].next[0])) {
737
        join_bb (f, p, i, 2);
738 897 markom
        goto remove_lrbb;
739
      }
740 905 markom
#endif
741
      if (f->bb[p].next[1] == i
742
       && (f->bb[i].next[0] == f->bb[p].next[1]
743
        || f->bb[i].next[0] == f->bb[p].next[0])) {
744
        join_bb (f, p, i, 2);
745 931 markom
        modified = 1;
746 905 markom
        goto remove_lrbb;
747
      }
748 879 markom
    }
749 931 markom
  return modified;
750 879 markom
}
751
 
752
/* Removes BBs marked as dead */
753 931 markom
int remove_dead_bb (cuc_func *f)
754 879 markom
{
755
  int i, j, k, d = 0;
756
 
757
  for (i = 0; i < f->num_bb; i++) if (f->bb[i].type & BB_DEAD) {
758
    if (f->bb[i].insn) free (f->bb[i].insn);
759
    f->bb[i].insn = NULL;
760
    reloc[i] = -1;
761
  } else {
762
    reloc[i] = d;
763
    f->bb[d++] = f->bb[i];
764
  }
765 931 markom
  if (f->num_bb == d) return 0;
766 879 markom
  f->num_bb = d;
767
 
768
  /* relocate initial blocks */
769
  for (i = 0; i < f->num_init_bb; i++)
770
    f->init_bb_reloc[i] = reloc[f->init_bb_reloc[i]];
771
 
772
  /* repair references */
773
  for (i = 0; i < f->num_bb; i++) if (!(f->bb[i].type & BB_DEAD)) {
774 925 markom
          printf ("%x %x %x %x %x\n", i, f->bb[i].prev[0], f->bb[i].prev[1], f->bb[i].next[0], f->bb[i].next[1]);
775
          fflush (stdout);
776 932 markom
    if (f->bb[i].prev[0] >= 0 && f->bb[i].prev[0] != BBID_START)
777
      assert ((f->bb[i].prev[0] = reloc[f->bb[i].prev[0]]) >= 0);
778
    if (f->bb[i].prev[1] >= 0 && f->bb[i].prev[1] != BBID_START)
779
      assert ((f->bb[i].prev[1] = reloc[f->bb[i].prev[1]]) >= 0);
780 925 markom
    if (f->bb[i].next[0] >= 0 && f->bb[i].next[0] != BBID_END)
781
      assert ((f->bb[i].next[0] = reloc[f->bb[i].next[0]]) >= 0);
782
    if (f->bb[i].next[1] >= 0 && f->bb[i].next[1] != BBID_END)
783
      assert ((f->bb[i].next[1] = reloc[f->bb[i].next[1]]) >= 0);
784 879 markom
    if (f->bb[i].prev[0] == f->bb[i].prev[1]) f->bb[i].prev[1] = -1;
785
    if (f->bb[i].next[0] == f->bb[i].next[1]) f->bb[i].next[1] = -1;
786
 
787
    for (j = 0; j < f->bb[i].ninsn; j++)
788
      for (k = 0; k < MAX_OPERANDS; k++)
789 925 markom
        if (f->bb[i].insn[j].opt[k] & OPT_BB && (signed)f->bb[i].insn[j].op[k] >= 0) {
790
          if (f->bb[i].insn[j].op[k] != BBID_END)
791
            assert ((f->bb[i].insn[j].op[k] = reloc[f->bb[i].insn[j].op[k]]) >= 0);
792
        } else if (f->bb[i].insn[j].opt[k] & OPT_REF) {
793 879 markom
          int t = f->bb[i].insn[j].op[k];
794
          assert (reloc[REF_BB(t)] >= 0);
795
          f->bb[i].insn[j].op[k] = REF (reloc[REF_BB(t)], REF_I (t));
796
        }
797
  }
798 931 markom
  return 1;
799 879 markom
}
800
 
801
/* Recursive calculation of dependencies */
802
static int reg_dep_rec (cuc_func *f, int cur)
803
{
804
  int i, j;
805
  cuc_insn *insn = f->bb[cur].insn;
806
 
807
  //printf ("\n %i", cur); 
808
  /* Spread only, do not loop */
809
  if (f->bb[cur].tmp) return;
810
  f->bb[cur].tmp = 1;
811
  //printf ("!   ");
812
 
813
  for (i = 0; i < f->bb[cur].ninsn; i++) {
814
    /* Check for destination operand(s) */
815
    for (j = 0; j < MAX_OPERANDS; j++) if (insn[i].opt[j] & OPT_DEST)
816
      if ((insn[i].opt[j] & ~OPT_DEST) == OPT_REGISTER && (signed)insn[i].op[j] >= 0) {
817
        //printf ("%i:%i,%x ", insn[i].op[j], i, REF (cur, i));
818
        assert (insn[i].op[j] > 0 && insn[i].op[j] < MAX_REGS); /* r0 should never be dest */
819
        f->bb[cur].last_used_reg[insn[i].op[j]] = REF (cur, i);
820
      }
821
  }
822
 
823 925 markom
  if (f->bb[cur].next[0] >= 0 && f->bb[cur].next[0] != BBID_END)
824
    reg_dep_rec (f, f->bb[cur].next[0]);
825
  if (f->bb[cur].next[1] >= 0 && f->bb[cur].next[1] != BBID_END)
826
    reg_dep_rec (f, f->bb[cur].next[1]);
827 879 markom
}
828
 
829
/* Detect register dependencies */
830
void reg_dep (cuc_func *f)
831
{
832
  int i, b, c;
833
 
834
  /* Set dead blocks */
835
  for (b = 0; b < f->num_bb; b++) {
836
    f->bb[b].tmp = 0;
837
    for (i = 0; i < MAX_REGS; i++) f->bb[b].last_used_reg[i] = -1;
838
  }
839
 
840
  /* Start with first block and set dependecies of all reachable blocks */
841
  /* At the same time set last_used_regs */
842
  reg_dep_rec (f, 0);
843
 
844
  for (i = 0; i < f->num_bb; i++)
845
    if (f->bb[i].tmp) f->bb[i].tmp = 0;
846
    else f->bb[i].type |= BB_DEAD;
847
 
848
  /* Detect loops; mark BBs where loops must be broken */
849
  for (c = 0; c < f->num_bb; c++) {
850
    int min = 3, minb;
851
 
852
    /* search though all non-visited for minimum number of unvisited predecessors */
853
    for (b = 0; b < f->num_bb; b++) if (!f->bb[b].tmp) {
854
      int tmp = 0;
855 932 markom
      if (f->bb[b].prev[0] >= 0 && f->bb[b].prev[0] != BBID_START
856
       && !f->bb[f->bb[b].prev[0]].tmp) tmp++;
857
      if (f->bb[b].prev[1] >= 0 && f->bb[b].prev[1] != BBID_START
858
       && !f->bb[f->bb[b].prev[1]].tmp) tmp++;
859 879 markom
      if (tmp < min) {
860
        minb = b;
861
        min = tmp;
862
        if (tmp == 0) break; /* We already have the best one */
863
      }
864
    }
865
    b = minb;
866
    f->bb[b].tmp = 1; /* Mark visited */
867 883 markom
    cucdebug (3, "minb %i min %i\n", minb, min);
868 879 markom
    if (min) { /* We just broke the loop */
869
      f->bb[b].type |= BB_INLOOP;
870
    }
871
  }
872
 
873
  /* Set real predecessors in cmov instructions to previous blocks */
874
  for (b = 0; b < f->num_bb; b++)
875
    for (i = 1; i < MAX_REGS - 1; i++) {
876
      int pa, pb;
877
      assert (f->bb[b].insn[i].index ==  II_CMOV);
878
      assert (f->bb[b].insn[i].opt[0] == OPT_REGISTER | OPT_DEST);
879
      assert (f->bb[b].insn[i].op[0] == i);
880 932 markom
      if (f->bb[b].prev[0] < 0 || f->bb[b].prev[0] == BBID_START) pa = -1;
881 879 markom
      else pa = f->bb[f->bb[b].prev[0]].last_used_reg[i];
882 932 markom
      if (f->bb[b].prev[1] < 0 || f->bb[b].prev[1] == BBID_START) pb = -1;
883 879 markom
      else pb = f->bb[f->bb[b].prev[1]].last_used_reg[i];
884
 
885
      /* We do some very simple optimizations right away to make things more readable */
886
      if (pa < 0 && pb < 0) {
887
        /* Was not used at all */
888
        change_insn_type (&f->bb[b].insn[i], II_ADD);
889
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
890
        f->bb[b].insn[i].opt[3] = OPT_NONE;
891
      } else if (pa < 0) {
892
        change_insn_type (&f->bb[b].insn[i], II_ADD);
893
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
894
        f->bb[b].insn[i].op[1] = pb; f->bb[b].insn[i].opt[1] = OPT_REF;
895
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
896
        f->bb[b].insn[i].opt[3] = OPT_NONE;
897
      } else if (pb < 0) {
898
        change_insn_type (&f->bb[b].insn[i], II_ADD);
899
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
900
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
901
        f->bb[b].insn[i].op[2] = 0; f->bb[b].insn[i].opt[2] = OPT_CONST;
902
        f->bb[b].insn[i].opt[3] = OPT_NONE;
903
      } else {
904
        int t = REF (b, 0); /* lrbb should be first instruction */
905
        assert (f->INSN(t).index == II_LRBB);
906
 
907
        f->bb[b].insn[i].op[1] = pa; f->bb[b].insn[i].opt[1] = OPT_REF;
908
        assert (f->INSN(pa).opt[0] == (OPT_REGISTER | OPT_DEST));
909
 
910
        f->bb[b].insn[i].op[2] = pb; f->bb[b].insn[i].opt[2] = OPT_REF;
911
        assert (f->INSN(pb).opt[0] == (OPT_REGISTER | OPT_DEST));
912
 
913
        /* Update op[3] -- flag register */
914
        assert (f->bb[b].insn[i].opt[3] == OPT_REGISTER);
915
        assert (f->bb[b].insn[i].op[3] == LRBB_REG);
916
        assert (t >= 0);
917
        f->bb[b].insn[i].opt[3] = OPT_REF; /* Convert already used regs to references */
918
        f->bb[b].insn[i].op[3] = t;
919
        assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
920
      }
921
    }
922
 
923
  /* assign register references */
924
  for (b = 0; b < f->num_bb; b++) {
925
    /* rebuild last used reg array */
926
    f->bb[b].last_used_reg[0] = -1;
927
    if (f->bb[b].insn[0].index == II_LRBB) f->bb[b].last_used_reg[LRBB_REG] = 0;
928
    else f->bb[b].last_used_reg[LRBB_REG] = -1;
929
 
930
    for (i = 1; i < MAX_REGS - 1; i++)
931
      f->bb[b].last_used_reg[i] = -1;
932
 
933
    /* Create references */
934
    for (i = 0; i < f->bb[b].ninsn; i++) {
935
      int k;
936
      /* Check for source operands first */
937
      for (k = 0; k < MAX_OPERANDS; k++) {
938
        if (!(f->bb[b].insn[i].opt[k] & OPT_DEST))
939
        if (f->bb[b].insn[i].opt[k] & OPT_REGISTER) {
940
          int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
941
 
942
          if (f->bb[b].insn[i].op[k] == 0) { /* Convert r0 to const0 */
943
            f->bb[b].insn[i].opt[k] = OPT_CONST;
944
            f->bb[b].insn[i].op[k] = 0;
945
          } else if (t >= 0) {
946
            f->bb[b].insn[i].opt[k] = OPT_REF; /* Convert already used regs to references */
947
            f->bb[b].insn[i].op[k] = t;
948
            assert (f->INSN(t).opt[0] == (OPT_REGISTER | OPT_DEST));
949
            //f->INSN(t).op[0] = -1;
950
          }
951
        } else if (f->bb[b].insn[i].opt[k] & OPT_REF) {
952
          //f->INSN(f->bb[b].insn[i].op[k]).op[0] = -1; /* Mark referenced */
953
          f->INSN(f->bb[b].insn[i].op[k]).type &= ~IT_UNUSED;
954
        }
955
      }
956
 
957
      /* Now check for destination operand(s) */
958
      for (k = 0; k < MAX_OPERANDS; k++) if (f->bb[b].insn[i].opt[k] & OPT_DEST)
959
        if ((f->bb[b].insn[i].opt[k] & ~OPT_DEST) == OPT_REGISTER
960
          && (int)f->bb[b].insn[i].op[k] >= 0) {
961
          int t = f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]];
962
          assert (f->bb[b].insn[i].op[k] != 0); /* r0 should never be dest */
963
          f->bb[b].last_used_reg[f->bb[b].insn[i].op[k]] = REF (b, i);
964
        }
965
    }
966
  }
967
 
968
  /* Remove all unused lrbb */
969
  for (b = 0; b < f->num_bb; b++)
970
    for (i = 0; i < f->bb[b].ninsn; i++)
971
      if (f->bb[b].insn[i].type & IT_UNUSED) change_insn_type (&f->bb[b].insn[i], II_NOP);
972
 
973
  /* SSAs with final register value are marked as outputs */
974
  assert (f->bb[f->num_bb - 1].type & BB_END);
975
  for (i = 0; i < MAX_REGS; i++) if (!call_saved[i]) {
976
    int t = f->bb[f->num_bb - 1].last_used_reg[i];
977
    /* Mark them volatile, so optimizer does not remove them */
978
    if (t >= 0) f->bb[REF_BB(t)].insn[REF_I(t)].type |= IT_OUTPUT;
979
  }
980
}
981
 
982 897 markom
/* split the BB, based on the group numbers in .tmp */
983
void expand_bb (cuc_func *f, int b)
984
{
985
  int n = f->num_bb;
986
  int mg = 0;
987
  int b1, i, j;
988
 
989
  for (i = 0; i < f->bb[b].ninsn; i++)
990
    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
991
 
992
  /* Create copies */
993
  for (b1 = 1; b1 <= mg; b1++) {
994
    assert (f->num_bb < MAX_BB);
995
    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
996
    f->num_bb++;
997
  }
998
 
999
  /* Relocate */
1000
  for (b1 = 0; b1 < f->num_bb; b1++)
1001
    for (i = 0; i < f->bb[b1].ninsn; i++) {
1002
      dep_list *d = f->bb[b1].insn[i].dep;
1003
      for (j = 0; j < MAX_OPERANDS; j++)
1004
        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
1005
          int t = f->bb[b1].insn[i].op[j];
1006
          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
1007
            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
1008
        }
1009
      while (d) {
1010
        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
1011
          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
1012
        d = d->next;
1013
      }
1014
    }
1015
 
1016
  /* Delete unused instructions */
1017
  for (j = 0; j <= mg; j++) {
1018
    if (j == 0) b1 = b;
1019
    else b1 = n + j - 1;
1020
    for (i = 0; i < f->bb[b1].ninsn; i++) {
1021
      if (f->bb[b1].insn[i].tmp != j)
1022
        change_insn_type (&f->bb[b1].insn[i], II_NOP);
1023
      f->bb[b1].insn[i].tmp = 0;
1024
    }
1025
    if (j < mg) {
1026
      f->bb[b1].next[0] = n + j;
1027
      f->bb[b1].next[1] = -1;
1028
      f->bb[n + j].prev[0] = b1;
1029
      f->bb[n + j].prev[1] = -1;
1030
    } else {
1031
      i = f->bb[b1].next[0];
1032
      f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
1033
      f->bb[n + j].prev[1] = -1;
1034 925 markom
      if (i >= 0 && i != BBID_END) {
1035 897 markom
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
1036
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
1037
      }
1038
      i = f->bb[b1].next[1];
1039 925 markom
      if (i >= 0 && i != BBID_END) {
1040 897 markom
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
1041
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
1042
      }
1043
    }
1044
  }
1045
}
1046
 
1047 879 markom
/* Scans sequence of BBs and set bb[].cnt */
1048
void generate_bb_seq (cuc_func *f, char *mp_filename, char *bb_filename)
1049
{
1050
  FILE *fi, *fo;
1051
  struct mprofentry_struct *buf;
1052
  const int bufsize = 256;
1053
  unsigned long *bb_start;
1054
  unsigned long *bb_end;
1055
  int b, i, r;
1056
  int curbb, prevbb = -1;
1057
  unsigned long addr = -1;
1058
  unsigned long prevaddr = -1;
1059 897 markom
  int mssum = 0;
1060
  int mlsum = 0;
1061
  int mscnt = 0;
1062
  int mlcnt = 0;
1063 879 markom
 
1064
  assert (fi = fopen (mp_filename, "rb"));
1065
  assert (fo = fopen (bb_filename, "wb+"));
1066
 
1067
  assert (bb_start = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
1068
  assert (bb_end = (unsigned long *) malloc (sizeof (unsigned long) * f->num_bb));
1069
  for (b = 0; b < f->num_bb; b++) {
1070
    bb_start[b] = f->start_addr + f->bb[b].first * 4;
1071
    bb_end[b] = f->start_addr + f->bb[b].last * 4;
1072
    //printf ("%i %x %x\n", b, bb_start[b], bb_end[b]);
1073
    f->bb[0].cnt = 0;
1074
  }
1075
 
1076
  buf = (struct mprofentry_struct *) malloc (sizeof (struct mprofentry_struct) * bufsize);
1077
  assert (buf);
1078
 
1079
  //printf ("BBSEQ:\n");
1080
  do {
1081
    r = fread (buf, sizeof (struct mprofentry_struct), bufsize, fi);
1082
    //printf ("r%i : ", r);
1083
    for (i = 0; i < r; i++) {
1084
      if (buf[i].type & MPROF_FETCH) {
1085
        //printf ("%x, ", buf[i].addr);
1086
        if (buf[i].addr >= f->start_addr && buf[i].addr <= f->end_addr) {
1087
          assert (buf[i].type & MPROF_32);
1088
          prevaddr = addr;
1089
          addr = buf[i].addr;
1090
          for (b = 0; b < f->num_bb; b++)
1091
            if (bb_start[b] <= addr && addr <= bb_end[b]) break;
1092
          assert (b < f->num_bb);
1093
          curbb = b;
1094
          if (prevaddr + 4 != addr) prevbb = -1;
1095
        } else curbb = -1;
1096
 
1097
#warning TODO: do not count interrupts
1098
        if (curbb != prevbb && curbb >= 0) {
1099
          fwrite (&curbb, sizeof (unsigned long), 1, fo);
1100
          //printf (" [%i] ", curbb);
1101
          f->bb[curbb].cnt++;
1102
          prevbb = curbb;
1103
        }
1104 897 markom
      } else {
1105
        if (verify_memoryarea(buf[i].addr))
1106
          if (buf[i].type & MPROF_WRITE) mscnt++, mssum += cur_area->delayw;
1107
          else mlcnt++, mlsum += cur_area->delayw;
1108 879 markom
      }
1109
    }
1110
    //printf ("\n");
1111
  } while (r == bufsize);
1112
  //printf ("\n");
1113
 
1114 897 markom
  runtime.cuc.mdelay[0] = (1. * mlsum) / mlcnt;
1115
  runtime.cuc.mdelay[1] = (1. * mlsum) / mlcnt;
1116
  runtime.cuc.mdelay[2] = runtime.cuc.mdelay[3] = 1;
1117 883 markom
  f->num_runs = f->bb[0].cnt;
1118 879 markom
  fclose (fi);
1119
  fclose (fo);
1120
  free (buf);
1121
  free (bb_end);
1122
  free (bb_start);
1123
 
1124
  /* Initialize basic block relocations */
1125
  f->num_init_bb = f->num_bb;
1126
  //printf ("num_init_bb = %i\n", f->num_init_bb);
1127
  assert (f->init_bb_reloc = (int *)malloc (sizeof (int) * f->num_init_bb));
1128
  for (b = 0; b < f->num_init_bb; b++) f->init_bb_reloc[b] = b;
1129
}
1130
 
1131
/* Scans sequence of BBs and set counts for pre/unrolled loop for BB b */
1132
void count_bb_seq (cuc_func *f, int b, char *bb_filename, int *counts, int preroll, int unroll)
1133
{
1134
  FILE *fi;
1135
  const int bufsize = 256;
1136
  int i, r;
1137
  int *buf;
1138
  int cnt = 0;
1139
  int times = preroll - 1 + unroll;
1140
 
1141
  assert (fi = fopen (bb_filename, "rb"));
1142
  for (i = 0; i < times; i++) counts[i] = 0;
1143
  assert (buf = (int *) malloc (sizeof (int) * bufsize));
1144
 
1145
  do {
1146
    r = fread (buf, sizeof (int), bufsize, fi);
1147
    for (i = 0; i < r; i++) {
1148
      /* count consecutive acesses */
1149
      if (f->init_bb_reloc[buf[i]] == b) {
1150
        counts[cnt]++;
1151
        if (++cnt >= times) cnt = preroll - 1;
1152
      } else cnt = 0;
1153
    }
1154
  } while (r == bufsize);
1155
 
1156
  log ("Counts %i,%i :", preroll, unroll);
1157
  for (i = 0; i < times; i++) log ("%x ", counts[i]);
1158
  log ("\n");
1159
 
1160
  fclose (fi);
1161
  free (buf);
1162
}
1163
 
1164
/* relocate all accesses inside of BB b to back/fwd */
1165
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
1166
{
1167
  int i, j;
1168
  for (i = 0; i < bb->ninsn; i++)
1169
    for (j = 0; j < MAX_OPERANDS; j++)
1170
      if (bb->insn[i].opt[j] & OPT_REF
1171
       && REF_BB (bb->insn[i].op[j]) == b) {
1172
        int t = REF_I (bb->insn[i].op[j]);
1173
        if (t < i) bb->insn[i].op[j] = REF (back, t);
1174
        else bb->insn[i].op[j] = REF (fwd, t);
1175
      }
1176
}
1177
 
1178
/* Unroll loop b unroll times and return new function. Original
1179
   function is unmodified. */
1180
static cuc_func *unroll_loop (cuc_func *f, int b, int unroll)
1181
{
1182
  int b1, t, i, j, prevb, prevart_b;
1183
  cuc_func *n = dup_func (f);
1184
  cuc_bb *ob = &f->bb[b];
1185
  cuc_insn *ii;
1186
 
1187
  assert (unroll > 1);
1188
  //printf ("unroll BB%i x %i (num_bb %i)\n", b, unroll, n->num_bb);
1189
  unroll--;
1190
  assert (n->num_bb + unroll * 2 < MAX_BB);
1191
 
1192
  prevb = b;
1193
  prevart_b = b;
1194
  /* Duplicate the BB */
1195
  for (t = 0; t < unroll; t++) {
1196
    cuc_bb *pb = &n->bb[prevart_b];
1197
    /* Add new block and set links */
1198
    b1 = n->num_bb++;
1199
    cpy_bb (&n->bb[b1], ob);
1200
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1201
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1202
 
1203
    /* Set predecessor's successor */
1204
    if (n->bb[prevb].next[0] == b) {
1205
      n->bb[prevb].next[0] = b1;
1206
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1207
      else pb->next[1] = b1 + 1;
1208
      n->bb[b1].next[1] = b1 + 1;
1209
    } else if (n->bb[prevb].next[1] == b) {
1210
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1211
      else pb->next[1] = b1 + 1;
1212
      n->bb[b1].next[0] = b1 + 1;
1213
      n->bb[prevb].next[1] = b1;
1214
    } else assert (0);
1215
 
1216
    /* Set predecessor */
1217
    n->bb[b1].prev[0] = prevb;
1218
    n->bb[b1].prev[1] = -1;
1219
 
1220
    /* Relocate backward references to current instance and forward references
1221
       to previous one */
1222
    relocate_bb (&n->bb[b1], b, b1, prevb);
1223
 
1224
    /* add artificial block, just to join accesses */
1225
    b1 = n->num_bb++;
1226
    cpy_bb (&n->bb[b1], ob);
1227
    n->bb[b1].cnt = 0;
1228
 
1229
    for (i = 0; i < ob->ninsn - 1; i++) {
1230
      ii = &n->bb[b1].insn[i];
1231
      if (ob->insn[i].opt[0] & OPT_DEST) {
1232
        change_insn_type (ii, II_CMOV);
1233
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1234
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
1235
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
1236
 
1237
        /* Take left one, if we should have finished the first iteration*/
1238
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1239
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
1240
        } else {
1241
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
1242
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
1243
        }
1244
        ii->dep = NULL;
1245 930 markom
        ii->type = ob->insn[i].type & IT_COND;
1246 879 markom
      } else {
1247
        change_insn_type (ii, II_NOP);
1248
      }
1249
    }
1250
 
1251 928 markom
    /* Add conditional or instruction at the end, prioritizing flags */
1252 879 markom
    ii = &n->bb[b1].insn[ob->ninsn - 1];
1253 928 markom
    change_insn_type (ii, II_CMOV);
1254 879 markom
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1255
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1256
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
1257
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
1258
    } else {
1259
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
1260
      ii->opt[1] = OPT_REF;
1261
    }
1262
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
1263
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
1264
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
1265
    } else {
1266
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
1267
      ii->opt[2] = OPT_REF;
1268
    }
1269 928 markom
    /* {z = x || y;} is same as {z = x ? x : y;} */
1270
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
1271 930 markom
    ii->type |= IT_COND;
1272 879 markom
 
1273
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1274
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1275
    n->bb[b1].prev[0] = prevart_b;
1276
    n->bb[b1].prev[1] = b1 - 1;
1277 925 markom
    n->bb[b1].next[0] = -1;
1278 879 markom
    n->bb[b1].next[1] = -1;
1279
 
1280
    prevb = b1 - 1;
1281
    prevart_b = b1;
1282
  }
1283
  if (ob->type & BB_END) {
1284
    n->bb[prevart_b].type |= BB_END;
1285
    n->bb[b].type &= ~BB_END;
1286
  }
1287
 
1288 925 markom
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
1289 879 markom
  //print_cuc_bb (n, "unroll1");
1290
  /* repair BB after loop, to point back to latest artificial BB */
1291
  b1 = n->bb[prevart_b].next[0];
1292 925 markom
  if (b1 >= 0 && b1 != BBID_END) {
1293 897 markom
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
1294
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
1295 879 markom
    else assert (0);
1296
  }
1297
 
1298
  /* Relink back to start of the loop */
1299
  /* Set predecessor's successor */
1300
  if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = b;
1301
  else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = b;
1302
  else assert (0);
1303
 
1304
  /* Set predecessor */
1305
  if (n->bb[b].prev[0] == b) n->bb[b].prev[0] = prevb;
1306
  else if (n->bb[b].prev[1] == b) n->bb[b].prev[1] = prevb;
1307
  else assert (0);
1308
 
1309
  //print_cuc_bb (n, "unroll2");
1310
 
1311
  /* Relocate backward references to current instance and forward references
1312
     to previous one */
1313
  relocate_bb (&n->bb[b], b, b, prevb);
1314
 
1315
  /* Relocate all other blocks to point to latest prevart_b */
1316
  for (i = 0; i < f->num_bb; i++)
1317
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
1318
 
1319
  return n;
1320
}
1321
 
1322
/* Preroll loop b preroll times and return new function. Original
1323
   function is unmodified. */
1324
static cuc_func *preroll_loop (cuc_func *f, int b, int preroll)
1325
{
1326
  int b1, t, i, j, prevb, prevart_b;
1327
  cuc_func *n = dup_func (f);
1328
  cuc_bb *ob = &f->bb[b];
1329
  cuc_insn *ii;
1330
 
1331
  assert (preroll > 1);
1332
  //printf ("preroll BB%i x %i (num_bb %i)\n", b, preroll, n->num_bb);
1333
  preroll--;
1334
  assert (n->num_bb + preroll * 2 < MAX_BB);
1335
 
1336
  prevb = b;
1337
  prevart_b = b;
1338
  /* Duplicate the BB */
1339
  for (t = 0; t < preroll; t++) {
1340
    cuc_bb *pb = &n->bb[prevart_b];
1341
    /* Add new block and set links */
1342
    b1 = n->num_bb++;
1343
    cpy_bb (&n->bb[b1], ob);
1344
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1345
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1346
 
1347
    /* Set predecessor's successor */
1348
    if (n->bb[prevb].next[0] == b) {
1349
      n->bb[prevb].next[0] = b1;
1350
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1351
      else pb->next[1] = b1 + 1;
1352
      n->bb[b1].next[1] = b1 + 1;
1353
    } else if (n->bb[prevb].next[1] == b) {
1354
      if (pb->next[0] < 0) pb->next[0] = b1 + 1;
1355
      else pb->next[1] = b1 + 1;
1356
      n->bb[b1].next[0] = b1 + 1;
1357
      n->bb[prevb].next[1] = b1;
1358
    } else assert (0);
1359
 
1360
    /* Set predecessor */
1361
    n->bb[b1].prev[0] = prevb;
1362
    n->bb[b1].prev[1] = -1;
1363
 
1364
    /* Relocate backward references to current instance and forward references
1365
       to previous one */
1366
    relocate_bb (&n->bb[b1], b, b1, prevb);
1367
 
1368
    /* add artificial block, just to join accesses */
1369
    b1 = n->num_bb++;
1370
    cpy_bb (&n->bb[b1], ob);
1371
    n->bb[b1].cnt = 0;
1372
 
1373
    for (i = 0; i < ob->ninsn - 1; i++) {
1374
      ii = &n->bb[b1].insn[i];
1375
      if (ob->insn[i].opt[0] & OPT_DEST) {
1376
        change_insn_type (ii, II_CMOV);
1377
        ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1378
        ii->op[1] = REF (prevart_b, i); ii->opt[1] = OPT_REF;
1379
        ii->op[2] = REF (b1 - 1, i); ii->opt[2] = OPT_REF;
1380
 
1381
        /* Take left one, if we should have finished the first iteration*/
1382
        if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1383
          ii->op[3] = pb->insn[pb->ninsn - 1].op[1]; ii->opt[3] = pb->insn[pb->ninsn - 1].opt[1];
1384
        } else {
1385
          assert (pb->insn[pb->ninsn - 1].type & IT_COND);
1386
          ii->op[3] = REF (prevart_b, pb->ninsn - 1); ii->opt[3] = OPT_REF;
1387
        }
1388
        ii->dep = NULL;
1389 930 markom
        ii->type = ob->insn[i].type & IT_COND;
1390 879 markom
      } else {
1391
        change_insn_type (ii, II_NOP);
1392
      }
1393
    }
1394
 
1395 928 markom
    /* Add conditional or instruction at the end, prioritizing flags */
1396 879 markom
    ii = &n->bb[b1].insn[ob->ninsn - 1];
1397 928 markom
    change_insn_type (ii, II_CMOV);
1398 879 markom
    ii->op[0] = FLAG_REG; ii->opt[0] = OPT_REGISTER | OPT_DEST;
1399
    if (pb->insn[pb->ninsn - 1].type & IT_BRANCH) {
1400
      ii->op[1] = pb->insn[pb->ninsn - 1].op[1];
1401
      ii->opt[1] = pb->insn[pb->ninsn - 1].opt[1];
1402
    } else {
1403
      ii->op[1] = REF (prevart_b, pb->ninsn - 1);
1404
      ii->opt[1] = OPT_REF;
1405
    }
1406
    if (n->bb[b1 - 1].insn[pb->ninsn - 1].type & IT_BRANCH) {
1407
      ii->op[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].op[1];
1408
      ii->opt[2] = n->bb[b1 - 1].insn[pb->ninsn - 1].opt[1];
1409
    } else {
1410
      ii->op[2] = REF (b1 - 1, pb->ninsn - 1);
1411
      ii->opt[2] = OPT_REF;
1412
    }
1413 928 markom
    /* {z = x || y;} is same as {z = x ? x : y;} */
1414
    ii->op[3] = ii->op[1]; ii->opt[3] = ii->opt[1];
1415 930 markom
    ii->type |= IT_COND;
1416 879 markom
 
1417
    /* Only one should be in loop, so we remove any INLOOP flags from duplicates */
1418
    n->bb[b1].type &= ~(BB_END | BB_INLOOP);
1419
    n->bb[b1].prev[0] = prevart_b;
1420
    n->bb[b1].prev[1] = b1 - 1;
1421 925 markom
    n->bb[b1].next[0] = -1;
1422 879 markom
    n->bb[b1].next[1] = -1;
1423
 
1424
    prevb = b1 - 1;
1425
    prevart_b = b1;
1426
  }
1427
  if (ob->type & BB_END) {
1428
    n->bb[prevart_b].type |= BB_END;
1429
    n->bb[b].type &= ~BB_END;
1430
  }
1431 925 markom
  n->bb[prevart_b].next[0] = ob->next[0] == b ? ob->next[1] : ob->next[0];
1432 879 markom
 
1433
  //print_cuc_bb (n, "preroll1");
1434
  /* repair BB after loop, to point back to latest artificial BB */
1435
  b1 = n->bb[prevart_b].next[0];
1436 925 markom
  if (b1 >= 0 && b1 != BBID_END) {
1437 897 markom
    if (n->bb[b1].prev[0] == b) n->bb[b1].prev[0] = prevart_b;
1438
    else if (n->bb[b1].prev[1] == b) n->bb[b1].prev[1] = prevart_b;
1439 879 markom
    else assert (0);
1440
  }
1441
 
1442
  /* Relink to itself */
1443
  /* Set predecessor's successor */
1444
  if (n->bb[prevb].next[0] == b) n->bb[prevb].next[0] = prevb;
1445
  else if (n->bb[prevb].next[1] == b) n->bb[prevb].next[1] = prevb;
1446
  else assert (0);
1447
  n->bb[prevb].prev[1] = prevb;
1448
 
1449
  if (n->bb[b].prev[0] == b) {
1450
    n->bb[b].prev[0] = n->bb[b].prev[1];
1451
    n->bb[b].prev[1] = -1;
1452
  } else if (n->bb[b].prev[1] == b) {
1453
    n->bb[b].prev[1] = -1;
1454
  }
1455
 
1456
  //print_cuc_bb (n, "preroll2");
1457
 
1458
  /* Relocate backward references to current instance and forward references
1459
     to previous one */
1460
  relocate_bb (&n->bb[b], b, b, prevb);
1461
 
1462
  /* Relocate all other blocks to point to latest prevart_b */
1463
  for (i = 0; i < f->num_bb; i++)
1464
    if (i != b) relocate_bb (&n->bb[i], b, prevart_b, prevart_b);
1465
 
1466
  return n;
1467
}
1468
 
1469
/* Unroll loop b unroll times and return new function. Original
1470
   function is unmodified. */
1471
cuc_func *preunroll_loop (cuc_func *f, int b, int preroll, int unroll, char *bb_filename)
1472
{
1473
  int b1, i;
1474
  cuc_func *n, *t;
1475
  int *counts;
1476
  int *bb_reloc;
1477
 
1478
  if (preroll > 1) {
1479
    t = preroll_loop (f, b, preroll);
1480
    b1 = t->num_bb - 2;
1481
    if (unroll > 1) {
1482
      //print_cuc_bb (t, "preunroll1");
1483
      n = unroll_loop (t, b1, unroll);
1484
      free_func (t);
1485
    } else n = t;
1486
  } else {
1487
    b1 = b;
1488 897 markom
    if (unroll > 1) n = unroll_loop (f, b1, unroll);
1489
    else return dup_func (f);
1490 879 markom
  }
1491
 
1492 897 markom
  /* Assign new counts to functions */
1493 879 markom
  assert (counts = (int *)malloc (sizeof (int) * (preroll - 1 + unroll)));
1494
  count_bb_seq (n, b, bb_filename, counts, preroll, unroll);
1495
  for (i = 0; i < preroll - 1 + unroll; i++) {
1496
    if (i == 0) b1 = b;
1497
    else b1 = f->num_bb + (i - 1) * 2;
1498
    n->bb[b1].cnt = counts[i];
1499
  }
1500
 
1501
  //print_cuc_bb (n, "preunroll");
1502
  free (counts);
1503
  return n;
1504
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.