OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [nog_patch_61/] [or1ksim/] [cuc/] [insn.c] - Blame information for rev 1062

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 879 markom
/* insn.c -- OpenRISC Custom Unit Compiler, instruction support
2
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
3
 *
4
 *    This file is part of OpenRISC 1000 Architectural Simulator.
5
 *
6
 *    This program is free software; you can redistribute it and/or modify
7
 *    it under the terms of the GNU General Public License as published by
8
 *    the Free Software Foundation; either version 2 of the License, or
9
 *    (at your option) any later version.
10
 *
11
 *    This program is distributed in the hope that it will be useful,
12
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *    GNU General Public License for more details.
15
 *
16
 *    You should have received a copy of the GNU General Public License
17
 *    along with this program; if not, write to the Free Software
18
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
 
20
#include <stdio.h>
21
#include <stdlib.h>
22
#include <stdarg.h>
23
#include <assert.h>
24 897 markom
#include "sim-config.h"
25 879 markom
#include "cuc.h"
26
#include "insn.h"
27
 
28
/* Table of known instructions.  Watch out for indexes I_*! */
29
const cuc_known_insn known[II_LAST + 1] = {
30
{"add", 1, "assign \1 = \2 + \3;"},
31
{"sub", 0, "assign \1 = \2 - \3;"},
32
{"and", 1, "assign \1 = \2 & \3;"},
33
{"or",  1, "assign \1 = \2 | \3;"},
34
{"xor", 1, "assign \1 = \2 ^ \3;"},
35
{"mul", 1, "assign \1 = \2 * \3;"},
36
 
37
{"srl", 0, "assign \1 = \2 >> \3;"},
38
{"sll", 0, "assign \1 = \2 << \3;"},
39
{"sra", 0, "assign \1 = ({32{\2[31]}} << (6'd32-{1'b0, \3}))\n\
40
                 | \2 >> \3;"},
41
 
42 926 markom
{"lb",  0, "always @(posedge clk)"},
43
{"lh",  0, "always @(posedge clk)"},
44
{"lw",  0, "always @(posedge clk)"},
45 879 markom
{"sb",  0, "/* mem8[\2] = \1 */"},
46
{"sh",  0, "/* mem16[\2] = \1 */"},
47
{"sw",  0, "/* mem32[\2] = \1 */"},
48
 
49
{"sfeq", 1, "assign \1 = \2 == \3;"},
50
{"sfne", 1, "assign \1 = \2 != \3;"},
51
{"sfle", 0, "assign \1 = \2 <= \3;"},
52
{"sflt", 0, "assign \1 = \2 < \3;"},
53 905 markom
{"sfge", 0, "assign \1 = \2 >= \3;"},
54 879 markom
{"sfgt", 0, "assign \1 = \2 > \3;"},
55
{"bf",  0, ""},
56
 
57
{"lrbb", 0,"always @(posedge clk or posedge rst)"},
58
{"cmov", 0,"assign \1 = \4 ? \2 : \3;"},
59 926 markom
{"reg", 0, "always @(posedge clk)"},
60 879 markom
 
61 937 markom
{"nop", 1, ""},
62 917 markom
{"call", 0, "/* function call */"}};
63 879 markom
 
64
/* Find known instruction and attach them to insn */
65
void change_insn_type (cuc_insn *i, int index)
66
{
67
  int j;
68
  assert (index >= 0 && index <= II_LAST);
69
  i->index = index;
70
  if (i->index == II_NOP) {
71
    for (j = 0; j < MAX_OPERANDS; j++) i->opt[j] = OPT_NONE;
72
    i->type = 0;
73
    i->dep = NULL;
74 937 markom
    i->disasm[0] = '\0';
75 879 markom
  }
76
}
77
 
78
/* Returns instruction name */
79
const char *cuc_insn_name (cuc_insn *ii) {
80
  if (ii->index < 0 || ii->index > II_LAST) return "???";
81
  else return known[ii->index].name;
82
}
83 883 markom
 
84 897 markom
/* Prints out instructions */
85 1062 markom
void print_insns (int bb, cuc_insn *insn, int ninsn, int verbose)
86 897 markom
{
87
  int i, j;
88
  for (i = 0; i < ninsn; i++) {
89 1062 markom
    char tmp[10];
90 897 markom
    dep_list *l = insn[i].dep;
91 1062 markom
    sprintf (tmp, "[%x_%x]", bb, i);
92
    PRINTF ("%-8s%c %-4s ", tmp, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i]));
93 897 markom
    if (verbose) {
94 997 markom
      PRINTF ("%-20s insn = %08x, index = %i, type = %04x ",
95 897 markom
                      insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type);
96 997 markom
    } else PRINTF ("type = %04x ", insn[i].type);
97 897 markom
    for (j = 0; j < MAX_OPERANDS; j++) {
98 997 markom
      if (insn[i].opt[j] & OPT_DEST) PRINTF ("*");
99 897 markom
      switch (insn[i].opt[j] & ~OPT_DEST) {
100
        case OPT_NONE: break;
101 937 markom
        case OPT_CONST: if (insn[i].type & IT_COND && (insn[i].index == II_CMOV
102 997 markom
                         || insn[i].index == II_ADD)) PRINTF ("%x, ", insn[i].op[j]);
103
                        else PRINTF ("0x%08x, ", insn[i].op[j]); break;
104 1062 markom
        case OPT_JUMP: PRINTF ("J%x, ", insn[i].op[j]); break;
105 997 markom
        case OPT_REGISTER: PRINTF ("r%i, ", insn[i].op[j]); break;
106 1062 markom
        case OPT_REF: PRINTF ("[%x_%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break;
107 997 markom
        case OPT_BB: PRINTF ("BB "); print_bb_num (insn[i].op[j]); PRINTF (", "); break;
108
        case OPT_LRBB: PRINTF ("LRBB, "); break;
109 897 markom
        default:
110 1062 markom
          fprintf (stderr, "Invalid operand type %s(%x_%x) = %x\n",
111 897 markom
                         cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]);
112
          assert (0);
113
      }
114
    }
115
    if (l) {
116 997 markom
      PRINTF ("\n\tdep:");
117 897 markom
      while (l) {
118 1062 markom
        PRINTF (" [%x_%x],", REF_BB (l->ref), REF_I (l->ref));
119 897 markom
        l = l->next;
120
      }
121
    }
122 997 markom
    PRINTF ("\n");
123 897 markom
  }
124
}
125
 
126
void add_dep (dep_list **list, int dep)
127
{
128
  dep_list *ndep;
129
  dep_list **tmp = list;
130
 
131
  while (*tmp) {
132
    if ((*tmp)->ref == dep) return; /* already there */
133
    tmp = &((*tmp)->next);
134
  }
135
  ndep = (dep_list *)malloc (sizeof (dep_list));
136
  ndep->ref = dep;
137
  ndep->next = NULL;
138
  *tmp = ndep;
139
}
140
 
141
void dispose_list (dep_list **list)
142
{
143
  while (*list) {
144
    dep_list *tmp = *list;
145
    *list = tmp->next;
146
    free (tmp);
147
  }
148
}
149
 
150
void add_data_dep (cuc_func *f)
151
{
152
  int b, i, j;
153
  dep_list *tmp;
154
  for (b = 0; b < f->num_bb; b++) {
155
    cuc_insn *insn = f->bb[b].insn;
156
    for (i = 0; i < f->bb[b].ninsn; i++)
157
      for (j = 0; j < MAX_OPERANDS; j++) {
158
        fflush (stdout);
159
        if (insn[i].opt[j] & OPT_REF) {
160
          /* Copy list from predecessor */
161
          dep_list *l = f->INSN(insn[i].op[j]).dep;
162
          while (l) {
163
            add_dep (&insn[i].dep, l->ref);
164
            l = l->next;
165
          }
166
          /* add predecessor */
167
          add_dep (&insn[i].dep, insn[i].op[j]);
168
        }
169
      }
170
  }
171
}
172
 
173 953 markom
/* Inserts n nops before insn 'ref' */
174
void insert_insns (cuc_func *f, int ref, int n)
175
{
176
  int b1, i, j;
177
  int b = REF_BB(ref);
178
  int ins = REF_I(ref);
179
 
180
  assert (b < f->num_bb);
181
  assert (ins <= f->bb[b].ninsn);
182
  assert (f->bb[b].ninsn + n < MAX_INSNS);
183 996 markom
  if (cuc_debug >= 8) print_cuc_bb (f, "PREINSERT");
184 953 markom
  f->bb[b].insn = (cuc_insn *) realloc (f->bb[b].insn,
185
                                        (f->bb[b].ninsn + n) * sizeof (cuc_insn));
186
 
187
  /* Set up relocations */
188
  for (i = 0; i < f->bb[b].ninsn; i++)
189
    if (i < ins) reloc[i] = i;
190
    else reloc[i] = i + n;
191
 
192
  /* Move instructions, based on relocations */
193
  for (i = f->bb[b].ninsn - 1; i >= 0; i--) f->bb[b].insn[reloc[i]] = f->bb[b].insn[i];
194
  for (i = 0; i < n; i++) change_insn_type (&f->bb[b].insn[ins + i], II_NOP);
195
 
196
  f->bb[b].ninsn += n;
197
  for (b1 = 0; b1 < f->num_bb; b1++) {
198
    dep_list *d = f->bb[b1].mdep;
199
    while (d) {
200
      if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins)
201
        d->ref = REF (b, REF_I (d->ref) + n);
202
      d = d->next;
203
    }
204
    for (i = 0; i < f->bb[b1].ninsn; i++) {
205
      d = f->bb[b1].insn[i].dep;
206
      while (d) {
207
        if (REF_BB (d->ref) == b && REF_I (d->ref) >= ins)
208
          d->ref = REF (b, REF_I (d->ref) + n);
209
        d = d->next;
210
      }
211
      for (j = 0; j < MAX_OPERANDS; j++)
212
        if (f->bb[b1].insn[i].opt[j] & OPT_REF && REF_BB (f->bb[b1].insn[i].op[j]) == b
213
         && REF_I (f->bb[b1].insn[i].op[j]) >= ins)
214
          f->bb[b1].insn[i].op[j] = REF (b, REF_I (f->bb[b1].insn[i].op[j]) + n);
215
    }
216
  }
217
  for (i = 0; i < f->nmsched; i++)
218
    if (REF_BB(f->msched[i]) == b) f->msched[i] = REF (b, reloc[REF_I (f->msched[i])]);
219 996 markom
  if (cuc_debug >= 8) print_cuc_bb (f, "POSTINSERT");
220 953 markom
  cuc_check (f);
221
}
222
 
223 897 markom
/* returns nonzero, if instruction was simplified */
224
int apply_edge_condition (cuc_insn *ii)
225
{
226
  unsigned int c = ii->op[2];
227
 
228 931 markom
  switch (ii->index) {
229
  case II_AND:
230 897 markom
    if (ii->opt[2] & OPT_CONST && c == 0) {
231
      change_insn_type (ii, II_ADD);
232
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
233
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
234
      return 1;
235 932 markom
    } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
236
      change_insn_type (ii, II_ADD);
237
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
238
      return 1;
239 931 markom
    } else break;
240
  case II_OR:
241 932 markom
    if (ii->opt[2] & OPT_CONST && c == 0x0) {
242 897 markom
      change_insn_type (ii, II_ADD);
243
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
244
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
245
      return 1;
246 932 markom
    } else if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
247
      change_insn_type (ii, II_ADD);
248
      ii->op[1] = 0xffffffff; ii->opt[1] = OPT_CONST;
249
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
250
      return 1;
251 931 markom
    } else break;
252
  case II_SUB:
253 897 markom
    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
254
      change_insn_type (ii, II_ADD);
255
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
256
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
257
      return 1;
258 931 markom
    } else break;
259
  case II_MUL:
260 897 markom
    if (ii->opt[2] & OPT_CONST && c == 0) {
261
      change_insn_type (ii, II_ADD);
262
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
263
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
264
      return 1;
265
    } else
266
    if (ii->opt[2] & OPT_CONST && c == 1) {
267
      change_insn_type (ii, II_ADD);
268
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
269
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
270
      return 1;
271
    } else
272
    if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
273
      change_insn_type (ii, II_SUB);
274
      ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1];
275
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
276
      return 1;
277 931 markom
    } else break;
278
  case II_SRL:
279 897 markom
    if (ii->opt[2] & OPT_CONST && c == 0) {
280
      change_insn_type (ii, II_ADD);
281
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
282
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
283
      return 1;
284
    } else if (ii->opt[2] & OPT_CONST && c >= 32) {
285
      change_insn_type (ii, II_ADD);
286
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
287
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
288
      return 1;
289 931 markom
    } else break;
290
  case II_SLL:
291 897 markom
    if (ii->opt[2] & OPT_CONST && c == 0) {
292
      change_insn_type (ii, II_ADD);
293
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
294
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
295
      return 1;
296
    } else if (ii->opt[2] & OPT_CONST && c >= 32) {
297
      change_insn_type (ii, II_ADD);
298
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
299
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
300
      return 1;
301 931 markom
    } else break;
302
  case II_SRA:
303 897 markom
    if (ii->opt[2] & OPT_CONST && c == 0) {
304
      change_insn_type (ii, II_ADD);
305
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
306
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
307
      return 1;
308 931 markom
    } else break;
309
  case II_SFEQ:
310
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
311
      change_insn_type (ii, II_ADD);
312
      ii->op[1] = ii->op[1] == ii->op[2]; ii->opt[1] = OPT_CONST;
313
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
314
      return 1;
315
    } else break;
316
  case II_SFNE:
317
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
318
      change_insn_type (ii, II_ADD);
319
      ii->op[1] = ii->op[1] != ii->op[2]; ii->opt[1] = OPT_CONST;
320
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
321
      return 1;
322
    } else break;
323
  case II_SFLE:
324
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
325
      change_insn_type (ii, II_ADD);
326
      ii->op[1] = ii->op[1] <= ii->op[2]; ii->opt[1] = OPT_CONST;
327
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
328
      return 1;
329 973 markom
    } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) {
330
      change_insn_type (ii, II_SFEQ);
331 931 markom
    } else break;
332
  case II_SFLT:
333
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
334
      change_insn_type (ii, II_ADD);
335
      ii->op[1] = ii->op[1] < ii->op[2]; ii->opt[1] = OPT_CONST;
336
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
337
      return 1;
338 973 markom
    } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) {
339
      change_insn_type (ii, II_ADD);
340
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
341
    } break;
342 931 markom
  case II_SFGE:
343
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
344
      change_insn_type (ii, II_ADD);
345
      ii->op[1] = ii->op[1] >= ii->op[2]; ii->opt[1] = OPT_CONST;
346
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
347
      return 1;
348 973 markom
    } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) {
349
      change_insn_type (ii, II_ADD);
350
      ii->op[1] = 1; ii->opt[1] = OPT_CONST;
351 931 markom
    } else break;
352
  case II_SFGT:
353
    if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
354
      change_insn_type (ii, II_ADD);
355
      ii->op[1] = ii->op[1] > ii->op[2]; ii->opt[1] = OPT_CONST;
356
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
357
      return 1;
358 973 markom
    } else if (ii->opt[2] && OPT_CONST && ii->op[2] == 0) {
359
      change_insn_type (ii, II_SFNE);
360 931 markom
    } else break;
361
  case II_CMOV:
362 897 markom
    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
363
      change_insn_type (ii, II_ADD);
364
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
365
      ii->opt[3] = OPT_NONE;
366
      return 1;
367
    }
368 905 markom
    if (ii->opt[3] & OPT_CONST) {
369
      change_insn_type (ii, II_ADD);
370
      if (ii->op[3]) {
371
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
372
      } else {
373
        ii->op[1] = 0; ii->opt[1] = OPT_CONST;
374
      }
375
      ii->opt[3] = OPT_NONE;
376
      return 1;
377
    }
378 927 markom
    if (ii->type & IT_COND) {
379
      if (ii->opt[1] & OPT_CONST && ii->opt[2] & OPT_CONST) {
380
        if (ii->op[1] && !ii->op[2]) {
381
          change_insn_type (ii, II_ADD);
382
          ii->op[1] = ii->op[3]; ii->opt[1] = ii->opt[3];
383
          ii->op[2] = 0; ii->opt[2] = OPT_CONST;
384
          ii->opt[3] = OPT_NONE;
385
          return 1;
386
        }
387
        if (ii->op[1] && ii->op[2]) {
388
          change_insn_type (ii, II_ADD);
389
          ii->op[1] = 1; ii->opt[1] = OPT_CONST;
390
          ii->op[2] = 0; ii->opt[2] = OPT_CONST;
391
          ii->opt[3] = OPT_NONE;
392
          return 1;
393
        }
394
        if (!ii->op[1] && !ii->op[2]) {
395
          change_insn_type (ii, II_ADD);
396
          ii->op[1] = 0; ii->opt[1] = OPT_CONST;
397
          ii->op[2] = 0; ii->opt[2] = OPT_CONST;
398
          ii->opt[3] = OPT_NONE;
399 930 markom
          return 1;
400 927 markom
        }
401
      }
402 931 markom
      if (ii->op[1] == ii->op[3] && ii->opt[1] == ii->opt[3]) {
403
        ii->op[1] = 1; ii->opt[1] = OPT_CONST;
404
        return 1;
405
      }
406
      if (ii->op[2] == ii->op[3] && ii->opt[2] == ii->opt[3]) {
407
        ii->op[2] = 0; ii->opt[2] = OPT_CONST;
408
        return 1;
409
      }
410 927 markom
    }
411 931 markom
    break;
412 897 markom
  }
413
  return 0;
414
}
415
 
416 902 markom
/* First primary input */
417
static unsigned long tmp_op, tmp_opt;
418
 
419
/* Recursive function that searches for primary inputs;
420
   returns 0 if cmov can be replaced by add */
421
static int cmov_needed (cuc_func *f, int ref)
422
{
423
  cuc_insn *ii = &f->INSN(ref);
424
  int j;
425
 
426 903 markom
  cucdebug (4, " %x", ref);
427 902 markom
  /* mark visited, if already marked, we have a loop, ignore */
428
  if (ii->tmp) return 0;
429
  ii->tmp = 1;
430
 
431
  /* handle normal movs separately */
432 917 markom
  if (ii->index == II_ADD && !(ii->type & IT_VOLATILE)
433 902 markom
    && ii->opt[2] == OPT_CONST && ii->op[2] == 0) {
434
    if (ii->opt[1] == OPT_REF) {
435
      if (cmov_needed (f, ii->op[1])) {
436
        ii->tmp = 0;
437
        return 1;
438
      }
439
    } else {
440
      if (tmp_opt == OPT_NONE) {
441
        tmp_op = ii->op[1];
442
        tmp_opt = ii->opt[1];
443
      } else if (tmp_opt != ii->opt[1] || tmp_op != ii->op[1]) {
444
        ii->tmp = 0;
445
        return 1;
446
      }
447
    }
448
    ii->tmp = 0;
449
    return 0;
450
  }
451
 
452
  /* Is this instruction CMOV? no => add to primary inputs */
453
  if (ii->index != II_CMOV || ii->type & IT_VOLATILE)
454
    if (tmp_opt == OPT_NONE) {
455
      tmp_op = ref;
456
      tmp_opt = OPT_REF;
457
      ii->tmp = 0;
458
      return 0;
459
    } else if (tmp_opt != OPT_REF || tmp_op != ref) {
460
      ii->tmp = 0;
461
      return 1;
462 917 markom
    } else {
463
      ii->tmp = 0;
464
      return 0;
465 902 markom
    }
466
 
467
  for (j = 1; j < 3; j++) {
468 903 markom
    cucdebug (4, "(%x:%i)", ref, j);
469 902 markom
    if (ii->opt[j] == OPT_REF) {
470
      if (cmov_needed (f, ii->op[j])) {
471
        ii->tmp = 0;
472
        return 1;
473
      }
474
    } else {
475
      if (tmp_opt == OPT_NONE) {
476
        tmp_op = ii->op[j];
477
        tmp_opt = ii->opt[j];
478
      } else if (tmp_opt != ii->opt[j] || tmp_op != ii->op[j]) {
479
        ii->tmp = 0;
480
        return 1;
481
      }
482
    }
483
  }
484
 
485
  ii->tmp = 0;
486
  return 0;
487
}
488
 
489
/* Search and optimize complex cmov assignments */
490 931 markom
int optimize_cmovs (cuc_func *f)
491 902 markom
{
492 931 markom
  int modified = 0;
493 902 markom
  int b, i, j;
494
 
495
  /* Mark all instructions unvisited */
496
  for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD))
497
    for (i = 0; i < f->bb[b].ninsn; i++) f->bb[b].insn[i].tmp = 0;
498
 
499
  for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) {
500
    for (i = 0; i < f->bb[b].ninsn; i++) {
501
      cuc_insn *ii = &f->bb[b].insn[i];
502
      if (ii->index == II_CMOV && !(ii->type & IT_VOLATILE)) {
503
        tmp_opt = OPT_NONE;
504 903 markom
        cucdebug (4, "\n");
505 902 markom
        if (!cmov_needed (f, REF(b, i))) {
506
          assert (tmp_opt != OPT_NONE);
507
          change_insn_type (ii, II_ADD);
508
          ii->op[1] = tmp_op; ii->opt[1] = tmp_opt;
509
          ii->op[2] = 0; ii->opt[2] = OPT_CONST;
510
          ii->opt[3] = OPT_NONE;
511 931 markom
          modified = 1;
512 902 markom
        }
513
      }
514
    }
515
  }
516 931 markom
  return modified;
517 902 markom
}
518
 
519 930 markom
/* returns number of instructions, using instruction ref */
520
static int insn_uses (cuc_func *f, int ref)
521
{
522
  int b, i, j;
523
  int cnt = 0;
524
  for (b = 0; b < f->num_bb; b++)
525
    for (i = 0; i < f->bb[b].ninsn; i++)
526
      for (j = 0; j < MAX_OPERANDS; j++)
527
        if (f->bb[b].insn[i].opt[j] & OPT_REF && f->bb[b].insn[i].op[j] == ref) cnt++;
528
  return cnt;
529
}
530
 
531
/* handles some common CMOV, CMOV-CMOV cases;
532
   returns nonzero if anything optimized */
533
static int optimize_cmov_more (cuc_func *f, int ref)
534
{
535
  int t = 0;
536
  cuc_insn *ii = &f->INSN(ref);
537
  assert (ii->index == II_CMOV);
538
 
539
  /* In case of x = cmov x, y; or x = cmov y, x; we have
540
     asynchroneous loop -> remove it */
541
  if ((ii->opt[1] & OPT_REF) && ii->op[1] == ref) t = 1;
542
  if ((ii->opt[2] & OPT_REF) && ii->op[2] == ref) t = 2;
543
  if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) t = 2;
544
  if (t) {
545
    change_insn_type (ii, II_ADD);
546
    cucdebug (2, "%8x:cmov     %i\n", ref, t);
547
    ii->opt[t] = OPT_CONST;
548
    ii->op[t] = 0;
549
    ii->opt[3] = OPT_NONE;
550
    return 1;
551
  }
552
  if (!(ii->type & IT_COND)) {
553
    for (t = 1; t <= 2; t++) {
554
      /*   cmov L, X, Y, C1
555
           cmov Z, L, Y, C2
556
         can be replaced with simpler:
557
           cmov L, C1, C2, C2
558
           cmov Z, X, Y, L */
559
      if (ii->opt[t] == OPT_REF && f->INSN(ii->op[t]).index == II_CMOV) {
560
        int r = ii->op[t];
561
        unsigned long x, xt, y, yt;
562
        cuc_insn *prev = &f->INSN(r);
563
        cuc_check (f);
564
        cucdebug (3, "%x-%x\n", ref, r);
565
        assert (!(prev->type & IT_COND));
566
        if (prev->op[3 - t] != ii->op[3 - t] || prev->opt[3 - t] != ii->opt[3 - t]
567
         || insn_uses (f, r) > 1) continue;
568
        cucdebug (3, "%x-%x cmov more\n", ref, r);
569
        prev->type |= IT_COND;
570
        x = prev->op[t]; xt = prev->opt[t];
571
        y = prev->op[3 - t]; yt = prev->opt[3 - t];
572
        prev->op[t] = ii->op[3]; prev->opt[t] = ii->opt[3];     /* C2 */
573
        ii->op[3] = r; ii->opt[3] = OPT_REF;                    /* L */
574
        prev->op[3 - t] = prev->op[3]; prev->opt[3 - t] = prev->opt[3]; /* C1 */
575
        prev->op[3] = prev->op[t]; prev->opt[3] = prev->opt[t]; /* C2 */
576
        ii->op[t] = x; ii->opt[t] = xt;                         /* X */
577
        ii->op[3 - t] = y; ii->opt[3 - t] = yt;                 /* Y */
578
        prev->op[0] = -1; prev->opt[0] = OPT_REGISTER | OPT_DEST;
579
        cuc_check (f);
580
        return 1;
581
      }
582
    }
583
  }
584
 
585 938 markom
  if (ii->opt[3] & OPT_REF) {
586 930 markom
    cuc_insn *prev = &f->INSN(ii->op[3]);
587
    assert (prev->type & IT_COND);
588
    if (prev->index == II_CMOV) {
589 931 markom
      /* negated conditional:
590
           cmov x, 0, 1, y
591
           cmov z, a, b, x
592
         is replaced by
593
           cmov z, b, a, y */
594
      if (prev->opt[1] & OPT_CONST && prev->opt[2] & OPT_CONST
595
       && !prev->op[1] && prev->op[2]) {
596
        unsigned long t;
597
        t = ii->op[1]; ii->op[1] = ii->op[2]; ii->op[2] = t;
598
        t = ii->opt[1]; ii->opt[1] = ii->opt[2]; ii->opt[2] = t;
599
        ii->op[3] = prev->op[3]; ii->opt[3] = prev->opt[3];
600
      }
601 930 markom
    } else if (prev->index == II_ADD) {
602 931 markom
      /*   add x, y, 0
603
           cmov z, a, b, x
604
        is replaced by
605
           cmov z, a, b, y */
606 930 markom
      if (prev->opt[2] & OPT_CONST && prev->op[2] == 0) {
607
        ii->op[3] = prev->op[1]; ii->opt[3] = prev->opt[1];
608
        return 1;
609
      }
610
    }
611
  }
612 1046 markom
 
613
  /* Check if both choices can be pushed through */
614 1047 markom
  if (ii->opt[1] & OPT_REF && ii->opt[2] & OPT_REF
615
    /* Usually doesn't make sense to move conditionals though => more area */
616
    && !(ii->type & IT_COND)) {
617 1046 markom
    cuc_insn *a, *b;
618
    a = &f->INSN(ii->op[1]);
619
    b = &f->INSN(ii->op[2]);
620
    if (a->index == b->index && !(a->type & IT_VOLATILE) && !(b->type & IT_VOLATILE)) {
621
      int diff = -1;
622
      int i;
623
      for (i = 0; i < MAX_OPERANDS; i++)
624
        if (a->opt[i] != b->opt[i] || !(a->op[i] == b->op[i] || a->opt[i] & OPT_REGISTER)) {
625
          if (diff == -1) diff = i; else diff = -2;
626
        }
627
      /* If diff == -1, it will be eliminated by CSE */
628
      if (diff >= 0) {
629
        cuc_insn tmp, cmov;
630
        int ref2 = REF (REF_BB (ref), REF_I (ref) + 1);
631
        insert_insns (f, ref, 1);
632
        a = &f->INSN(f->INSN(ref2).op[1]);
633
        b = &f->INSN(f->INSN(ref2).op[2]);
634
        cucdebug (4, "ref = %x %x %x\n", ref, f->INSN(ref2).op[1], f->INSN(ref2).op[2]);
635
        if (cuc_debug >= 7) {
636
          print_cuc_bb (f, "AAA");
637
          cuc_check (f);
638
        }
639
        tmp = *a;
640
        cmov = f->INSN(ref2);
641
        tmp.op[diff] = ref; tmp.opt[diff] = OPT_REF;
642 1062 markom
        cmov.op[0] = -1; cmov.opt[0] = OPT_REGISTER | OPT_DEST;
643 1046 markom
        cmov.op[1] = a->op[diff]; cmov.opt[1] = a->opt[diff];
644
        cmov.op[2] = b->op[diff]; cmov.opt[2] = b->opt[diff];
645
        change_insn_type (&cmov, II_CMOV);
646
        cmov.type &= ~IT_COND;
647
        cucdebug (4, "ref2 = %x %x %x\n", ref2, cmov.op[1], cmov.op[2]);
648
        if (cmov.opt[1] & OPT_REF && cmov.opt[2] & OPT_REF
649
         && f->INSN(cmov.op[1]).type & IT_COND) {
650
          assert (f->INSN(cmov.op[2]).type & IT_COND);
651
          cmov.type |= IT_COND;
652
        }
653
        f->INSN(ref) = cmov;
654
        f->INSN(ref2) = tmp;
655
        if (cuc_debug >= 6) print_cuc_bb (f, "BBB");
656
        cuc_check (f);
657 1059 markom
        return 1;
658 1046 markom
      }
659
    }
660
  }
661 930 markom
  return 0;
662
}
663
 
664 897 markom
/* Optimizes dataflow tree */
665 931 markom
int optimize_tree (cuc_func *f)
666 897 markom
{
667
  int b, i, j;
668
  int modified;
669 931 markom
  int gmodified = 0;
670 897 markom
 
671
  do {
672
    modified = 0;
673 930 markom
    if (cuc_debug) cuc_check (f);
674 897 markom
    for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) {
675
      for (i = 0; i < f->bb[b].ninsn; i++) {
676
        cuc_insn *ii = &f->bb[b].insn[i];
677
        /* We tend to have the third parameter const if instruction is cumutative */
678 929 markom
        if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)) {
679
          int cond = ii->index == II_SFEQ || ii->index == II_SFNE
680
                  || ii->index == II_SFLT || ii->index == II_SFLE
681
                  || ii->index == II_SFGT || ii->index == II_SFGE;
682
          if (known[ii->index].comutative || cond) {
683
            unsigned long t = ii->opt[1];
684
            ii->opt[1] = ii->opt[2];
685
            ii->opt[2] = t;
686
            t = ii->op[1];
687
            ii->op[1] = ii->op[2];
688
            ii->op[2] = t;
689
            modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i));
690
            if (cond) {
691
              if (ii->index == II_SFEQ) ii->index = II_SFNE;
692
              else if (ii->index == II_SFNE) ii->index = II_SFEQ;
693
              else if (ii->index == II_SFLE) ii->index = II_SFGT;
694
              else if (ii->index == II_SFLT) ii->index = II_SFGE;
695
              else if (ii->index == II_SFGE) ii->index = II_SFLT;
696
              else if (ii->index == II_SFGT) ii->index = II_SFLE;
697
              else assert (0);
698
            }
699
          }
700 897 markom
        }
701
 
702
        /* Try to do the promotion */
703
        /* We have two consecutive expressions, containing constants,
704
         * if previous is a simple expression we can handle it simply: */
705
        for (j = 0; j < MAX_OPERANDS; j++)
706
          if (ii->opt[j] & OPT_REF) {
707
            cuc_insn *t = &f->INSN(ii->op[j]);
708
            if (f->INSN(ii->op[j]).index == II_ADD
709
             && f->INSN(ii->op[j]).opt[2] & OPT_CONST
710
             && f->INSN(ii->op[j]).op[2] == 0
711 931 markom
             && !(ii->type & IT_MEMORY && t->type & IT_MEMADD)) {
712 897 markom
            /* do not promote through add-mem, and branches */
713
              modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]);
714
              ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1];
715
            }
716
          }
717 930 markom
 
718
        /* handle some CMOV cases more deeply */
719
        if (ii->index == II_CMOV && optimize_cmov_more (f, REF (b, i))) {
720
          modified = 1;
721
          continue;
722 897 markom
        }
723
 
724
        /* Do nothing to volatile instructions */
725
        if (ii->type & IT_VOLATILE) continue;
726
 
727
        /* Check whether we can simplify the instruction */
728
        if (apply_edge_condition (ii)) {
729
          modified = 1;
730
          continue;
731
        }
732
        /* We cannot do anything more if at least one is not constant */
733
        if (!(ii->opt[2] & OPT_CONST)) continue;
734
 
735
        if (ii->opt[1] & OPT_CONST) { /* We have constant expression */
736
          unsigned long value;
737
          int ok = 1;
738
          /* Was constant expression already? */
739
          if (ii->index == II_ADD && !ii->op[2]) continue;
740
 
741
          if (ii->index == II_ADD) value = ii->op[1] + ii->op[2];
742
          else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2];
743
          else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2];
744
          else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2];
745
          else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2];
746
          else if (ii->index == II_OR) value = ii->op[1] | ii->op[2];
747
          else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2];
748
          else if (ii->index == II_AND) value = ii->op[1] & ii->op[2];
749
          else ok = 0;
750
          if (ok) {
751
            change_insn_type (ii, II_ADD);
752
            ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
753
            ii->op[1] = value; ii->opt[1] = OPT_CONST;
754
            ii->op[2] = 0; ii->opt[2] = OPT_CONST;
755
            modified = 1; cucdebug (2, "%8x:const\n", REF (b, i));
756
          }
757
        } else if (ii->opt[1] & OPT_REF) {
758
          cuc_insn *prev = &f->INSN(ii->op[1]);
759 930 markom
          /* Is this just a move? */
760 897 markom
          if (ii->index == II_ADD
761
           && !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
762
            int b1, i1, j1;
763
            cucdebug (2, "%8x:link      %8x: ", REF(b, i), ii->op[1]);
764 1059 markom
            if (!(prev->type & (IT_OUTPUT | IT_VOLATILE))) {
765
              assert (ii->opt[0] & OPT_DEST);
766
              prev->op[0] = ii->op[0]; prev->opt[0] = ii->opt[0];
767
              prev->type |= ii->type & IT_OUTPUT;
768
              for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
769
                for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
770
                  for (j1 = 0; j1 < MAX_OPERANDS; j1++)
771
                    if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
772
                     && f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
773
                      cucdebug (2, "%x ", REF (b1, i1));
774
                      f->bb[b1].insn[i1].op[j1] = ii->op[1];
775
                    }
776
              cucdebug (2, "\n");
777
              change_insn_type (ii, II_NOP);
778
            }
779 897 markom
          } else if (prev->opt[2] & OPT_CONST) {
780
            /* Handle some common cases */
781
            /* add - add joining */
782
            if (ii->index == II_ADD && prev->index == II_ADD) {
783
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
784
              ii->op[2] += prev->op[2];
785
              modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i));
786
            } else /* add - sub joining */
787
            if (ii->index == II_ADD && prev->index == II_SUB) {
788
              change_insn_type (&insn[i], II_SUB);
789
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
790
              ii->op[2] += prev->op[2];
791
              modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i));
792
            } else /* sub - add joining */
793
            if (ii->index == II_SUB && prev->index == II_ADD) {
794
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
795
              ii->op[2] += prev->op[2];
796
              modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i));
797 929 markom
            } else /* add - sfxx joining */
798
            if (prev->index == II_ADD && (
799
                ii->index == II_SFEQ || ii->index == II_SFNE
800
             || ii->index == II_SFLT || ii->index == II_SFLE
801
             || ii->index == II_SFGT || ii->index == II_SFGE)) {
802 1041 markom
              if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) {
803 929 markom
                ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
804
                ii->op[2] -= prev->op[2];
805
                modified = 1; cucdebug (2, "%8x: add-sfxx\n", REF(b, i));
806
              }
807
            } else /* sub - sfxx joining */
808
            if (prev->index == II_SUB && (
809
                ii->index == II_SFEQ || ii->index == II_SFNE
810
             || ii->index == II_SFLT || ii->index == II_SFLE
811
             || ii->index == II_SFGT || ii->index == II_SFGE)) {
812 1046 markom
              if (ii->opt[2] & OPT_CONST && ii->op[2] < 0x80000000) {
813 929 markom
                ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
814
                ii->op[2] += prev->op[2];
815
                modified = 1; cucdebug (2, "%8x: sub-sfxx\n", REF(b, i));
816
              }
817 897 markom
            }
818
          }
819
        }
820
      }
821
    }
822 931 markom
    if (modified) gmodified = 1;
823 897 markom
  } while (modified);
824 931 markom
  return gmodified;
825 897 markom
}
826
 
827
/* Remove nop instructions */
828 931 markom
int remove_nops (cuc_func *f)
829 897 markom
{
830
  int b;
831 931 markom
  int modified = 0;
832 897 markom
  for (b = 0; b < f->num_bb; b++) {
833
    int c, d = 0, i, j;
834
    cuc_insn *insn = f->bb[b].insn;
835
    for (i = 0; i < f->bb[b].ninsn; i++)
836
      if (insn[i].index != II_NOP) {
837
        reloc [i] = d;
838
        insn[d++] = insn[i];
839
      } else {
840
        reloc[i] = d; /* For jumps only */
841
      }
842 931 markom
    if (f->bb[b].ninsn != d) modified = 1;
843 897 markom
    f->bb[b].ninsn = d;
844
 
845
    /* Relocate references from all basic blocks */
846
    for (c = 0; c < f->num_bb; c++)
847
      for (i = 0; i < f->bb[c].ninsn; i++) {
848
        dep_list *d = f->bb[c].insn[i].dep;
849
        for (j = 0; j < MAX_OPERANDS; j++)
850
          if ((f->bb[c].insn[i].opt[j] & OPT_REF)
851
            && REF_BB(f->bb[c].insn[i].op[j]) == b)
852
            f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]);
853
 
854
        while (d) {
855
          if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]);
856
          d = d->next;
857
        }
858
      }
859
  }
860 931 markom
  return modified;
861 897 markom
}
862
 
863 932 markom
static void unmark_tree (cuc_func *f, int ref)
864
{
865
  cuc_insn *ii = &f->INSN(ref);
866 973 markom
  cucdebug (5, "%x ", ref);
867 932 markom
  if (ii->type & IT_UNUSED) {
868
    int j;
869
    ii->type &= ~IT_UNUSED;
870
    for (j = 0; j < MAX_OPERANDS; j++)
871
      if (ii->opt[j] & OPT_REF) unmark_tree (f, ii->op[j]);
872
  }
873
}
874
 
875 897 markom
/* Remove unused assignments */
876 931 markom
int remove_dead (cuc_func *f)
877 897 markom
{
878
  int b, i, j;
879
  for (b = 0; b < f->num_bb; b++)
880
    for (i = 0; i < f->bb[b].ninsn; i++)
881 932 markom
      f->bb[b].insn[i].type |= IT_UNUSED;
882 897 markom
 
883 932 markom
  for (b = 0; b < f->num_bb; b++)
884
    for (i = 0; i < f->bb[b].ninsn; i++) {
885
      cuc_insn *ii = &f->bb[b].insn[i];
886
      if (ii->type & IT_VOLATILE || ii->type & IT_OUTPUT
887
       || II_IS_LOAD (ii->index) && (f->memory_order == MO_NONE || f->memory_order == MO_WEAK)
888
       || II_IS_STORE (ii->index)) {
889
        unmark_tree (f, REF (b, i));
890 973 markom
        cucdebug (5, "\n");
891 932 markom
      }
892
    }
893 897 markom
 
894
  for (b = 0; b < f->num_bb; b++)
895
    for (i = 0; i < f->bb[b].ninsn; i++)
896
      if (f->bb[b].insn[i].type & IT_UNUSED) {
897
        change_insn_type (&f->bb[b].insn[i], II_NOP);
898
      }
899
 
900 931 markom
  return remove_nops (f);
901 897 markom
}
902
 
903
/* Removes trivial register assignments */
904 931 markom
int remove_trivial_regs (cuc_func *f)
905 897 markom
{
906
  int b, i;
907 939 markom
  for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = caller_saved[i];
908 897 markom
 
909
  for (b = 0; b < f->num_bb; b++) {
910
    cuc_insn *insn = f->bb[b].insn;
911
    for (i = 0; i < f->bb[b].ninsn; i++) {
912
      if (insn[i].index == II_ADD
913
        && insn[i].opt[0] & OPT_REGISTER
914
        && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1]
915
        && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) {
916
          if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1;
917
          change_insn_type (&insn[i], II_NOP);
918
        }
919
    }
920
  }
921
  if (cuc_debug >= 2) {
922 997 markom
    PRINTF ("saved regs ");
923
    for (i = 0; i < MAX_REGS; i++) PRINTF ("%i:%i ", i, f->saved_regs[i]);
924
    PRINTF ("\n");
925 897 markom
  }
926 931 markom
  return remove_nops (f);
927 897 markom
}
928
 
929
/* Determine inputs and outputs */
930
void set_io (cuc_func *f)
931
{
932
  int b, i, j;
933
  /* Determine register usage */
934
  for (i = 0; i < MAX_REGS; i++) {
935
    f->lur[i] = -1;
936
    f->used_regs[i] = 0;
937
  }
938 902 markom
  if (cuc_debug > 5) print_cuc_bb (f, "SET_IO");
939 897 markom
  for (b = 0; b < f->num_bb; b++) {
940
    for (i = 0; i < f->bb[b].ninsn; i++)
941
      for (j = 0; j < MAX_OPERANDS; j++)
942
        if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0)
943
          if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i);
944
          else f->used_regs[f->bb[b].insn[i].op[j]] = 1;
945
  }
946
}
947
 
948
/* relocate all accesses inside of BB b to back/fwd */
949
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
950
{
951
  int i, j;
952
  for (i = 0; i < bb->ninsn; i++)
953
    for (j = 0; j < MAX_OPERANDS; j++)
954
      if (bb->insn[i].opt[j] & OPT_REF
955
       && REF_BB (bb->insn[i].op[j]) == b) {
956
        int t = REF_I (bb->insn[i].op[j]);
957
        if (t < i) bb->insn[i].op[j] = REF (back, t);
958
        else bb->insn[i].op[j] = REF (fwd, t);
959
      }
960
}
961
 
962
/* Latch outputs in loops */
963
void add_latches (cuc_func *f)
964
{
965
  int b, i, j;
966
 
967
  //print_cuc_bb (f, "ADD_LATCHES a");
968
  /* Cuts the tree and marks registers */
969
  mark_cut (f);
970
 
971
  /* Split BBs with more than one group */
972
  for (b = 0; b < f->num_bb; b++) expand_bb (f, b);
973
  remove_nops (f);
974
  //print_cuc_bb (f, "ADD_LATCHES 0");
975
 
976
  /* Convert accesses in BB_INLOOP type block to latched */
977
  for (b = 0; b < f->num_bb; b++) {
978
    int j;
979
    for (i = 0; i < f->bb[b].ninsn; i++)
980
      for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) {
981
        int t = f->bb[b].insn[i].op[j];
982
        /* If we are pointing to a INLOOP block from outside, or forward
983
           (= previous loop iteration) we must register that data */
984
        if ((f->bb[REF_BB(t)].type & BB_INLOOP || config.cuc.no_multicycle)
985
         && !(f->INSN(t).type & (IT_BRANCH | IT_COND))
986
         && (REF_BB(t) != b || REF_I(t) >= i)) {
987
          f->INSN(t).type |= IT_LATCHED;
988
        }
989
      }
990
  }
991
  //print_cuc_bb (f, "ADD_LATCHES 1");
992
 
993
  /* Add latches at the end of blocks as needed */
994
  for (b = 0; b < f->num_bb; b++) {
995
    int nreg = 0;
996
    cuc_insn *insn;
997
    for (i = 0; i < f->bb[b].ninsn; i++)
998
      if (f->bb[b].insn[i].type & IT_LATCHED) nreg++;
999
    if (nreg) {
1000
      insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg));
1001
      j = 0;
1002
      for (i = 0; i < f->bb[b].ninsn; i++) {
1003
        insn[i] = f->bb[b].insn[i];
1004
        if (insn[i].type & IT_LATCHED) {
1005
          cuc_insn *ii = &insn[f->bb[b].ninsn + j++];
1006
          change_insn_type (ii, II_REG);
1007
          ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER;
1008
          ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF;
1009
          ii->opt[2] = ii->opt[3] = OPT_NONE;
1010
          ii->dep = NULL;
1011
          ii->type = IT_VOLATILE;
1012
          sprintf (ii->disasm, "reg %i_%i", b, i);
1013
        }
1014
      }
1015
      f->bb[b].ninsn += nreg;
1016
      free (f->bb[b].insn);
1017
      f->bb[b].insn = insn;
1018
    }
1019
  }
1020
  //print_cuc_bb (f, "ADD_LATCHES 2");
1021
 
1022
  /* Repair references */
1023
  for (b = 0; b < f->num_bb; b++)
1024
    for (i = 0; i < f->bb[b].ninsn; i++)
1025
      for (j = 0; j < MAX_OPERANDS; j++)
1026
        /* If destination instruction is latched, use register instead */
1027
        if (f->bb[b].insn[i].opt[j] == OPT_REF
1028
         && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
1029
          int b1, i1;
1030
          b1 = REF_BB (f->bb[b].insn[i].op[j]);
1031
          //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
1032
          if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
1033
            for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
1034
              assert (f->bb[b1].insn[i1].index == II_REG);
1035
              if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
1036
                f->bb[b].insn[i].op[j] = REF (b1, i1);
1037
                break;
1038
              }
1039
            }
1040
          }
1041
        }
1042
}
1043
 
1044 883 markom
/* CSE -- common subexpression elimination */
1045 931 markom
int cse (cuc_func *f)
1046 883 markom
{
1047 931 markom
  int modified = 0;
1048 883 markom
  int b, i, j, b1, i1, b2, i2, j2;
1049
  for (b1 = 0; b1 < f->num_bb; b1++)
1050 930 markom
    for (i1 = 0; i1 < f->bb[b1].ninsn; i1++) if (f->bb[b1].insn[i1].index != II_NOP
1051
       && f->bb[b1].insn[i1].index != II_LRBB && !(f->bb[b1].insn[i1].type & IT_MEMORY)
1052
       && !(f->bb[b1].insn[i1].type & IT_MEMADD))
1053 883 markom
      for (b2 = 0; b2 < f->num_bb; b2++)
1054 930 markom
        for (i2 = 0; i2 < f->bb[b2].ninsn; i2++)
1055
          if (f->bb[b2].insn[i2].index != II_NOP && f->bb[b2].insn[i2].index != II_LRBB
1056
           && !(f->bb[b2].insn[i2].type & IT_MEMORY) && !(f->bb[b2].insn[i2].type & IT_MEMADD)
1057
           && (b1 != b2 || i2 > i1)) {
1058
            cuc_insn *ii1 = &f->bb[b1].insn[i1];
1059
            cuc_insn *ii2 = &f->bb[b2].insn[i2];
1060
            int ok = 1;
1061 883 markom
 
1062 930 markom
            /* Do we have an exact match? */
1063
            if (ii1->index != ii2->index) continue;
1064
            if (ii2->type & IT_VOLATILE) continue;
1065
 
1066
            /* Check all operands also */
1067
            for (j = 0; j < MAX_OPERANDS; j++) {
1068
              if (ii1->opt[j] != ii2->opt[j]) {
1069
                ok = 0;
1070
                break;
1071
              }
1072
              if (ii1->opt[j] & OPT_DEST) continue;
1073
              if (ii1->opt[j] != OPT_NONE && ii1->op[j] != ii2->op[j]) {
1074
                ok = 0;
1075
                break;
1076
              }
1077
            }
1078
 
1079
            if (ok) {
1080
              /* remove duplicated instruction and relink the references */
1081
              cucdebug (3, "%x - %x are same\n", REF(b1, i1), REF(b2, i2));
1082
              change_insn_type (ii2, II_NOP);
1083 931 markom
              modified = 1;
1084 930 markom
              for (b = 0; b < f->num_bb; b++)
1085
                for (i = 0; i < f->bb[b].ninsn; i++)
1086
                  for (j = 0; j < MAX_OPERANDS; j++)
1087
                    if (f->bb[b].insn[i].opt[j] & OPT_REF
1088
                     && f->bb[b].insn[i].op[j] == REF (b2, i2))
1089
                      f->bb[b].insn[i].op[j] = REF (b1, i1);
1090
            }
1091
          }
1092 931 markom
  return modified;
1093 883 markom
}
1094
 
1095
static int count_cmovs (cuc_insn *ii, int match)
1096
{
1097
  int c = 0, j;
1098
  if (match & 2) {
1099
    for (j = 0; j < MAX_OPERANDS; j++)
1100
      if (ii->opt[j] & OPT_DEST) c++;
1101
  }
1102
  if (match & 1) {
1103
    for (j = 0; j < MAX_OPERANDS; j++)
1104
      if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] & OPT_REF) c++;
1105
  } else {
1106
    for (j = 0; j < MAX_OPERANDS; j++)
1107
      if (!(ii->opt[j] & OPT_DEST) && ii->opt[j] != OPT_NONE) c++;
1108
  }
1109
  return c;
1110
}
1111
 
1112 897 markom
static void search_csm (int iter, cuc_func *f, cuc_shared_list *list);
1113
static cuc_shared_list *main_list;
1114 883 markom
static int *iteration;
1115
 
1116 897 markom
/* CSM -- common subexpression matching -- resource sharing
1117
   We try to match tree of instruction inside a BB with as many
1118
   matches as possible. All possibilities are collected and
1119
   options, making situation worse are removed */
1120 883 markom
void csm (cuc_func *f)
1121
{
1122
  int b, i, j;
1123
  int cnt;
1124 897 markom
  cuc_shared_list *list;
1125 883 markom
  cuc_timings timings;
1126
 
1127
  analyse_timings (f, &timings);
1128
  main_list = NULL;
1129
  for (b = 0; b < f->num_bb; b++) {
1130
    assert (iteration = (int *)malloc (sizeof (int) * f->bb[b].ninsn));
1131
    for (i = 0; i < f->bb[b].ninsn; i++) {
1132
      int cnt = 0, cntc = 0;
1133
      double size = 0., sizec = 0.;
1134
      int j2 = 0;
1135
      for (j = 0; j < f->bb[b].ninsn; j++)
1136
        if (f->bb[b].insn[i].index == f->bb[b].insn[j].index) {
1137
          int ok = 1;
1138
          for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[j].opt[j2] & OPT_REF))
1139
            if (f->bb[b].insn[j].opt[j2] != f->bb[b].insn[i].opt[j2]
1140
             || f->bb[b].insn[j].op[j2] != f->bb[b].insn[i].opt[j2]) {
1141
              ok = 0;
1142
              break;
1143
            }
1144
          if (ok) {
1145
            cntc++;
1146
            sizec = sizec + insn_size (&f->bb[b].insn[j]);
1147
          } else {
1148
            cnt++;
1149
            size = size + insn_size (&f->bb[b].insn[j]);
1150
          }
1151
          iteration[j] = 0;
1152
        } else iteration[j] = -1;
1153
      if (cntc > 1) {
1154 897 markom
        assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)));
1155 883 markom
        list->next = main_list;
1156
        list->from = NULL;
1157
        list->ref = REF (b, i);
1158
        list->cnt = cnt;
1159
        list->cmatch = 1;
1160
        list->cmovs = count_cmovs (&f->bb[b].insn[i], 3);
1161
        list->osize = sizec;
1162
        list->size = ii_size (f->bb[b].insn[i].index, 1);
1163
        main_list = list;
1164
        search_csm (0, f, list);
1165
      }
1166
      if (cnt > 1) {
1167 897 markom
        assert (list = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)));
1168 883 markom
        list->next = main_list;
1169
        list->from = NULL;
1170
        list->ref = REF (b, i);
1171
        list->cnt = cnt + cntc;
1172
        list->cmatch = 0;
1173
        list->cmovs = count_cmovs (&f->bb[b].insn[i], 2);
1174
        list->osize = size + sizec;
1175
        list->size = ii_size (f->bb[b].insn[i].index, 0);
1176
        main_list = list;
1177
        search_csm (0, f, list);
1178
      }
1179
    }
1180
    free (iteration);
1181
  }
1182
 
1183
  for (list = main_list; list; list = list->next) list->dead = 0;
1184
  cnt = 0;
1185
  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
1186
  cucdebug (1, "noptions = %i\n", cnt);
1187
 
1188
  /* Now we will check the real size of the 'improvements'; if the size
1189
     actually increases, we abandom the option */
1190
  for (list = main_list; list; list = list->next)
1191
    if (list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size >= list->osize) list->dead = 1;
1192
 
1193
  cnt = 0;
1194
  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
1195
  cucdebug (1, "noptions = %i\n", cnt);
1196
 
1197
  /* Count number of instructions grouped */
1198
  for (list = main_list; list; list = list->next) {
1199 897 markom
    cuc_shared_list *l = list;
1200 883 markom
    int c = 0;
1201
    while (l) {
1202
      c++;
1203
      if (f->INSN(l->ref).type & (IT_VOLATILE | IT_MEMORY | IT_MEMADD)) list->dead = 1;
1204
      l = l->from;
1205
    }
1206
    list->ninsn = c;
1207
  }
1208
 
1209
  cnt = 0;
1210
  for (list = main_list; list; list = list->next)
1211
    if (!list->dead) cnt++;
1212
  cucdebug (1, "noptions = %i\n", cnt);
1213
 
1214
#if 1
1215
  /* We can get a lot of options here, so we will delete duplicates */
1216
  for (list = main_list; list; list = list->next) if (!list->dead) {
1217 897 markom
    cuc_shared_list *l;
1218 883 markom
    for (l = list->next; l; l = l->next) if (!l->dead) {
1219
      int ok = 1;
1220 897 markom
      cuc_shared_list *t1 = list;
1221
      cuc_shared_list *t2 = l;
1222 883 markom
      while (ok && t1 && t2) {
1223
        if (f->INSN(t1->ref).index == f->INSN(t2->ref).index) {
1224
          /* If other operands are matching, we must check for them also */
1225
          if (t1->cmatch) {
1226
            int j;
1227
            for (j = 0; j < MAX_OPERANDS; j++)
1228
              if (!(f->INSN(t1->ref).opt[j] & OPT_REF) || !(f->INSN(t2->ref).opt[j] & OPT_REF)
1229
               || f->INSN(t1->ref).opt[j] != f->INSN(t2->ref).opt[j]
1230
               || f->INSN(t1->ref).op[j] != f->INSN(t2->ref).op[j]) {
1231
                ok = 0;
1232
                break;
1233
              }
1234
          }
1235
 
1236
          /* This option is duplicate, remove */
1237
          if (ok) t1->dead = 1;
1238
        }
1239
        t1 = t1->from;
1240
        t2 = t2->from;
1241
      }
1242
    }
1243
  }
1244
  cnt = 0;
1245
  for (list = main_list; list; list = list->next) if (!list->dead) cnt++;
1246
  cucdebug (1, "noptions = %i\n", cnt);
1247
#endif
1248
  /* Print out */
1249
  for (list = main_list; list; list = list->next) if (!list->dead) {
1250 897 markom
    cuc_shared_list *l = list;
1251 883 markom
    cucdebug (1, "%-4s cnt %3i ninsn %3i size %8.1f osize %8.1f cmovs %3i @",
1252
           cuc_insn_name (&f->INSN(list->ref)), list->cnt, list->ninsn,
1253
           list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size, list->osize, list->cmovs);
1254
    while (l) {
1255
      cucdebug (1, "%c%x,", l->cmatch ? '.' : '!', l->ref);
1256
      l = l->from;
1257
    }
1258
    cucdebug (1, "\n");
1259
  }
1260
 
1261
  /* Calculate estimated timings */
1262
  for (b = 0; b < f->num_bb; b++) {
1263
    cnt = 0;
1264
    for (list = main_list; list; list = list->next)
1265
      if (!list->dead && REF_BB(list->ref) == b) cnt++;
1266
 
1267
    f->bb[b].ntim = cnt;
1268
    if (!cnt) {
1269
      f->bb[b].tim = NULL;
1270
      continue;
1271
    }
1272
    assert (f->bb[b].tim = (cuc_timings *)malloc (sizeof (cuc_timings) * cnt));
1273
 
1274
    cnt = 0;
1275
    for (list = main_list; list; list = list->next) if (!list->dead && REF_BB(list->ref) == b) {
1276 897 markom
      cuc_shared_list *l = list;
1277 883 markom
      f->bb[b].tim[cnt].b = b;
1278
      f->bb[b].tim[cnt].preroll = f->bb[b].tim[cnt].unroll = 1;
1279
      f->bb[b].tim[cnt].nshared = list->ninsn;
1280 897 markom
      assert (f->bb[b].tim[cnt].shared = (cuc_shared_item *)
1281
            malloc (sizeof(cuc_shared_item) * list->ninsn));
1282
      for (i =  0; i < list->ninsn; i++, l = l->from) {
1283
        f->bb[b].tim[cnt].shared[i].ref = l->ref;
1284
        f->bb[b].tim[cnt].shared[i].cmatch = l->cmatch;
1285
      }
1286 883 markom
      f->bb[b].tim[cnt].new_time = timings.new_time + f->bb[b].cnt * (list->cnt - 1);
1287 897 markom
      f->bb[b].tim[cnt].size = timings.size +
1288
             list->cmovs * ii_size (II_CMOV, 0) * (list->cnt - 1) + list->size - list->osize;
1289 883 markom
      cnt++;
1290
    }
1291
  }
1292
}
1293
 
1294
/* Recursive function for searching through instruction graph */
1295 897 markom
static void search_csm (int iter, cuc_func *f, cuc_shared_list *list)
1296 883 markom
{
1297
  int b, i, j, i1;
1298 897 markom
  cuc_shared_list *l;
1299 883 markom
  b = REF_BB(list->ref);
1300
  i = REF_I(list->ref);
1301
 
1302
  for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] & OPT_REF) {
1303
    int t = f->bb[b].insn[i].op[j];
1304
    int cnt = 0, cntc = 0;
1305
    double size = 0., sizec = 0.;
1306
 
1307
    /* Mark neighbours */
1308
    for (i1 = 0; i1 < f->bb[b].ninsn; i1++) {
1309
      if (iteration[i1] == iter && f->bb[b].insn[i1].opt[j] & OPT_REF) {
1310
        int t2 = f->bb[b].insn[i1].op[j];
1311
        if (f->INSN(t).index == f->INSN(t2).index && f->INSN(t2).opt[j] & OPT_REF) {
1312
          int j2;
1313
          int ok = 1;
1314
          iteration[REF_I(t2)] = iter + 1;
1315
          for (j2 = 0; j2 < MAX_OPERANDS; j2++) if (!(f->bb[b].insn[i1].opt[j2] & OPT_REF))
1316
            if (f->bb[b].insn[i1].opt[j2] != f->bb[b].insn[i].opt[j2]
1317
             || f->bb[b].insn[i1].op[j2] != f->bb[b].insn[i].opt[j2]) {
1318
              ok = 0;
1319
              break;
1320
            }
1321
          if (ok) {
1322
            cntc++;
1323
            sizec = sizec + insn_size (&f->bb[b].insn[i1]);
1324
          } else {
1325
            cnt++;
1326
            size = size + insn_size (&f->bb[b].insn[i1]);
1327
          }
1328
        }
1329
      }
1330
    }
1331
 
1332
    if (cntc > 1) {
1333 897 markom
      assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)));
1334 883 markom
      l->next = main_list;
1335
      main_list = l;
1336
      l->from = list;
1337
      l->ref = t;
1338
      l->cnt = cnt;
1339
      l->cmatch = 1;
1340
      l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 1) - 1;
1341
      l->size = list->size + ii_size (f->bb[b].insn[i].index, 1);
1342
      l->osize = sizec;
1343
      search_csm (iter + 1, f, l);
1344
    }
1345
    if (cnt > 1) {
1346 897 markom
      assert (l = (cuc_shared_list *)malloc (sizeof (cuc_shared_list)));
1347 883 markom
      l->next = main_list;
1348
      main_list = l;
1349
      l->from = list;
1350
      l->ref = t;
1351
      l->cnt = cnt + cntc;
1352
      l->cmatch = 0;
1353
      l->osize = size + sizec;
1354
      l->cmovs = list->cmovs + count_cmovs (&f->bb[b].insn[i], 0) - 1;
1355
      l->size = list->size + ii_size (f->bb[b].insn[i].index, 0);
1356
      search_csm (iter + 1, f, l);
1357
    }
1358
 
1359
    /* Unmark them back */
1360
    for (i1 = 0; i1 < f->bb[b].ninsn; i1++) if (iteration[i1] > iter) iteration[i1] = -1;
1361
  }
1362
}
1363
 
1364 897 markom
/* Displays shared instructions */
1365
void print_shared (cuc_func *rf, cuc_shared_item *shared, int nshared)
1366
{
1367
  int i, first = 1;
1368
  for (i = 0; i < nshared; i++) {
1369 997 markom
    PRINTF ("%s%s%s", first ? "" : "-", cuc_insn_name (&rf->INSN(shared[i].ref)),
1370 897 markom
                    shared[i].cmatch ? "!" : "");
1371
    first = 0;
1372
  }
1373
}
1374
 
1375
/* Common subexpression matching -- resource sharing, generation pass
1376
 
1377
   Situation here is much simpler than with analysis -- we know the instruction sequence
1378
   we are going to share, but we are going to do this on whole function, not just one BB.
1379
   We can find sequence in reference function, as pointed from "shared" */
1380
void csm_gen (cuc_func *f, cuc_func *rf, cuc_shared_item *shared, int nshared)
1381
{
1382
  int b, i, j, cnt = 0;
1383
#warning   some code here (2)
1384 997 markom
  PRINTF ("Replacing: ");
1385 897 markom
  print_shared (rf, shared, nshared);
1386
 
1387
  for (b = 0; b < f->num_bb; b++)
1388
    for (i = 0; i < f->bb[b].ninsn; i++) {
1389
    }
1390
 
1391 997 markom
  PRINTF ("\nFound %i matches.\n", cnt);
1392 897 markom
}
1393
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.