OpenCores
URL https://opencores.org/ocsvn/or1k/or1k/trunk

Subversion Repositories or1k

[/] [or1k/] [tags/] [nog_patch_42/] [or1ksim/] [cuc/] [cuc.c] - Blame information for rev 883

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 879 markom
/* cuc.c -- OpenRISC Custom Unit Compiler
2
 *    Copyright (C) 2002 Marko Mlinar, markom@opencores.org
3
 *
4
 *    This file is part of OpenRISC 1000 Architectural Simulator.
5
 *
6
 *    This program is free software; you can redistribute it and/or modify
7
 *    it under the terms of the GNU General Public License as published by
8
 *    the Free Software Foundation; either version 2 of the License, or
9
 *    (at your option) any later version.
10
 *
11
 *    This program is distributed in the hope that it will be useful,
12
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *    GNU General Public License for more details.
15
 *
16
 *    You should have received a copy of the GNU General Public License
17
 *    along with this program; if not, write to the Free Software
18
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
 
20
/* Main file, including code optimization and command prompt */
21
 
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <stdarg.h>
25
#include <assert.h>
26
#include "sim-config.h"
27
#include "cuc.h"
28
#include "insn.h"
29
#include "profiler.h"
30 883 markom
#include "opcode/or32.h"
31 879 markom
 
32
FILE *flog;
33 883 markom
int cuc_debug = 0;
34 879 markom
 
35
/* Last used registers by software convention */
36
const int call_saved[MAX_REGS] = {
37
  0, 0, 0, 1, 1, 1, 1, 1,
38
  1, 1, 0, 1, 0, 1, 0, 1,
39
  0, 1, 0, 1, 0, 1, 0, 1,
40
  0, 1, 0, 1, 0, 1, 0, 1,
41
  1, 1};
42
 
43
/* Prints out instructions */
44
void print_insns (cuc_insn *insn, int ninsn, int verbose)
45
{
46
  int i, j;
47
  for (i = 0; i < ninsn; i++) {
48
    dep_list *l = insn[i].dep;
49
    printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i]));
50
    if (verbose) {
51
      printf ("%-20s insn = %08x, index = %i, type = %04x ",
52
                      insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type);
53
    } else printf ("type = %04x ", insn[i].type);
54
    for (j = 0; j < MAX_OPERANDS; j++) {
55
      if (insn[i].opt[j] & OPT_DEST) printf ("*");
56
      switch (insn[i].opt[j] & ~OPT_DEST) {
57
        case OPT_NONE: break;
58
        case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break;
59
        case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break;
60
        case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break;
61
        case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break;
62
        case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break;
63
        case OPT_LRBB: printf ("LRBB, "); break;
64
        default:
65
          fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n",
66
                         cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]);
67
          assert (0);
68
      }
69
    }
70
    if (l) {
71
      printf ("\n\tdep:");
72
      while (l) {
73
        printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref));
74
        l = l->next;
75
      }
76
    }
77
    printf ("\n");
78
  }
79
}
80
 
81
void add_dep (dep_list **list, int dep)
82
{
83
  dep_list *ndep;
84
  dep_list **tmp = list;
85
 
86
  while (*tmp) {
87
    if ((*tmp)->ref == dep) return; /* already there */
88
    tmp = &((*tmp)->next);
89
  }
90
  ndep = (dep_list *)malloc (sizeof (dep_list));
91
  ndep->ref = dep;
92
  ndep->next = NULL;
93
  *tmp = ndep;
94
}
95
 
96
void dispose_list (dep_list **list)
97
{
98
  while (*list) {
99
    dep_list *tmp = *list;
100
    *list = tmp->next;
101
    free (tmp);
102
  }
103
}
104
 
105
void add_data_dep (cuc_func *f)
106
{
107
  int b, i, j;
108
  dep_list *tmp;
109
  for (b = 0; b < f->num_bb; b++) {
110
    cuc_insn *insn = f->bb[b].insn;
111
    for (i = 0; i < f->bb[b].ninsn; i++)
112
      for (j = 0; j < MAX_OPERANDS; j++) {
113
        fflush (stdout);
114
        if (insn[i].opt[j] & OPT_REF) {
115
          /* Copy list from predecessor */
116
          dep_list *l = f->INSN(insn[i].op[j]).dep;
117
          while (l) {
118
            add_dep (&insn[i].dep, l->ref);
119
            l = l->next;
120
          }
121
          /* add predecessor */
122
          add_dep (&insn[i].dep, insn[i].op[j]);
123
        }
124
      }
125
  }
126
}
127
 
128
/* returns nonzero, if instruction was simplified */
129
int apply_edge_condition (cuc_insn *ii)
130
{
131
  unsigned int c = ii->op[2];
132
 
133
  if (ii->index == II_AND) {
134
    if (ii->opt[2] & OPT_CONST && c == 0) {
135
      change_insn_type (ii, II_ADD);
136
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
137
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
138
      return 1;
139
    }
140
  } else if (ii->index == II_OR) {
141
    if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
142
      change_insn_type (ii, II_ADD);
143
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
144
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
145
      return 1;
146
    }
147
  } else if (ii->index == II_SUB) {
148
    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
149
      change_insn_type (ii, II_ADD);
150
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
151
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
152
      return 1;
153
    }
154
  } else if (ii->index == II_MUL) {
155
    if (ii->opt[2] & OPT_CONST && c == 0) {
156
      change_insn_type (ii, II_ADD);
157
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
158
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
159
      return 1;
160
    } else
161
    if (ii->opt[2] & OPT_CONST && c == 1) {
162
      change_insn_type (ii, II_ADD);
163
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
164
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
165
      return 1;
166
    } else
167
    if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
168
      change_insn_type (ii, II_SUB);
169
      ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1];
170
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
171
      return 1;
172
    }
173
  } else if (ii->index == II_SRL) {
174
    if (ii->opt[2] & OPT_CONST && c == 0) {
175
      change_insn_type (ii, II_ADD);
176
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
177
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
178
      return 1;
179
    } else if (ii->opt[2] & OPT_CONST && c >= 32) {
180
      change_insn_type (ii, II_ADD);
181
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
182
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
183
      return 1;
184
    }
185
  } else if (ii->index == II_SLL) {
186
    if (ii->opt[2] & OPT_CONST && c == 0) {
187
      change_insn_type (ii, II_ADD);
188
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
189
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
190
      return 1;
191
    } else if (ii->opt[2] & OPT_CONST && c >= 32) {
192
      change_insn_type (ii, II_ADD);
193
      ii->op[1] = 0; ii->opt[1] = OPT_CONST;
194
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
195
      return 1;
196
    }
197
  } else if (ii->index == II_SRA) {
198
    if (ii->opt[2] & OPT_CONST && c == 0) {
199
      change_insn_type (ii, II_ADD);
200
      ii->op[1] = c; ii->opt[1] = OPT_CONST;
201
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
202
      return 1;
203
    }
204
  } else if (ii->index == II_CMOV) {
205
    if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
206
      change_insn_type (ii, II_ADD);
207
      ii->op[2] = 0; ii->opt[2] = OPT_CONST;
208
      ii->opt[3] = OPT_NONE;
209
      return 1;
210
    }
211
  }
212
  return 0;
213
}
214
 
215
/* Optimizes dataflow tree */
216
void optimize_tree (cuc_func *f)
217
{
218
  int b, i, j;
219
  int modified;
220
 
221
  do {
222
    modified = 0;
223
    for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) {
224
      for (i = 0; i < f->bb[b].ninsn; i++) {
225
        cuc_insn *ii = &f->bb[b].insn[i];
226
        /* We tend to have the third parameter const if instruction is cumutative */
227
        if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)
228
            && known[ii->index].comutative) {
229
          unsigned long t = ii->opt[1];
230
          ii->opt[1] = ii->opt[2];
231
          ii->opt[2] = t;
232
          t = ii->op[1];
233
          ii->op[1] = ii->op[2];
234
          ii->op[2] = t;
235 883 markom
          modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i));
236 879 markom
        }
237
 
238
        /* Try to do the promotion */
239
        /* We have two consecutive expressions, containing constants,
240
         * if previous is a simple expression we can handle it simply: */
241
        for (j = 0; j < MAX_OPERANDS; j++)
242
          if (ii->opt[j] & OPT_REF) {
243
            cuc_insn *t = &f->INSN(ii->op[j]);
244
            if (f->INSN(ii->op[j]).index == II_ADD
245
             && f->INSN(ii->op[j]).opt[2] & OPT_CONST
246
             && f->INSN(ii->op[j]).op[2] == 0
247
             && !(ii->type & IT_MEMORY && t->type & IT_MEMADD)
248
             && !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) {
249
            /* do not promote through add-mem, and branches */
250 883 markom
              modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]);
251 879 markom
              ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1];
252
            }
253
          }
254
 
255
        /* In case of x = cmov x, y; or x = cmov y, x; we have
256
           asynchroneous loop -> remove it */
257
        if (ii->index == II_CMOV) {
258
          int f = 0;
259
          if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1;
260
          if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2;
261
          if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2;
262
          if (f) {
263
            change_insn_type (ii, II_ADD);
264 883 markom
            cucdebug (2, "%8x:cmov     %i\n", REF(b, i), f);
265 879 markom
            ii->opt[f] = OPT_CONST;
266
            ii->op[f] = 0;
267
            ii->opt[3] = OPT_NONE;
268
            modified = 1;
269
            continue;
270
          }
271
        }
272
 
273
        /* Do nothing to volatile instructions */
274
        if (ii->type & IT_VOLATILE) continue;
275
 
276
        /* Check whether we can simplify the instruction */
277
        if (apply_edge_condition (ii)) {
278
          modified = 1;
279
          continue;
280
        }
281
        /* We cannot do anything more if at least one is not constant */
282
        if (!(ii->opt[2] & OPT_CONST)) continue;
283
 
284
        if (ii->opt[1] & OPT_CONST) { /* We have constant expression */
285
          unsigned long value;
286
          int ok = 1;
287
          /* Was constant expression already? */
288
          if (ii->index == II_ADD && !ii->op[2]) continue;
289
 
290
          if (ii->index == II_ADD) value = ii->op[1] + ii->op[2];
291
          else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2];
292
          else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2];
293
          else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2];
294
          else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2];
295
          else if (ii->index == II_OR) value = ii->op[1] | ii->op[2];
296
          else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2];
297
          else if (ii->index == II_AND) value = ii->op[1] & ii->op[2];
298
          else ok = 0;
299
          if (ok) {
300
            change_insn_type (ii, II_ADD);
301
            ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
302
            ii->op[1] = value; ii->opt[1] = OPT_CONST;
303
            ii->op[2] = 0; ii->opt[2] = OPT_CONST;
304 883 markom
            modified = 1; cucdebug (2, "%8x:const\n", REF (b, i));
305 879 markom
          }
306
        } else if (ii->opt[1] & OPT_REF) {
307
          cuc_insn *prev = &f->INSN(ii->op[1]);
308
          /* Is this just a link? */
309
          if (ii->index == II_ADD
310
           && !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
311
            int b1, i1, j1;
312 883 markom
            cucdebug (2, "%8x:link      %8x: ", REF(b, i), ii->op[1]);
313 879 markom
            for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
314
              for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
315
                for (j1 = 0; j1 < MAX_OPERANDS; j1++)
316
                  if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
317
                   && f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
318 883 markom
                    cucdebug (2, "%x ", REF (b1, i1));
319 879 markom
                    f->bb[b1].insn[i1].op[j1] = ii->op[1];
320
                  }
321 883 markom
            cucdebug (2, "\n");
322 879 markom
            change_insn_type (ii, II_NOP);
323
          } else if (prev->opt[2] & OPT_CONST) {
324
            /* Handle some common cases */
325
            /* add - add joining */
326
            if (ii->index == II_ADD && prev->index == II_ADD) {
327
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
328
              ii->op[2] += prev->op[2];
329 883 markom
              modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i));
330 879 markom
            } else /* add - sub joining */
331
            if (ii->index == II_ADD && prev->index == II_SUB) {
332
              change_insn_type (&insn[i], II_SUB);
333
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
334
              ii->op[2] += prev->op[2];
335 883 markom
              modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i));
336 879 markom
            } else /* sub - add joining */
337
            if (ii->index == II_SUB && prev->index == II_ADD) {
338
              ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
339
              ii->op[2] += prev->op[2];
340 883 markom
              modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i));
341 879 markom
            }
342
          }
343
        }
344
      }
345
    }
346
  } while (modified);
347
}
348
 
349
/* Remove nop instructions */
350
void remove_nops (cuc_func *f)
351
{
352
  int b;
353
  for (b = 0; b < f->num_bb; b++) {
354
    int c, d = 0, i, j;
355
    cuc_insn *insn = f->bb[b].insn;
356
    for (i = 0; i < f->bb[b].ninsn; i++)
357
      if (insn[i].index != II_NOP) {
358
        reloc [i] = d;
359
        insn[d++] = insn[i];
360
      } else {
361
        reloc[i] = d; /* For jumps only */
362
      }
363
    f->bb[b].ninsn = d;
364
 
365
    /* Relocate references from all basic blocks */
366
    for (c = 0; c < f->num_bb; c++)
367 883 markom
      for (i = 0; i < f->bb[c].ninsn; i++) {
368
        dep_list *d = f->bb[c].insn[i].dep;
369 879 markom
        for (j = 0; j < MAX_OPERANDS; j++)
370
          if ((f->bb[c].insn[i].opt[j] & OPT_REF)
371
            && REF_BB(f->bb[c].insn[i].op[j]) == b)
372
            f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]);
373 883 markom
 
374
        while (d) {
375
          if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]);
376
          d = d->next;
377
        }
378
      }
379 879 markom
  }
380
}
381
 
382
/* Remove unused assignments */
383
void remove_dead (cuc_func *f)
384
{
385
  int b, i, j;
386
  for (b = 0; b < f->num_bb; b++)
387
    for (i = 0; i < f->bb[b].ninsn; i++)
388
      if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT)))
389
        f->bb[b].insn[i].type |= IT_UNUSED;
390
 
391
  for (b = 0; b < f->num_bb; b++) {
392
    for (i = 0; i < f->bb[b].ninsn; i++)
393
      for (j = 0; j < MAX_OPERANDS; j++)
394
        if (f->bb[b].insn[i].opt[j] & OPT_REF) {
395
          f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED;
396
        }
397
  }
398
 
399
  for (b = 0; b < f->num_bb; b++)
400
    for (i = 0; i < f->bb[b].ninsn; i++)
401
      if (f->bb[b].insn[i].type & IT_UNUSED) {
402
        change_insn_type (&f->bb[b].insn[i], II_NOP);
403
      }
404
 
405
  remove_nops (f);
406
}
407
 
408
/* Removes trivial register assignments */
409
void remove_trivial_regs (cuc_func *f)
410
{
411
  int b, i;
412
  for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i];
413
 
414
  for (b = 0; b < f->num_bb; b++) {
415
    cuc_insn *insn = f->bb[b].insn;
416
    for (i = 0; i < f->bb[b].ninsn; i++) {
417
      if (insn[i].index == II_ADD
418
        && insn[i].opt[0] & OPT_REGISTER
419
        && insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1]
420
        && insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) {
421
          if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1;
422
          change_insn_type (&insn[i], II_NOP);
423
        }
424
    }
425
  }
426 883 markom
  if (cuc_debug >= 2) {
427 879 markom
    printf ("saved regs ");
428
    for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]);
429
    printf ("\n");
430
  }
431
  remove_nops (f);
432
}
433
 
434 883 markom
/* Determine inputs and outputs */
435
void set_io (cuc_func *f)
436
{
437
  int b, i, j;
438
  /* Determine register usage */
439
  for (i = 0; i < MAX_REGS; i++) {
440
    f->lur[i] = -1;
441
    f->used_regs[i] = 0;
442
  }
443
  for (b = 0; b < f->num_bb; b++) {
444
    for (i = 0; i < f->bb[b].ninsn; i++)
445
      for (j = 0; j < MAX_OPERANDS; j++)
446
        if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0)
447
          if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i);
448
          else f->used_regs[f->bb[b].insn[i].op[j]] = 1;
449
  }
450
}
451
 
452 879 markom
/* relocate all accesses inside of BB b to back/fwd */
453
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
454
{
455
  int i, j;
456
  for (i = 0; i < bb->ninsn; i++)
457
    for (j = 0; j < MAX_OPERANDS; j++)
458
      if (bb->insn[i].opt[j] & OPT_REF
459
       && REF_BB (bb->insn[i].op[j]) == b) {
460
        int t = REF_I (bb->insn[i].op[j]);
461
        if (t < i) bb->insn[i].op[j] = REF (back, t);
462
        else bb->insn[i].op[j] = REF (fwd, t);
463
      }
464
}
465
 
466
/* split the BB, based on the group numbers in .tmp */
467
void expand_bb (cuc_func *f, int b)
468
{
469
  int n = f->num_bb;
470
  int mg = 0;
471
  int b1, i, j;
472
 
473
  for (i = 0; i < f->bb[b].ninsn; i++)
474
    if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
475
 
476
  /* Create copies */
477
  for (b1 = 1; b1 <= mg; b1++) {
478
    assert (f->num_bb < MAX_BB);
479
    cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
480
    f->num_bb++;
481
  }
482
 
483
  /* Relocate */
484
  for (b1 = 0; b1 < f->num_bb; b1++)
485
    for (i = 0; i < f->bb[b1].ninsn; i++) {
486 883 markom
      dep_list *d = f->bb[b1].insn[i].dep;
487 879 markom
      for (j = 0; j < MAX_OPERANDS; j++)
488
        if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
489
          int t = f->bb[b1].insn[i].op[j];
490
          if (REF_BB(t) == b && f->INSN(t).tmp != 0)
491
            f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
492
        }
493 883 markom
      while (d) {
494
        if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
495
          d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
496
        d = d->next;
497
      }
498 879 markom
    }
499
 
500
  /* Delete unused instructions */
501
  for (j = 0; j <= mg; j++) {
502
    if (j == 0) b1 = b;
503
    else b1 = n + j - 1;
504
    for (i = 0; i < f->bb[b1].ninsn; i++) {
505
      if (f->bb[b1].insn[i].tmp != j)
506
        change_insn_type (&f->bb[b1].insn[i], II_NOP);
507
      f->bb[b1].insn[i].tmp = 0;
508
    }
509
    if (j < mg) {
510
      f->bb[b1].next[0] = n + j;
511
      f->bb[b1].next[1] = -1;
512
      f->bb[n + j].prev[0] = b1;
513
      f->bb[n + j].prev[1] = -1;
514
    } else {
515
      i = f->bb[b1].next[0];
516
      f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
517
      f->bb[n + j].prev[1] = -1;
518
      if (i >= 0) {
519
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
520
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
521
      }
522
      i = f->bb[b1].next[1];
523
      if (i >= 0) {
524
        if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
525
        if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
526
      }
527
    }
528
  }
529
}
530
 
531
/* Latch outputs in loops */
532
void add_latches (cuc_func *f)
533
{
534
  int b, i, j;
535
 
536
  //print_cuc_bb (f, "ADD_LATCHES a");
537
  /* Cuts the tree and marks registers */
538
  mark_cut (f);
539
 
540
  /* Split BBs with more than one group */
541
  for (b = 0; b < f->num_bb; b++) expand_bb (f, b);
542
  remove_nops (f);
543
  //print_cuc_bb (f, "ADD_LATCHES 0");
544
 
545
  /* Convert accesses in BB_INLOOP type block to latched */
546
  for (b = 0; b < f->num_bb; b++) {
547
    int j;
548
    for (i = 0; i < f->bb[b].ninsn; i++)
549
      for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) {
550
        int t = f->bb[b].insn[i].op[j];
551
        /* If we are pointing to a INLOOP block from outside, or forward
552
           (= previous loop iteration) we must register that data */
553
        if ((f->bb[REF_BB(t)].type & BB_INLOOP || no_multicycle)
554
         && !(f->INSN(t).type & (IT_BRANCH | IT_COND))
555
         && (REF_BB(t) != b || REF_I(t) >= i)) {
556
          f->INSN(t).type |= IT_LATCHED;
557
        }
558
      }
559
  }
560
  //print_cuc_bb (f, "ADD_LATCHES 1");
561
 
562
  /* Add latches at the end of blocks as needed */
563
  for (b = 0; b < f->num_bb; b++) {
564
    int nreg = 0;
565
    cuc_insn *insn;
566
    for (i = 0; i < f->bb[b].ninsn; i++)
567
      if (f->bb[b].insn[i].type & IT_LATCHED) nreg++;
568
    if (nreg) {
569
      insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg));
570
      j = 0;
571
      for (i = 0; i < f->bb[b].ninsn; i++) {
572
        insn[i] = f->bb[b].insn[i];
573
        if (insn[i].type & IT_LATCHED) {
574
          cuc_insn *ii = &insn[f->bb[b].ninsn + j++];
575
          change_insn_type (ii, II_REG);
576
          ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER;
577
          ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF;
578
          ii->opt[2] = ii->opt[3] = OPT_NONE;
579
          ii->dep = NULL;
580
          ii->type = IT_VOLATILE;
581
          sprintf (ii->disasm, "reg %i_%i", b, i);
582
        }
583
      }
584
      f->bb[b].ninsn += nreg;
585
      free (f->bb[b].insn);
586
      f->bb[b].insn = insn;
587
    }
588
  }
589
  //print_cuc_bb (f, "ADD_LATCHES 2");
590
 
591
  /* Repair references */
592
  for (b = 0; b < f->num_bb; b++)
593
    for (i = 0; i < f->bb[b].ninsn; i++)
594
      for (j = 0; j < MAX_OPERANDS; j++)
595
        /* If destination instruction is latched, use register instead */
596
        if (f->bb[b].insn[i].opt[j] == OPT_REF
597
         && f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
598
          int b1, i1;
599
          b1 = REF_BB (f->bb[b].insn[i].op[j]);
600 883 markom
          //cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
601 879 markom
          if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
602
            for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
603
              assert (f->bb[b1].insn[i1].index == II_REG);
604
              if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
605
                f->bb[b].insn[i].op[j] = REF (b1, i1);
606
                break;
607
              }
608
            }
609
          }
610
        }
611
}
612
 
613
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
614
{
615
  cuc_func *func;
616 883 markom
  cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
617 879 markom
  func = preunroll_loop (f, b, i, j, bb_filename);
618 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL");
619 879 markom
 
620
  log ("Optimizing.\n");
621
  optimize_tree (func);
622 883 markom
  if (cuc_debug >= 6) //print_cuc_bb (func, "AFTER_OPT_TREE1");
623 879 markom
  remove_nops (func);
624 883 markom
  if (cuc_debug >= 6) //print_cuc_bb (func, "NO_NOPS");
625 879 markom
  remove_dead (func);
626 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD1");
627 879 markom
  optimize_bb (func);
628 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
629 879 markom
  remove_dead_bb (func);
630 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
631 879 markom
  optimize_tree (func);
632 883 markom
  if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_OPT_TREE");
633
  log ("Common subexpression elimination.\n");
634
  cse (func);
635
  if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
636 879 markom
  remove_dead (func);
637 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
638 879 markom
  remove_trivial_regs (func);
639 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
640
  add_latches (func);
641
  if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
642
  set_io (func);
643 879 markom
  add_memory_dep (func, memory_order);
644 883 markom
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
645 879 markom
  add_data_dep (func);
646 883 markom
  if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
647 879 markom
  schedule_memory (func, memory_order);
648 883 markom
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
649 879 markom
 
650
  analyse_timings (func, timings);
651 883 markom
  cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
652 879 markom
           timings->new_time, func->orig_time, timings->size);
653
  log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
654
         timings->new_time, func->orig_time, timings->size);
655
  //output_verilog (func, argv[1]);
656
  free_func (func);
657
  timings->b = b;
658
  timings->unroll = j;
659
  timings->preroll = i;
660 883 markom
  timings->nshared = 0;
661 879 markom
  return timings;
662
}
663
 
664
int tim_comp (cuc_timings *a, cuc_timings *b)
665
{
666
  if (a->new_time < b->new_time) return -1;
667
  else if (a->new_time > b->new_time) return 1;
668
  else return 0;
669
}
670
 
671
cuc_func *analyse_function (char *module_name, long orig_time,
672
                unsigned long start_addr, unsigned long end_addr)
673
{
674
  cuc_timings timings;
675
  cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
676
  cuc_func *saved;
677
  int b, i, j;
678
  char tmp1[256];
679
  char tmp2[256];
680
 
681
  func->orig_time = orig_time;
682
  func->start_addr = start_addr;
683
  func->end_addr = end_addr;
684
 
685
  sprintf (tmp1, "%s.bin", module_name);
686 883 markom
  cucdebug (2, "Loading %s.bin\n", module_name);
687 879 markom
  cuc_load (tmp1);
688
 
689
  log ("Detecting basic blocks\n");
690
  detect_bb (func);
691 883 markom
  if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
692 879 markom
 
693
  //sprintf (tmp1, "%s.bin.mp", module_name);
694
  sprintf (tmp2, "%s.bin.bb", module_name);
695
  generate_bb_seq (func, config.sim.mprof_fn, tmp2);
696
 
697
  build_bb (func);
698 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
699 879 markom
  reg_dep (func);
700
 
701
  log ("Detecting dependencies\n");
702 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
703 879 markom
  optimize_tree (func);
704
  log ("Optimizing.\n");
705 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE1");
706 879 markom
  remove_nops (func);
707 883 markom
  if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
708 879 markom
  remove_dead (func);
709 883 markom
  if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_DEAD1");
710 879 markom
  optimize_bb (func);
711 883 markom
  if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_BB");
712 879 markom
  remove_dead_bb (func);
713 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_DEAD_BB");
714 879 markom
  optimize_tree (func);
715 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE");
716
  log ("Common subexpression elimination.\n");
717
  cse (func);
718
  if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
719 879 markom
  remove_dead (func);
720 883 markom
  if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
721 879 markom
  remove_trivial_regs (func);
722 883 markom
  if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
723 879 markom
 
724 883 markom
  csm (func);
725 879 markom
  assert (saved = dup_func (func));
726 883 markom
 
727
  timings.preroll = timings.unroll = 1;
728
  timings.nshared = 0;
729
  add_latches (func);
730
  set_io (func);
731
 
732
  if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
733
  analyse_timings (func, &timings);
734 879 markom
  add_memory_dep (func, memory_order);
735 883 markom
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
736 879 markom
  add_data_dep (func);
737 883 markom
  if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
738 879 markom
  schedule_memory (func, memory_order);
739 883 markom
  if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
740 879 markom
 
741 883 markom
  //output_verilog (func, module_name);
742 879 markom
  free_func (func);
743 883 markom
  log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
744
        timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
745
  saved->timings = timings;
746 879 markom
 
747
#if 1
748
  /* detect and unroll simple loops */
749
  for (b = 0; b < saved->num_bb; b++) {
750
    cuc_timings t[MAX_UNROLL * MAX_PREROLL];
751
    cuc_timings *ut;
752
    cuc_timings *cut = &t[0];
753
    int nt = 1;
754
    double csize;
755
 
756
    /* Is it a loop? */
757
    if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
758
    t[0] = timings;
759
    t[0].b = b;
760
    t[0].preroll = 1;
761
    t[0].unroll = 1;
762 883 markom
    t[0].nshared = 0;
763 879 markom
 
764
    sprintf (tmp1, "%s.bin.bb", module_name);
765
    i = 1;
766
    do {
767
      cuc_timings *pt;
768
      cuc_timings *cpt = cut;
769
      j = 1;
770
 
771
      do {
772
        pt = cpt;
773
        cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
774
      } while (j <= MAX_PREROLL && pt->new_time >= cpt->new_time);
775
      i++;
776
      ut = cut;
777
      cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
778
    } while (i <= MAX_UNROLL && ut->new_time >= cut->new_time);
779
 
780
    /* Sort the timings */
781 883 markom
#if 0
782
    if (cuc_debug >= 3)
783 879 markom
    for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n",
784
                    t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
785 883 markom
#endif
786 879 markom
 
787
    qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
788
 
789
    /* Delete timings, that have worst time and bigger size than other */
790
    j = 1;
791
    csize = t[0].size;
792
    for (i = 1; i < nt; i++)
793
      if (t[i].size < csize) t[j++] = t[i];
794
    nt = j;
795 883 markom
 
796
    cucdebug (1, "Available options\n");
797
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
798
        t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
799
    /* Add results from CSM */
800
    j = nt;
801
    for (i = 0; i < saved->bb[b].ntim; i++) {
802
      int i1;
803
      for (i1 = 0; i1 < nt; i1++) {
804
        t[j] = t[i1];
805
        t[j].size += saved->bb[b].tim[i].size - timings.size;
806
        t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
807
        t[j].nshared = saved->bb[b].tim[i].nshared;
808
        t[j].shared = saved->bb[b].tim[i].shared;
809
        if (++j >= MAX_UNROLL * MAX_PREROLL) goto full;
810
      }
811
    }
812
 
813
full:
814
    nt = j;
815 879 markom
 
816 883 markom
    cucdebug (1, "Available options:\n");
817
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
818
        t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
819 879 markom
 
820 883 markom
    /* Sort again with new timings added */
821
    qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
822
 
823
    /* Delete timings, that have worst time and bigger size than other */
824
    j = 1;
825
    csize = t[0].size;
826
    for (i = 1; i < nt; i++)
827
      if (t[i].size < csize) t[j++] = t[i];
828
    nt = j;
829
 
830
    cucdebug (1, "Available options:\n");
831
    for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
832
                               t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
833
 
834
    if (saved->bb[b].ntim) free (saved->bb[b].tim);
835 879 markom
    saved->bb[b].ntim = nt;
836
    assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
837
 
838
    /* Copy options in reverse order -- smallest first */
839
    for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
840 883 markom
 
841
    log ("Available options:\n");
842
    for (i = 0; i < saved->bb[b].ntim; i++) {
843
      log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
844
        saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
845
        saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
846
    }
847 879 markom
  }
848
#endif
849
  return saved;
850
}
851
 
852 883 markom
void options_cmd (cuc_func *f, char *name)
853
{
854
  int b, i;
855
  printf ("%-12s    :pre%i,un%i,sha%i:  time = %i cyc;  size = %.f gates (old time = %i)\n", name,
856
        f->timings.preroll, f->timings.unroll, f->timings.nshared,
857
        f->timings.new_time, f->timings.size, f->orig_time);
858
  for (b = 0; b < f->num_bb; b++) {
859
    /* Print out results */
860
    for (i = 0; i < f->bb[b].ntim; i++) {
861
      printf ("%-12sBB%-2i:pre%i,un%i,sha%i:  time = %i cyc;  size = %.f gates\n", name,
862
        f->bb[b].tim[i].b, f->bb[b].tim[i].preroll, f->bb[b].tim[i].unroll,
863
        f->bb[b].tim[i].nshared, f->bb[b].tim[i].new_time, f->bb[b].tim[i].size);
864
    }
865
  }
866
}
867
 
868 879 markom
/* Dumps specified function to file (hex) */
869
unsigned long extract_function (char *out_fn, unsigned long start_addr)
870
{
871
  FILE *fo;
872
  unsigned long a = start_addr;
873
  int x = 0;
874
  assert (fo = fopen (out_fn, "wt+"));
875
 
876
  do {
877
    unsigned long d = evalsim_mem32 (a);
878
    int index = insn_decode (d);
879
    assert (index >= 0);
880
    if (x) x++;
881
    if (strcmp (insn_name (index), "l.jr") == 0) x = 1;
882
    a += 4;
883
    fprintf (fo, "%08x\n", d);
884
  } while (x < 2);
885
 
886
  fclose (fo);
887
  return a - 4;
888
}
889
 
890
static cuc_func *func[MAX_FUNCS];
891
 
892
void main_cuc (char *filename)
893
{
894 883 markom
  int i, j;
895 879 markom
  char tmp1[256];
896
 
897 883 markom
  printf ("Entering OpenRISC Custom Unit Compiler command prompt\n");
898
  printf ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
899 879 markom
  sprintf (tmp1, "%s.log", filename);
900 883 markom
  printf ("Analyzing. (log file \"%s\").\n", tmp1);
901 879 markom
  assert (flog = fopen (tmp1, "wt+"));
902
 
903
  /* Loads in the specified timings table */
904
  load_timing_table ("virtex.tim");
905
 
906
  prof_set (1, 0);
907
  assert (prof_acquire (config.sim.prof_fn) == 0);
908
 
909
  cycle_duration = 40.;
910
 
911
  /* Try all functions except "total" */
912
  for (i = 0; i < prof_nfuncs - 1; i++) {
913
    long orig_time;
914
    unsigned long start_addr, end_addr;
915
    orig_time = prof_func[i].cum_cycles;
916
    start_addr = prof_func[i].addr;
917
 
918
    /* Extract the function from the binary */
919
    sprintf (tmp1, "%s.bin", prof_func[i].name);
920
    end_addr = extract_function (tmp1, start_addr);
921
 
922
    log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr);
923
    func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr);
924
  }
925
 
926 883 markom
  while (1) {
927
    char *s;
928
    printf ("(cuc) ");
929
    fflush (stdout);
930
    fgets(tmp1, sizeof tmp1, stdin);
931
    for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
932
    *s = '\0';
933
 
934
    if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
935
      break;
936
    } else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
937
      printf ("----------------------------------------------------------------------------\n");
938
      printf ("|function name            |addr    |# calls |avg cycles  | old% | impr. f. |\n");
939
      printf ("|-------------------------+--------+--------+------------+------+----------|\n");
940
      for (j = 0; j < prof_nfuncs; j++) {
941
        int bestcyc = 0, besti = 0;
942
        for (i = 0; i < prof_nfuncs; i++)
943
          if (prof_func[i].cum_cycles > bestcyc) {
944
            bestcyc = prof_func[i].cum_cycles;
945
            besti = i;
946
          }
947
        i = besti;
948
        printf ("| %-24s|%08X|%8i|%12.1f| %3.0f%% |",
949
                prof_func[i].name, prof_func[i].addr, prof_func[i].calls,
950
                ((double)prof_func[i].cum_cycles / prof_func[i].calls),
951
                (100. * prof_func[i].cum_cycles / prof_cycles));
952
        if (func[i]) {
953
          printf ("%9.2f |\n", 1.f *  prof_func[i].cum_cycles / func[i]->timings.new_time);
954
        } else printf ("    N/A   |\n");
955
        prof_func[i].cum_cycles = -1;
956
      }
957
      printf ("----------------------------------------------------------------------------\n");
958
      printf ("Total %i functions, %i cycles.\n", prof_nfuncs, prof_cycles);
959
    } else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
960
      sscanf (tmp1, "%*s %i", &cuc_debug);
961
      if (cuc_debug < 0) cuc_debug = 0;
962
      if (cuc_debug > 9) cuc_debug = 9;
963
    } else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
964
      for (i = 0; i < prof_nfuncs; i++);
965
    } else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
966
      printf ("Available options:\n");
967
      for (i = 0; i < prof_nfuncs; i++)
968
        if (func[i]) options_cmd (func[i], prof_func[i].name);
969
    } else {
970
      if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
971
        printf ("Unknown command.\n");
972
      printf ("OpenRISC Custom Unit Compiler command prompt\n");
973
      printf ("h|help       displays this help\n");
974
      printf ("q|quit       returns to or1ksim prompt\n");
975
      printf ("p|profile    displays function profiling\n");
976
      printf ("d|debug #    sets debug level (0-9)\n");
977
      printf ("o|options    displays available options\n");
978
      printf ("g|generate   generates verilog file\n");
979
    }
980
  }
981
 
982 879 markom
  /* Dispose memory */
983
  for (i = 0; i < prof_nfuncs -1; i++)
984
    if (func[i]) free_func (func[i]);
985
 
986
  fclose (flog);
987
}
988
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.