1 |
879 |
markom |
/* cuc.c -- OpenRISC Custom Unit Compiler
|
2 |
|
|
* Copyright (C) 2002 Marko Mlinar, markom@opencores.org
|
3 |
|
|
*
|
4 |
|
|
* This file is part of OpenRISC 1000 Architectural Simulator.
|
5 |
|
|
*
|
6 |
|
|
* This program is free software; you can redistribute it and/or modify
|
7 |
|
|
* it under the terms of the GNU General Public License as published by
|
8 |
|
|
* the Free Software Foundation; either version 2 of the License, or
|
9 |
|
|
* (at your option) any later version.
|
10 |
|
|
*
|
11 |
|
|
* This program is distributed in the hope that it will be useful,
|
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
|
|
* GNU General Public License for more details.
|
15 |
|
|
*
|
16 |
|
|
* You should have received a copy of the GNU General Public License
|
17 |
|
|
* along with this program; if not, write to the Free Software
|
18 |
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
19 |
|
|
|
20 |
|
|
/* Main file, including code optimization and command prompt */
|
21 |
|
|
|
22 |
|
|
#include <stdio.h>
|
23 |
|
|
#include <stdlib.h>
|
24 |
|
|
#include <stdarg.h>
|
25 |
|
|
#include <assert.h>
|
26 |
|
|
#include "sim-config.h"
|
27 |
|
|
#include "cuc.h"
|
28 |
|
|
#include "insn.h"
|
29 |
|
|
#include "profiler.h"
|
30 |
883 |
markom |
#include "opcode/or32.h"
|
31 |
879 |
markom |
|
32 |
|
|
FILE *flog;
|
33 |
883 |
markom |
int cuc_debug = 0;
|
34 |
879 |
markom |
|
35 |
|
|
/* Last used registers by software convention */
|
36 |
|
|
const int call_saved[MAX_REGS] = {
|
37 |
|
|
0, 0, 0, 1, 1, 1, 1, 1,
|
38 |
|
|
1, 1, 0, 1, 0, 1, 0, 1,
|
39 |
|
|
0, 1, 0, 1, 0, 1, 0, 1,
|
40 |
|
|
0, 1, 0, 1, 0, 1, 0, 1,
|
41 |
|
|
1, 1};
|
42 |
|
|
|
43 |
|
|
/* Prints out instructions */
|
44 |
|
|
void print_insns (cuc_insn *insn, int ninsn, int verbose)
|
45 |
|
|
{
|
46 |
|
|
int i, j;
|
47 |
|
|
for (i = 0; i < ninsn; i++) {
|
48 |
|
|
dep_list *l = insn[i].dep;
|
49 |
|
|
printf ("%4x%c %-4s ", i, insn[i].index >= 0 ? ':' : '?', cuc_insn_name (&insn[i]));
|
50 |
|
|
if (verbose) {
|
51 |
|
|
printf ("%-20s insn = %08x, index = %i, type = %04x ",
|
52 |
|
|
insn[i].disasm, insn[i].insn, insn[i].index, insn[i].type);
|
53 |
|
|
} else printf ("type = %04x ", insn[i].type);
|
54 |
|
|
for (j = 0; j < MAX_OPERANDS; j++) {
|
55 |
|
|
if (insn[i].opt[j] & OPT_DEST) printf ("*");
|
56 |
|
|
switch (insn[i].opt[j] & ~OPT_DEST) {
|
57 |
|
|
case OPT_NONE: break;
|
58 |
|
|
case OPT_CONST: printf ("0x%08x, ", insn[i].op[j]); break;
|
59 |
|
|
case OPT_JUMP: printf ("J%x ", insn[i].op[j]); break;
|
60 |
|
|
case OPT_REGISTER: printf ("r%i, ", insn[i].op[j]); break;
|
61 |
|
|
case OPT_REF: printf ("[%x.%x], ", REF_BB(insn[i].op[j]), REF_I(insn[i].op[j])); break;
|
62 |
|
|
case OPT_BB: printf ("BB%x, ", insn[i].op[j]); break;
|
63 |
|
|
case OPT_LRBB: printf ("LRBB, "); break;
|
64 |
|
|
default:
|
65 |
|
|
fprintf (stderr, "Invalid operand type %s(%x.%x) = %x\n",
|
66 |
|
|
cuc_insn_name (&insn[i]), i, j, insn[i].opt[j]);
|
67 |
|
|
assert (0);
|
68 |
|
|
}
|
69 |
|
|
}
|
70 |
|
|
if (l) {
|
71 |
|
|
printf ("\n\tdep:");
|
72 |
|
|
while (l) {
|
73 |
|
|
printf (" [%x.%x],", REF_BB (l->ref), REF_I (l->ref));
|
74 |
|
|
l = l->next;
|
75 |
|
|
}
|
76 |
|
|
}
|
77 |
|
|
printf ("\n");
|
78 |
|
|
}
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
void add_dep (dep_list **list, int dep)
|
82 |
|
|
{
|
83 |
|
|
dep_list *ndep;
|
84 |
|
|
dep_list **tmp = list;
|
85 |
|
|
|
86 |
|
|
while (*tmp) {
|
87 |
|
|
if ((*tmp)->ref == dep) return; /* already there */
|
88 |
|
|
tmp = &((*tmp)->next);
|
89 |
|
|
}
|
90 |
|
|
ndep = (dep_list *)malloc (sizeof (dep_list));
|
91 |
|
|
ndep->ref = dep;
|
92 |
|
|
ndep->next = NULL;
|
93 |
|
|
*tmp = ndep;
|
94 |
|
|
}
|
95 |
|
|
|
96 |
|
|
void dispose_list (dep_list **list)
|
97 |
|
|
{
|
98 |
|
|
while (*list) {
|
99 |
|
|
dep_list *tmp = *list;
|
100 |
|
|
*list = tmp->next;
|
101 |
|
|
free (tmp);
|
102 |
|
|
}
|
103 |
|
|
}
|
104 |
|
|
|
105 |
|
|
void add_data_dep (cuc_func *f)
|
106 |
|
|
{
|
107 |
|
|
int b, i, j;
|
108 |
|
|
dep_list *tmp;
|
109 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
110 |
|
|
cuc_insn *insn = f->bb[b].insn;
|
111 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
112 |
|
|
for (j = 0; j < MAX_OPERANDS; j++) {
|
113 |
|
|
fflush (stdout);
|
114 |
|
|
if (insn[i].opt[j] & OPT_REF) {
|
115 |
|
|
/* Copy list from predecessor */
|
116 |
|
|
dep_list *l = f->INSN(insn[i].op[j]).dep;
|
117 |
|
|
while (l) {
|
118 |
|
|
add_dep (&insn[i].dep, l->ref);
|
119 |
|
|
l = l->next;
|
120 |
|
|
}
|
121 |
|
|
/* add predecessor */
|
122 |
|
|
add_dep (&insn[i].dep, insn[i].op[j]);
|
123 |
|
|
}
|
124 |
|
|
}
|
125 |
|
|
}
|
126 |
|
|
}
|
127 |
|
|
|
128 |
|
|
/* returns nonzero, if instruction was simplified */
|
129 |
|
|
int apply_edge_condition (cuc_insn *ii)
|
130 |
|
|
{
|
131 |
|
|
unsigned int c = ii->op[2];
|
132 |
|
|
|
133 |
|
|
if (ii->index == II_AND) {
|
134 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0) {
|
135 |
|
|
change_insn_type (ii, II_ADD);
|
136 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
137 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
138 |
|
|
return 1;
|
139 |
|
|
}
|
140 |
|
|
} else if (ii->index == II_OR) {
|
141 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
|
142 |
|
|
change_insn_type (ii, II_ADD);
|
143 |
|
|
ii->op[1] = c; ii->opt[1] = OPT_CONST;
|
144 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
145 |
|
|
return 1;
|
146 |
|
|
}
|
147 |
|
|
} else if (ii->index == II_SUB) {
|
148 |
|
|
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
|
149 |
|
|
change_insn_type (ii, II_ADD);
|
150 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
151 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
152 |
|
|
return 1;
|
153 |
|
|
}
|
154 |
|
|
} else if (ii->index == II_MUL) {
|
155 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0) {
|
156 |
|
|
change_insn_type (ii, II_ADD);
|
157 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
158 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
159 |
|
|
return 1;
|
160 |
|
|
} else
|
161 |
|
|
if (ii->opt[2] & OPT_CONST && c == 1) {
|
162 |
|
|
change_insn_type (ii, II_ADD);
|
163 |
|
|
ii->op[1] = c; ii->opt[1] = OPT_CONST;
|
164 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
165 |
|
|
return 1;
|
166 |
|
|
} else
|
167 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0xffffffff) {
|
168 |
|
|
change_insn_type (ii, II_SUB);
|
169 |
|
|
ii->op[2] = ii->op[1]; ii->opt[2] = ii->opt[1];
|
170 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
171 |
|
|
return 1;
|
172 |
|
|
}
|
173 |
|
|
} else if (ii->index == II_SRL) {
|
174 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0) {
|
175 |
|
|
change_insn_type (ii, II_ADD);
|
176 |
|
|
ii->op[1] = c; ii->opt[1] = OPT_CONST;
|
177 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
178 |
|
|
return 1;
|
179 |
|
|
} else if (ii->opt[2] & OPT_CONST && c >= 32) {
|
180 |
|
|
change_insn_type (ii, II_ADD);
|
181 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
182 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
183 |
|
|
return 1;
|
184 |
|
|
}
|
185 |
|
|
} else if (ii->index == II_SLL) {
|
186 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0) {
|
187 |
|
|
change_insn_type (ii, II_ADD);
|
188 |
|
|
ii->op[1] = c; ii->opt[1] = OPT_CONST;
|
189 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
190 |
|
|
return 1;
|
191 |
|
|
} else if (ii->opt[2] & OPT_CONST && c >= 32) {
|
192 |
|
|
change_insn_type (ii, II_ADD);
|
193 |
|
|
ii->op[1] = 0; ii->opt[1] = OPT_CONST;
|
194 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
195 |
|
|
return 1;
|
196 |
|
|
}
|
197 |
|
|
} else if (ii->index == II_SRA) {
|
198 |
|
|
if (ii->opt[2] & OPT_CONST && c == 0) {
|
199 |
|
|
change_insn_type (ii, II_ADD);
|
200 |
|
|
ii->op[1] = c; ii->opt[1] = OPT_CONST;
|
201 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
202 |
|
|
return 1;
|
203 |
|
|
}
|
204 |
|
|
} else if (ii->index == II_CMOV) {
|
205 |
|
|
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) {
|
206 |
|
|
change_insn_type (ii, II_ADD);
|
207 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
208 |
|
|
ii->opt[3] = OPT_NONE;
|
209 |
|
|
return 1;
|
210 |
|
|
}
|
211 |
|
|
}
|
212 |
|
|
return 0;
|
213 |
|
|
}
|
214 |
|
|
|
215 |
|
|
/* Optimizes dataflow tree */
|
216 |
|
|
void optimize_tree (cuc_func *f)
|
217 |
|
|
{
|
218 |
|
|
int b, i, j;
|
219 |
|
|
int modified;
|
220 |
|
|
|
221 |
|
|
do {
|
222 |
|
|
modified = 0;
|
223 |
|
|
for (b = 0; b < f->num_bb; b++) if (!(f->bb[b].type & BB_DEAD)) {
|
224 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++) {
|
225 |
|
|
cuc_insn *ii = &f->bb[b].insn[i];
|
226 |
|
|
/* We tend to have the third parameter const if instruction is cumutative */
|
227 |
|
|
if ((ii->opt[1] & OPT_CONST) && !(ii->opt[2] & OPT_CONST)
|
228 |
|
|
&& known[ii->index].comutative) {
|
229 |
|
|
unsigned long t = ii->opt[1];
|
230 |
|
|
ii->opt[1] = ii->opt[2];
|
231 |
|
|
ii->opt[2] = t;
|
232 |
|
|
t = ii->op[1];
|
233 |
|
|
ii->op[1] = ii->op[2];
|
234 |
|
|
ii->op[2] = t;
|
235 |
883 |
markom |
modified = 1; cucdebug (2, "%08x:<>\n", REF(b, i));
|
236 |
879 |
markom |
}
|
237 |
|
|
|
238 |
|
|
/* Try to do the promotion */
|
239 |
|
|
/* We have two consecutive expressions, containing constants,
|
240 |
|
|
* if previous is a simple expression we can handle it simply: */
|
241 |
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
242 |
|
|
if (ii->opt[j] & OPT_REF) {
|
243 |
|
|
cuc_insn *t = &f->INSN(ii->op[j]);
|
244 |
|
|
if (f->INSN(ii->op[j]).index == II_ADD
|
245 |
|
|
&& f->INSN(ii->op[j]).opt[2] & OPT_CONST
|
246 |
|
|
&& f->INSN(ii->op[j]).op[2] == 0
|
247 |
|
|
&& !(ii->type & IT_MEMORY && t->type & IT_MEMADD)
|
248 |
|
|
&& !(ii->type & IT_BRANCH) && !(t->type & IT_COND)) {
|
249 |
|
|
/* do not promote through add-mem, and branches */
|
250 |
883 |
markom |
modified = 1; cucdebug (2, "%8x:promote%i %8x %8x\n", REF (b, i), j, ii->op[j], t->op[1]);
|
251 |
879 |
markom |
ii->op[j] = t->op[1]; ii->opt[j] = t->opt[1];
|
252 |
|
|
}
|
253 |
|
|
}
|
254 |
|
|
|
255 |
|
|
/* In case of x = cmov x, y; or x = cmov y, x; we have
|
256 |
|
|
asynchroneous loop -> remove it */
|
257 |
|
|
if (ii->index == II_CMOV) {
|
258 |
|
|
int f = 0;
|
259 |
|
|
if ((ii->opt[1] & OPT_REF) && ii->op[1] == REF (b, i)) f = 1;
|
260 |
|
|
if ((ii->opt[2] & OPT_REF) && ii->op[2] == REF (b, i)) f = 2;
|
261 |
|
|
if (ii->opt[1] == ii->opt[2] && ii->op[1] == ii->op[2]) f = 2;
|
262 |
|
|
if (f) {
|
263 |
|
|
change_insn_type (ii, II_ADD);
|
264 |
883 |
markom |
cucdebug (2, "%8x:cmov %i\n", REF(b, i), f);
|
265 |
879 |
markom |
ii->opt[f] = OPT_CONST;
|
266 |
|
|
ii->op[f] = 0;
|
267 |
|
|
ii->opt[3] = OPT_NONE;
|
268 |
|
|
modified = 1;
|
269 |
|
|
continue;
|
270 |
|
|
}
|
271 |
|
|
}
|
272 |
|
|
|
273 |
|
|
/* Do nothing to volatile instructions */
|
274 |
|
|
if (ii->type & IT_VOLATILE) continue;
|
275 |
|
|
|
276 |
|
|
/* Check whether we can simplify the instruction */
|
277 |
|
|
if (apply_edge_condition (ii)) {
|
278 |
|
|
modified = 1;
|
279 |
|
|
continue;
|
280 |
|
|
}
|
281 |
|
|
/* We cannot do anything more if at least one is not constant */
|
282 |
|
|
if (!(ii->opt[2] & OPT_CONST)) continue;
|
283 |
|
|
|
284 |
|
|
if (ii->opt[1] & OPT_CONST) { /* We have constant expression */
|
285 |
|
|
unsigned long value;
|
286 |
|
|
int ok = 1;
|
287 |
|
|
/* Was constant expression already? */
|
288 |
|
|
if (ii->index == II_ADD && !ii->op[2]) continue;
|
289 |
|
|
|
290 |
|
|
if (ii->index == II_ADD) value = ii->op[1] + ii->op[2];
|
291 |
|
|
else if (ii->index == II_SUB) value = ii->op[1] - ii->op[2];
|
292 |
|
|
else if (ii->index == II_SLL) value = ii->op[1] << ii->op[2];
|
293 |
|
|
else if (ii->index == II_SRL) value = ii->op[1] >> ii->op[2];
|
294 |
|
|
else if (ii->index == II_MUL) value = ii->op[1] * ii->op[2];
|
295 |
|
|
else if (ii->index == II_OR) value = ii->op[1] | ii->op[2];
|
296 |
|
|
else if (ii->index == II_XOR) value = ii->op[1] ^ ii->op[2];
|
297 |
|
|
else if (ii->index == II_AND) value = ii->op[1] & ii->op[2];
|
298 |
|
|
else ok = 0;
|
299 |
|
|
if (ok) {
|
300 |
|
|
change_insn_type (ii, II_ADD);
|
301 |
|
|
ii->op[0] = -1; ii->opt[0] = OPT_REGISTER | OPT_DEST;
|
302 |
|
|
ii->op[1] = value; ii->opt[1] = OPT_CONST;
|
303 |
|
|
ii->op[2] = 0; ii->opt[2] = OPT_CONST;
|
304 |
883 |
markom |
modified = 1; cucdebug (2, "%8x:const\n", REF (b, i));
|
305 |
879 |
markom |
}
|
306 |
|
|
} else if (ii->opt[1] & OPT_REF) {
|
307 |
|
|
cuc_insn *prev = &f->INSN(ii->op[1]);
|
308 |
|
|
/* Is this just a link? */
|
309 |
|
|
if (ii->index == II_ADD
|
310 |
|
|
&& !(ii->type & IT_MEMADD) && ii->op[2] == 0) {
|
311 |
|
|
int b1, i1, j1;
|
312 |
883 |
markom |
cucdebug (2, "%8x:link %8x: ", REF(b, i), ii->op[1]);
|
313 |
879 |
markom |
for (b1 = 0; b1 < f->num_bb; b1++) if (!(f->bb[b1].type & BB_DEAD))
|
314 |
|
|
for (i1 = 0; i1 < f->bb[b1].ninsn; i1++)
|
315 |
|
|
for (j1 = 0; j1 < MAX_OPERANDS; j1++)
|
316 |
|
|
if ((f->bb[b1].insn[i1].opt[j1] & OPT_REF)
|
317 |
|
|
&& f->bb[b1].insn[i1].op[j1] == REF(b, i)) {
|
318 |
883 |
markom |
cucdebug (2, "%x ", REF (b1, i1));
|
319 |
879 |
markom |
f->bb[b1].insn[i1].op[j1] = ii->op[1];
|
320 |
|
|
}
|
321 |
883 |
markom |
cucdebug (2, "\n");
|
322 |
879 |
markom |
change_insn_type (ii, II_NOP);
|
323 |
|
|
} else if (prev->opt[2] & OPT_CONST) {
|
324 |
|
|
/* Handle some common cases */
|
325 |
|
|
/* add - add joining */
|
326 |
|
|
if (ii->index == II_ADD && prev->index == II_ADD) {
|
327 |
|
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
328 |
|
|
ii->op[2] += prev->op[2];
|
329 |
883 |
markom |
modified = 1; cucdebug (2, "%8x: add-add\n", REF(b, i));
|
330 |
879 |
markom |
} else /* add - sub joining */
|
331 |
|
|
if (ii->index == II_ADD && prev->index == II_SUB) {
|
332 |
|
|
change_insn_type (&insn[i], II_SUB);
|
333 |
|
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
334 |
|
|
ii->op[2] += prev->op[2];
|
335 |
883 |
markom |
modified = 1; cucdebug (2, "%8x: add-sub\n", REF(b, i));
|
336 |
879 |
markom |
} else /* sub - add joining */
|
337 |
|
|
if (ii->index == II_SUB && prev->index == II_ADD) {
|
338 |
|
|
ii->op[1] = prev->op[1]; ii->opt[1] = prev->opt[1];
|
339 |
|
|
ii->op[2] += prev->op[2];
|
340 |
883 |
markom |
modified = 1; cucdebug (2, "%8x: sub-add\n", REF(b, i));
|
341 |
879 |
markom |
}
|
342 |
|
|
}
|
343 |
|
|
}
|
344 |
|
|
}
|
345 |
|
|
}
|
346 |
|
|
} while (modified);
|
347 |
|
|
}
|
348 |
|
|
|
349 |
|
|
/* Remove nop instructions */
|
350 |
|
|
void remove_nops (cuc_func *f)
|
351 |
|
|
{
|
352 |
|
|
int b;
|
353 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
354 |
|
|
int c, d = 0, i, j;
|
355 |
|
|
cuc_insn *insn = f->bb[b].insn;
|
356 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
357 |
|
|
if (insn[i].index != II_NOP) {
|
358 |
|
|
reloc [i] = d;
|
359 |
|
|
insn[d++] = insn[i];
|
360 |
|
|
} else {
|
361 |
|
|
reloc[i] = d; /* For jumps only */
|
362 |
|
|
}
|
363 |
|
|
f->bb[b].ninsn = d;
|
364 |
|
|
|
365 |
|
|
/* Relocate references from all basic blocks */
|
366 |
|
|
for (c = 0; c < f->num_bb; c++)
|
367 |
883 |
markom |
for (i = 0; i < f->bb[c].ninsn; i++) {
|
368 |
|
|
dep_list *d = f->bb[c].insn[i].dep;
|
369 |
879 |
markom |
for (j = 0; j < MAX_OPERANDS; j++)
|
370 |
|
|
if ((f->bb[c].insn[i].opt[j] & OPT_REF)
|
371 |
|
|
&& REF_BB(f->bb[c].insn[i].op[j]) == b)
|
372 |
|
|
f->bb[c].insn[i].op[j] = REF (b, reloc[REF_I (f->bb[c].insn[i].op[j])]);
|
373 |
883 |
markom |
|
374 |
|
|
while (d) {
|
375 |
|
|
if (REF_BB(d->ref) == b) d->ref = REF (b, reloc[REF_I (d->ref)]);
|
376 |
|
|
d = d->next;
|
377 |
|
|
}
|
378 |
|
|
}
|
379 |
879 |
markom |
}
|
380 |
|
|
}
|
381 |
|
|
|
382 |
|
|
/* Remove unused assignments */
|
383 |
|
|
void remove_dead (cuc_func *f)
|
384 |
|
|
{
|
385 |
|
|
int b, i, j;
|
386 |
|
|
for (b = 0; b < f->num_bb; b++)
|
387 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
388 |
|
|
if (!(f->bb[b].insn[i].type & (IT_VOLATILE | IT_OUTPUT)))
|
389 |
|
|
f->bb[b].insn[i].type |= IT_UNUSED;
|
390 |
|
|
|
391 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
392 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
393 |
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
394 |
|
|
if (f->bb[b].insn[i].opt[j] & OPT_REF) {
|
395 |
|
|
f->INSN(f->bb[b].insn[i].op[j]).type &= ~IT_UNUSED;
|
396 |
|
|
}
|
397 |
|
|
}
|
398 |
|
|
|
399 |
|
|
for (b = 0; b < f->num_bb; b++)
|
400 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
401 |
|
|
if (f->bb[b].insn[i].type & IT_UNUSED) {
|
402 |
|
|
change_insn_type (&f->bb[b].insn[i], II_NOP);
|
403 |
|
|
}
|
404 |
|
|
|
405 |
|
|
remove_nops (f);
|
406 |
|
|
}
|
407 |
|
|
|
408 |
|
|
/* Removes trivial register assignments */
|
409 |
|
|
void remove_trivial_regs (cuc_func *f)
|
410 |
|
|
{
|
411 |
|
|
int b, i;
|
412 |
|
|
for (i = 0; i < MAX_REGS; i++) f->saved_regs[i] = call_saved[i];
|
413 |
|
|
|
414 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
415 |
|
|
cuc_insn *insn = f->bb[b].insn;
|
416 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++) {
|
417 |
|
|
if (insn[i].index == II_ADD
|
418 |
|
|
&& insn[i].opt[0] & OPT_REGISTER
|
419 |
|
|
&& insn[i].opt[1] & OPT_REGISTER && insn[i].op[0] == insn[i].op[1]
|
420 |
|
|
&& insn[i].opt[2] & OPT_CONST && insn[i].op[2] == 0) {
|
421 |
|
|
if (insn[i].type & IT_OUTPUT) f->saved_regs[insn[i].op[0]] = 1;
|
422 |
|
|
change_insn_type (&insn[i], II_NOP);
|
423 |
|
|
}
|
424 |
|
|
}
|
425 |
|
|
}
|
426 |
883 |
markom |
if (cuc_debug >= 2) {
|
427 |
879 |
markom |
printf ("saved regs ");
|
428 |
|
|
for (i = 0; i < MAX_REGS; i++) printf ("%i:%i ", i, f->saved_regs[i]);
|
429 |
|
|
printf ("\n");
|
430 |
|
|
}
|
431 |
|
|
remove_nops (f);
|
432 |
|
|
}
|
433 |
|
|
|
434 |
883 |
markom |
/* Determine inputs and outputs */
|
435 |
|
|
void set_io (cuc_func *f)
|
436 |
|
|
{
|
437 |
|
|
int b, i, j;
|
438 |
|
|
/* Determine register usage */
|
439 |
|
|
for (i = 0; i < MAX_REGS; i++) {
|
440 |
|
|
f->lur[i] = -1;
|
441 |
|
|
f->used_regs[i] = 0;
|
442 |
|
|
}
|
443 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
444 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
445 |
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
446 |
|
|
if (f->bb[b].insn[i].opt[j] & OPT_REGISTER && f->bb[b].insn[i].op[j] >= 0)
|
447 |
|
|
if (f->bb[b].insn[i].opt[j] & OPT_DEST) f->lur[f->bb[b].insn[i].op[j]] = REF (b, i);
|
448 |
|
|
else f->used_regs[f->bb[b].insn[i].op[j]] = 1;
|
449 |
|
|
}
|
450 |
|
|
}
|
451 |
|
|
|
452 |
879 |
markom |
/* relocate all accesses inside of BB b to back/fwd */
|
453 |
|
|
static void relocate_bb (cuc_bb *bb, int b, int back, int fwd)
|
454 |
|
|
{
|
455 |
|
|
int i, j;
|
456 |
|
|
for (i = 0; i < bb->ninsn; i++)
|
457 |
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
458 |
|
|
if (bb->insn[i].opt[j] & OPT_REF
|
459 |
|
|
&& REF_BB (bb->insn[i].op[j]) == b) {
|
460 |
|
|
int t = REF_I (bb->insn[i].op[j]);
|
461 |
|
|
if (t < i) bb->insn[i].op[j] = REF (back, t);
|
462 |
|
|
else bb->insn[i].op[j] = REF (fwd, t);
|
463 |
|
|
}
|
464 |
|
|
}
|
465 |
|
|
|
466 |
|
|
/* split the BB, based on the group numbers in .tmp */
|
467 |
|
|
void expand_bb (cuc_func *f, int b)
|
468 |
|
|
{
|
469 |
|
|
int n = f->num_bb;
|
470 |
|
|
int mg = 0;
|
471 |
|
|
int b1, i, j;
|
472 |
|
|
|
473 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
474 |
|
|
if (f->bb[b].insn[i].tmp > mg) mg = f->bb[b].insn[i].tmp;
|
475 |
|
|
|
476 |
|
|
/* Create copies */
|
477 |
|
|
for (b1 = 1; b1 <= mg; b1++) {
|
478 |
|
|
assert (f->num_bb < MAX_BB);
|
479 |
|
|
cpy_bb (&f->bb[f->num_bb], &f->bb[b]);
|
480 |
|
|
f->num_bb++;
|
481 |
|
|
}
|
482 |
|
|
|
483 |
|
|
/* Relocate */
|
484 |
|
|
for (b1 = 0; b1 < f->num_bb; b1++)
|
485 |
|
|
for (i = 0; i < f->bb[b1].ninsn; i++) {
|
486 |
883 |
markom |
dep_list *d = f->bb[b1].insn[i].dep;
|
487 |
879 |
markom |
for (j = 0; j < MAX_OPERANDS; j++)
|
488 |
|
|
if (f->bb[b1].insn[i].opt[j] & OPT_REF) {
|
489 |
|
|
int t = f->bb[b1].insn[i].op[j];
|
490 |
|
|
if (REF_BB(t) == b && f->INSN(t).tmp != 0)
|
491 |
|
|
f->bb[b1].insn[i].op[j] = REF (n + f->INSN(t).tmp - 1, REF_I(t));
|
492 |
|
|
}
|
493 |
883 |
markom |
while (d) {
|
494 |
|
|
if (REF_BB (d->ref) == b && f->INSN(d->ref).tmp != 0)
|
495 |
|
|
d->ref = REF (n + f->INSN(d->ref).tmp - 1, REF_I(d->ref));
|
496 |
|
|
d = d->next;
|
497 |
|
|
}
|
498 |
879 |
markom |
}
|
499 |
|
|
|
500 |
|
|
/* Delete unused instructions */
|
501 |
|
|
for (j = 0; j <= mg; j++) {
|
502 |
|
|
if (j == 0) b1 = b;
|
503 |
|
|
else b1 = n + j - 1;
|
504 |
|
|
for (i = 0; i < f->bb[b1].ninsn; i++) {
|
505 |
|
|
if (f->bb[b1].insn[i].tmp != j)
|
506 |
|
|
change_insn_type (&f->bb[b1].insn[i], II_NOP);
|
507 |
|
|
f->bb[b1].insn[i].tmp = 0;
|
508 |
|
|
}
|
509 |
|
|
if (j < mg) {
|
510 |
|
|
f->bb[b1].next[0] = n + j;
|
511 |
|
|
f->bb[b1].next[1] = -1;
|
512 |
|
|
f->bb[n + j].prev[0] = b1;
|
513 |
|
|
f->bb[n + j].prev[1] = -1;
|
514 |
|
|
} else {
|
515 |
|
|
i = f->bb[b1].next[0];
|
516 |
|
|
f->bb[n + j].prev[0] = j == 1 ? b : b1 - 1;
|
517 |
|
|
f->bb[n + j].prev[1] = -1;
|
518 |
|
|
if (i >= 0) {
|
519 |
|
|
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
|
520 |
|
|
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
|
521 |
|
|
}
|
522 |
|
|
i = f->bb[b1].next[1];
|
523 |
|
|
if (i >= 0) {
|
524 |
|
|
if (f->bb[i].prev[0] == b) f->bb[i].prev[0] = b1;
|
525 |
|
|
if (f->bb[i].prev[1] == b) f->bb[i].prev[1] = b1;
|
526 |
|
|
}
|
527 |
|
|
}
|
528 |
|
|
}
|
529 |
|
|
}
|
530 |
|
|
|
531 |
|
|
/* Latch outputs in loops */
|
532 |
|
|
void add_latches (cuc_func *f)
|
533 |
|
|
{
|
534 |
|
|
int b, i, j;
|
535 |
|
|
|
536 |
|
|
//print_cuc_bb (f, "ADD_LATCHES a");
|
537 |
|
|
/* Cuts the tree and marks registers */
|
538 |
|
|
mark_cut (f);
|
539 |
|
|
|
540 |
|
|
/* Split BBs with more than one group */
|
541 |
|
|
for (b = 0; b < f->num_bb; b++) expand_bb (f, b);
|
542 |
|
|
remove_nops (f);
|
543 |
|
|
//print_cuc_bb (f, "ADD_LATCHES 0");
|
544 |
|
|
|
545 |
|
|
/* Convert accesses in BB_INLOOP type block to latched */
|
546 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
547 |
|
|
int j;
|
548 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
549 |
|
|
for (j = 0; j < MAX_OPERANDS; j++) if (f->bb[b].insn[i].opt[j] == OPT_REF) {
|
550 |
|
|
int t = f->bb[b].insn[i].op[j];
|
551 |
|
|
/* If we are pointing to a INLOOP block from outside, or forward
|
552 |
|
|
(= previous loop iteration) we must register that data */
|
553 |
|
|
if ((f->bb[REF_BB(t)].type & BB_INLOOP || no_multicycle)
|
554 |
|
|
&& !(f->INSN(t).type & (IT_BRANCH | IT_COND))
|
555 |
|
|
&& (REF_BB(t) != b || REF_I(t) >= i)) {
|
556 |
|
|
f->INSN(t).type |= IT_LATCHED;
|
557 |
|
|
}
|
558 |
|
|
}
|
559 |
|
|
}
|
560 |
|
|
//print_cuc_bb (f, "ADD_LATCHES 1");
|
561 |
|
|
|
562 |
|
|
/* Add latches at the end of blocks as needed */
|
563 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
564 |
|
|
int nreg = 0;
|
565 |
|
|
cuc_insn *insn;
|
566 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
567 |
|
|
if (f->bb[b].insn[i].type & IT_LATCHED) nreg++;
|
568 |
|
|
if (nreg) {
|
569 |
|
|
insn = (cuc_insn *) malloc (sizeof (cuc_insn) * (f->bb[b].ninsn + nreg));
|
570 |
|
|
j = 0;
|
571 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++) {
|
572 |
|
|
insn[i] = f->bb[b].insn[i];
|
573 |
|
|
if (insn[i].type & IT_LATCHED) {
|
574 |
|
|
cuc_insn *ii = &insn[f->bb[b].ninsn + j++];
|
575 |
|
|
change_insn_type (ii, II_REG);
|
576 |
|
|
ii->op[0] = -1; ii->opt[0] = OPT_DEST | OPT_REGISTER;
|
577 |
|
|
ii->op[1] = REF (b, i); ii->opt[1] = OPT_REF;
|
578 |
|
|
ii->opt[2] = ii->opt[3] = OPT_NONE;
|
579 |
|
|
ii->dep = NULL;
|
580 |
|
|
ii->type = IT_VOLATILE;
|
581 |
|
|
sprintf (ii->disasm, "reg %i_%i", b, i);
|
582 |
|
|
}
|
583 |
|
|
}
|
584 |
|
|
f->bb[b].ninsn += nreg;
|
585 |
|
|
free (f->bb[b].insn);
|
586 |
|
|
f->bb[b].insn = insn;
|
587 |
|
|
}
|
588 |
|
|
}
|
589 |
|
|
//print_cuc_bb (f, "ADD_LATCHES 2");
|
590 |
|
|
|
591 |
|
|
/* Repair references */
|
592 |
|
|
for (b = 0; b < f->num_bb; b++)
|
593 |
|
|
for (i = 0; i < f->bb[b].ninsn; i++)
|
594 |
|
|
for (j = 0; j < MAX_OPERANDS; j++)
|
595 |
|
|
/* If destination instruction is latched, use register instead */
|
596 |
|
|
if (f->bb[b].insn[i].opt[j] == OPT_REF
|
597 |
|
|
&& f->INSN(f->bb[b].insn[i].op[j]).type & IT_LATCHED) {
|
598 |
|
|
int b1, i1;
|
599 |
|
|
b1 = REF_BB (f->bb[b].insn[i].op[j]);
|
600 |
883 |
markom |
//cucdebug (2, "%i.%i.%i %x\n", b, i, j, f->bb[b].insn[i].op[j]);
|
601 |
879 |
markom |
if (b1 != b || REF_I(f->bb[b].insn[i].op[j]) >= i) {
|
602 |
|
|
for (i1 = f->bb[b1].ninsn - 1; i1 >= 0; i1--) {
|
603 |
|
|
assert (f->bb[b1].insn[i1].index == II_REG);
|
604 |
|
|
if (f->bb[b1].insn[i1].op[1] == f->bb[b].insn[i].op[j]) {
|
605 |
|
|
f->bb[b].insn[i].op[j] = REF (b1, i1);
|
606 |
|
|
break;
|
607 |
|
|
}
|
608 |
|
|
}
|
609 |
|
|
}
|
610 |
|
|
}
|
611 |
|
|
}
|
612 |
|
|
|
613 |
|
|
cuc_timings *preunroll_bb (char *bb_filename, cuc_func *f, cuc_timings *timings, int b, int i, int j)
|
614 |
|
|
{
|
615 |
|
|
cuc_func *func;
|
616 |
883 |
markom |
cucdebug (2, "BB%i unroll %i times preroll %i times\n", b, j, i);
|
617 |
879 |
markom |
func = preunroll_loop (f, b, i, j, bb_filename);
|
618 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_PREUNROLL");
|
619 |
879 |
markom |
|
620 |
|
|
log ("Optimizing.\n");
|
621 |
|
|
optimize_tree (func);
|
622 |
883 |
markom |
if (cuc_debug >= 6) //print_cuc_bb (func, "AFTER_OPT_TREE1");
|
623 |
879 |
markom |
remove_nops (func);
|
624 |
883 |
markom |
if (cuc_debug >= 6) //print_cuc_bb (func, "NO_NOPS");
|
625 |
879 |
markom |
remove_dead (func);
|
626 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD1");
|
627 |
879 |
markom |
optimize_bb (func);
|
628 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_OPT_BB");
|
629 |
879 |
markom |
remove_dead_bb (func);
|
630 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD_BB");
|
631 |
879 |
markom |
optimize_tree (func);
|
632 |
883 |
markom |
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_OPT_TREE");
|
633 |
|
|
log ("Common subexpression elimination.\n");
|
634 |
|
|
cse (func);
|
635 |
|
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
|
636 |
879 |
markom |
remove_dead (func);
|
637 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
|
638 |
879 |
markom |
remove_trivial_regs (func);
|
639 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
640 |
|
|
add_latches (func);
|
641 |
|
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
|
642 |
|
|
set_io (func);
|
643 |
879 |
markom |
add_memory_dep (func, memory_order);
|
644 |
883 |
markom |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
645 |
879 |
markom |
add_data_dep (func);
|
646 |
883 |
markom |
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
647 |
879 |
markom |
schedule_memory (func, memory_order);
|
648 |
883 |
markom |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
649 |
879 |
markom |
|
650 |
|
|
analyse_timings (func, timings);
|
651 |
883 |
markom |
cucdebug (2, "new_time = %i, old_time = %i, size = %f\n",
|
652 |
879 |
markom |
timings->new_time, func->orig_time, timings->size);
|
653 |
|
|
log ("new time = %icyc, old_time = %icyc, size = %.0f gates\n",
|
654 |
|
|
timings->new_time, func->orig_time, timings->size);
|
655 |
|
|
//output_verilog (func, argv[1]);
|
656 |
|
|
free_func (func);
|
657 |
|
|
timings->b = b;
|
658 |
|
|
timings->unroll = j;
|
659 |
|
|
timings->preroll = i;
|
660 |
883 |
markom |
timings->nshared = 0;
|
661 |
879 |
markom |
return timings;
|
662 |
|
|
}
|
663 |
|
|
|
664 |
|
|
int tim_comp (cuc_timings *a, cuc_timings *b)
|
665 |
|
|
{
|
666 |
|
|
if (a->new_time < b->new_time) return -1;
|
667 |
|
|
else if (a->new_time > b->new_time) return 1;
|
668 |
|
|
else return 0;
|
669 |
|
|
}
|
670 |
|
|
|
671 |
|
|
cuc_func *analyse_function (char *module_name, long orig_time,
|
672 |
|
|
unsigned long start_addr, unsigned long end_addr)
|
673 |
|
|
{
|
674 |
|
|
cuc_timings timings;
|
675 |
|
|
cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
|
676 |
|
|
cuc_func *saved;
|
677 |
|
|
int b, i, j;
|
678 |
|
|
char tmp1[256];
|
679 |
|
|
char tmp2[256];
|
680 |
|
|
|
681 |
|
|
func->orig_time = orig_time;
|
682 |
|
|
func->start_addr = start_addr;
|
683 |
|
|
func->end_addr = end_addr;
|
684 |
|
|
|
685 |
|
|
sprintf (tmp1, "%s.bin", module_name);
|
686 |
883 |
markom |
cucdebug (2, "Loading %s.bin\n", module_name);
|
687 |
879 |
markom |
cuc_load (tmp1);
|
688 |
|
|
|
689 |
|
|
log ("Detecting basic blocks\n");
|
690 |
|
|
detect_bb (func);
|
691 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_insns ("WITH_BB_LIMITS", 0);
|
692 |
879 |
markom |
|
693 |
|
|
//sprintf (tmp1, "%s.bin.mp", module_name);
|
694 |
|
|
sprintf (tmp2, "%s.bin.bb", module_name);
|
695 |
|
|
generate_bb_seq (func, config.sim.mprof_fn, tmp2);
|
696 |
|
|
|
697 |
|
|
build_bb (func);
|
698 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_BUILD_BB");
|
699 |
879 |
markom |
reg_dep (func);
|
700 |
|
|
|
701 |
|
|
log ("Detecting dependencies\n");
|
702 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_REG_DEP");
|
703 |
879 |
markom |
optimize_tree (func);
|
704 |
|
|
log ("Optimizing.\n");
|
705 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE1");
|
706 |
879 |
markom |
remove_nops (func);
|
707 |
883 |
markom |
if (cuc_debug >= 6) print_cuc_bb (func, "NO_NOPS");
|
708 |
879 |
markom |
remove_dead (func);
|
709 |
883 |
markom |
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_DEAD1");
|
710 |
879 |
markom |
optimize_bb (func);
|
711 |
883 |
markom |
if (cuc_debug >= 6) print_cuc_bb (func, "AFTER_OPT_BB");
|
712 |
879 |
markom |
remove_dead_bb (func);
|
713 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_DEAD_BB");
|
714 |
879 |
markom |
optimize_tree (func);
|
715 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_OPT_TREE");
|
716 |
|
|
log ("Common subexpression elimination.\n");
|
717 |
|
|
cse (func);
|
718 |
|
|
if (cuc_debug >= 3) print_cuc_bb (func, "AFTER_CSE");
|
719 |
879 |
markom |
remove_dead (func);
|
720 |
883 |
markom |
if (cuc_debug >= 5) print_cuc_bb (func, "AFTER_DEAD");
|
721 |
879 |
markom |
remove_trivial_regs (func);
|
722 |
883 |
markom |
if (cuc_debug >= 2) print_cuc_bb (func, "AFTER_TRIVIAL");
|
723 |
879 |
markom |
|
724 |
883 |
markom |
csm (func);
|
725 |
879 |
markom |
assert (saved = dup_func (func));
|
726 |
883 |
markom |
|
727 |
|
|
timings.preroll = timings.unroll = 1;
|
728 |
|
|
timings.nshared = 0;
|
729 |
|
|
add_latches (func);
|
730 |
|
|
set_io (func);
|
731 |
|
|
|
732 |
|
|
if (cuc_debug >= 1) print_cuc_bb (func, "AFTER_LATCHES");
|
733 |
|
|
analyse_timings (func, &timings);
|
734 |
879 |
markom |
add_memory_dep (func, memory_order);
|
735 |
883 |
markom |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_MEMORY_DEP");
|
736 |
879 |
markom |
add_data_dep (func);
|
737 |
883 |
markom |
if (cuc_debug >= 8) print_cuc_bb (func, "AFTER_DATA_DEP");
|
738 |
879 |
markom |
schedule_memory (func, memory_order);
|
739 |
883 |
markom |
if (cuc_debug >= 7) print_cuc_bb (func, "AFTER_SCHEDULE_MEM");
|
740 |
879 |
markom |
|
741 |
883 |
markom |
//output_verilog (func, module_name);
|
742 |
879 |
markom |
free_func (func);
|
743 |
883 |
markom |
log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
|
744 |
|
|
timings.preroll, timings.unroll, timings.nshared, timings.new_time, timings.size);
|
745 |
|
|
saved->timings = timings;
|
746 |
879 |
markom |
|
747 |
|
|
#if 1
|
748 |
|
|
/* detect and unroll simple loops */
|
749 |
|
|
for (b = 0; b < saved->num_bb; b++) {
|
750 |
|
|
cuc_timings t[MAX_UNROLL * MAX_PREROLL];
|
751 |
|
|
cuc_timings *ut;
|
752 |
|
|
cuc_timings *cut = &t[0];
|
753 |
|
|
int nt = 1;
|
754 |
|
|
double csize;
|
755 |
|
|
|
756 |
|
|
/* Is it a loop? */
|
757 |
|
|
if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b) continue;
|
758 |
|
|
t[0] = timings;
|
759 |
|
|
t[0].b = b;
|
760 |
|
|
t[0].preroll = 1;
|
761 |
|
|
t[0].unroll = 1;
|
762 |
883 |
markom |
t[0].nshared = 0;
|
763 |
879 |
markom |
|
764 |
|
|
sprintf (tmp1, "%s.bin.bb", module_name);
|
765 |
|
|
i = 1;
|
766 |
|
|
do {
|
767 |
|
|
cuc_timings *pt;
|
768 |
|
|
cuc_timings *cpt = cut;
|
769 |
|
|
j = 1;
|
770 |
|
|
|
771 |
|
|
do {
|
772 |
|
|
pt = cpt;
|
773 |
|
|
cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
|
774 |
|
|
} while (j <= MAX_PREROLL && pt->new_time >= cpt->new_time);
|
775 |
|
|
i++;
|
776 |
|
|
ut = cut;
|
777 |
|
|
cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
|
778 |
|
|
} while (i <= MAX_UNROLL && ut->new_time >= cut->new_time);
|
779 |
|
|
|
780 |
|
|
/* Sort the timings */
|
781 |
883 |
markom |
#if 0
|
782 |
|
|
if (cuc_debug >= 3)
|
783 |
879 |
markom |
for (i = 0; i < nt; i++) printf ("%i:%i,%i: %icyc\n",
|
784 |
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
|
785 |
883 |
markom |
#endif
|
786 |
879 |
markom |
|
787 |
|
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
788 |
|
|
|
789 |
|
|
/* Delete timings, that have worst time and bigger size than other */
|
790 |
|
|
j = 1;
|
791 |
|
|
csize = t[0].size;
|
792 |
|
|
for (i = 1; i < nt; i++)
|
793 |
|
|
if (t[i].size < csize) t[j++] = t[i];
|
794 |
|
|
nt = j;
|
795 |
883 |
markom |
|
796 |
|
|
cucdebug (1, "Available options\n");
|
797 |
|
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
798 |
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
799 |
|
|
/* Add results from CSM */
|
800 |
|
|
j = nt;
|
801 |
|
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
802 |
|
|
int i1;
|
803 |
|
|
for (i1 = 0; i1 < nt; i1++) {
|
804 |
|
|
t[j] = t[i1];
|
805 |
|
|
t[j].size += saved->bb[b].tim[i].size - timings.size;
|
806 |
|
|
t[j].new_time += saved->bb[b].tim[i].new_time - timings.new_time;
|
807 |
|
|
t[j].nshared = saved->bb[b].tim[i].nshared;
|
808 |
|
|
t[j].shared = saved->bb[b].tim[i].shared;
|
809 |
|
|
if (++j >= MAX_UNROLL * MAX_PREROLL) goto full;
|
810 |
|
|
}
|
811 |
|
|
}
|
812 |
|
|
|
813 |
|
|
full:
|
814 |
|
|
nt = j;
|
815 |
879 |
markom |
|
816 |
883 |
markom |
cucdebug (1, "Available options:\n");
|
817 |
|
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
818 |
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
819 |
879 |
markom |
|
820 |
883 |
markom |
/* Sort again with new timings added */
|
821 |
|
|
qsort (t, nt, sizeof (cuc_timings), (int (*)(const void *, const void *))tim_comp);
|
822 |
|
|
|
823 |
|
|
/* Delete timings, that have worst time and bigger size than other */
|
824 |
|
|
j = 1;
|
825 |
|
|
csize = t[0].size;
|
826 |
|
|
for (i = 1; i < nt; i++)
|
827 |
|
|
if (t[i].size < csize) t[j++] = t[i];
|
828 |
|
|
nt = j;
|
829 |
|
|
|
830 |
|
|
cucdebug (1, "Available options:\n");
|
831 |
|
|
for (i = 0; i < nt; i++) cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
|
832 |
|
|
t[i].b, t[i].preroll, t[i].unroll, t[i].new_time, t[i].size);
|
833 |
|
|
|
834 |
|
|
if (saved->bb[b].ntim) free (saved->bb[b].tim);
|
835 |
879 |
markom |
saved->bb[b].ntim = nt;
|
836 |
|
|
assert (saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt));
|
837 |
|
|
|
838 |
|
|
/* Copy options in reverse order -- smallest first */
|
839 |
|
|
for (i = 0; i < nt; i++) saved->bb[b].tim[i] = t[nt - 1 - i];
|
840 |
883 |
markom |
|
841 |
|
|
log ("Available options:\n");
|
842 |
|
|
for (i = 0; i < saved->bb[b].ntim; i++) {
|
843 |
|
|
log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
|
844 |
|
|
saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll, saved->bb[b].tim[i].unroll,
|
845 |
|
|
saved->bb[b].tim[i].nshared, saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
|
846 |
|
|
}
|
847 |
879 |
markom |
}
|
848 |
|
|
#endif
|
849 |
|
|
return saved;
|
850 |
|
|
}
|
851 |
|
|
|
852 |
883 |
markom |
void options_cmd (cuc_func *f, char *name)
|
853 |
|
|
{
|
854 |
|
|
int b, i;
|
855 |
|
|
printf ("%-12s :pre%i,un%i,sha%i: time = %i cyc; size = %.f gates (old time = %i)\n", name,
|
856 |
|
|
f->timings.preroll, f->timings.unroll, f->timings.nshared,
|
857 |
|
|
f->timings.new_time, f->timings.size, f->orig_time);
|
858 |
|
|
for (b = 0; b < f->num_bb; b++) {
|
859 |
|
|
/* Print out results */
|
860 |
|
|
for (i = 0; i < f->bb[b].ntim; i++) {
|
861 |
|
|
printf ("%-12sBB%-2i:pre%i,un%i,sha%i: time = %i cyc; size = %.f gates\n", name,
|
862 |
|
|
f->bb[b].tim[i].b, f->bb[b].tim[i].preroll, f->bb[b].tim[i].unroll,
|
863 |
|
|
f->bb[b].tim[i].nshared, f->bb[b].tim[i].new_time, f->bb[b].tim[i].size);
|
864 |
|
|
}
|
865 |
|
|
}
|
866 |
|
|
}
|
867 |
|
|
|
868 |
879 |
markom |
/* Dumps specified function to file (hex) */
|
869 |
|
|
unsigned long extract_function (char *out_fn, unsigned long start_addr)
|
870 |
|
|
{
|
871 |
|
|
FILE *fo;
|
872 |
|
|
unsigned long a = start_addr;
|
873 |
|
|
int x = 0;
|
874 |
|
|
assert (fo = fopen (out_fn, "wt+"));
|
875 |
|
|
|
876 |
|
|
do {
|
877 |
|
|
unsigned long d = evalsim_mem32 (a);
|
878 |
|
|
int index = insn_decode (d);
|
879 |
|
|
assert (index >= 0);
|
880 |
|
|
if (x) x++;
|
881 |
|
|
if (strcmp (insn_name (index), "l.jr") == 0) x = 1;
|
882 |
|
|
a += 4;
|
883 |
|
|
fprintf (fo, "%08x\n", d);
|
884 |
|
|
} while (x < 2);
|
885 |
|
|
|
886 |
|
|
fclose (fo);
|
887 |
|
|
return a - 4;
|
888 |
|
|
}
|
889 |
|
|
|
890 |
|
|
static cuc_func *func[MAX_FUNCS];
|
891 |
|
|
|
892 |
|
|
void main_cuc (char *filename)
|
893 |
|
|
{
|
894 |
883 |
markom |
int i, j;
|
895 |
879 |
markom |
char tmp1[256];
|
896 |
|
|
|
897 |
883 |
markom |
printf ("Entering OpenRISC Custom Unit Compiler command prompt\n");
|
898 |
|
|
printf ("Using profile file \"%s\" and memory profile file \"%s\".\n", config.sim.prof_fn, config.sim.mprof_fn);
|
899 |
879 |
markom |
sprintf (tmp1, "%s.log", filename);
|
900 |
883 |
markom |
printf ("Analyzing. (log file \"%s\").\n", tmp1);
|
901 |
879 |
markom |
assert (flog = fopen (tmp1, "wt+"));
|
902 |
|
|
|
903 |
|
|
/* Loads in the specified timings table */
|
904 |
|
|
load_timing_table ("virtex.tim");
|
905 |
|
|
|
906 |
|
|
prof_set (1, 0);
|
907 |
|
|
assert (prof_acquire (config.sim.prof_fn) == 0);
|
908 |
|
|
|
909 |
|
|
cycle_duration = 40.;
|
910 |
|
|
|
911 |
|
|
/* Try all functions except "total" */
|
912 |
|
|
for (i = 0; i < prof_nfuncs - 1; i++) {
|
913 |
|
|
long orig_time;
|
914 |
|
|
unsigned long start_addr, end_addr;
|
915 |
|
|
orig_time = prof_func[i].cum_cycles;
|
916 |
|
|
start_addr = prof_func[i].addr;
|
917 |
|
|
|
918 |
|
|
/* Extract the function from the binary */
|
919 |
|
|
sprintf (tmp1, "%s.bin", prof_func[i].name);
|
920 |
|
|
end_addr = extract_function (tmp1, start_addr);
|
921 |
|
|
|
922 |
|
|
log ("Testing function %s (%08x - %08x)\n", prof_func[i].name, start_addr, end_addr);
|
923 |
|
|
func[i] = analyse_function (prof_func[i].name, orig_time, start_addr, end_addr);
|
924 |
|
|
}
|
925 |
|
|
|
926 |
883 |
markom |
while (1) {
|
927 |
|
|
char *s;
|
928 |
|
|
printf ("(cuc) ");
|
929 |
|
|
fflush (stdout);
|
930 |
|
|
fgets(tmp1, sizeof tmp1, stdin);
|
931 |
|
|
for (s = tmp1; *s != '\0' && *s != '\n' && *s != '\r'; s++);
|
932 |
|
|
*s = '\0';
|
933 |
|
|
|
934 |
|
|
if (strcmp (tmp1, "q") == 0 || strcmp (tmp1, "quit") == 0) {
|
935 |
|
|
break;
|
936 |
|
|
} else if (strcmp (tmp1, "p") == 0 || strcmp (tmp1, "profile") == 0) {
|
937 |
|
|
printf ("----------------------------------------------------------------------------\n");
|
938 |
|
|
printf ("|function name |addr |# calls |avg cycles | old% | impr. f. |\n");
|
939 |
|
|
printf ("|-------------------------+--------+--------+------------+------+----------|\n");
|
940 |
|
|
for (j = 0; j < prof_nfuncs; j++) {
|
941 |
|
|
int bestcyc = 0, besti = 0;
|
942 |
|
|
for (i = 0; i < prof_nfuncs; i++)
|
943 |
|
|
if (prof_func[i].cum_cycles > bestcyc) {
|
944 |
|
|
bestcyc = prof_func[i].cum_cycles;
|
945 |
|
|
besti = i;
|
946 |
|
|
}
|
947 |
|
|
i = besti;
|
948 |
|
|
printf ("| %-24s|%08X|%8i|%12.1f| %3.0f%% |",
|
949 |
|
|
prof_func[i].name, prof_func[i].addr, prof_func[i].calls,
|
950 |
|
|
((double)prof_func[i].cum_cycles / prof_func[i].calls),
|
951 |
|
|
(100. * prof_func[i].cum_cycles / prof_cycles));
|
952 |
|
|
if (func[i]) {
|
953 |
|
|
printf ("%9.2f |\n", 1.f * prof_func[i].cum_cycles / func[i]->timings.new_time);
|
954 |
|
|
} else printf (" N/A |\n");
|
955 |
|
|
prof_func[i].cum_cycles = -1;
|
956 |
|
|
}
|
957 |
|
|
printf ("----------------------------------------------------------------------------\n");
|
958 |
|
|
printf ("Total %i functions, %i cycles.\n", prof_nfuncs, prof_cycles);
|
959 |
|
|
} else if (strncmp (tmp1, "d", 1) == 0 || strncmp (tmp1, "debug", 5) == 0) {
|
960 |
|
|
sscanf (tmp1, "%*s %i", &cuc_debug);
|
961 |
|
|
if (cuc_debug < 0) cuc_debug = 0;
|
962 |
|
|
if (cuc_debug > 9) cuc_debug = 9;
|
963 |
|
|
} else if (strcmp (tmp1, "g") == 0 || strcmp (tmp1, "generate") == 0) {
|
964 |
|
|
for (i = 0; i < prof_nfuncs; i++);
|
965 |
|
|
} else if (strcmp (tmp1, "o") == 0 || strcmp (tmp1, "options") == 0) {
|
966 |
|
|
printf ("Available options:\n");
|
967 |
|
|
for (i = 0; i < prof_nfuncs; i++)
|
968 |
|
|
if (func[i]) options_cmd (func[i], prof_func[i].name);
|
969 |
|
|
} else {
|
970 |
|
|
if (strcmp (tmp1, "h") != 0 && strcmp (tmp1, "help") != 0)
|
971 |
|
|
printf ("Unknown command.\n");
|
972 |
|
|
printf ("OpenRISC Custom Unit Compiler command prompt\n");
|
973 |
|
|
printf ("h|help displays this help\n");
|
974 |
|
|
printf ("q|quit returns to or1ksim prompt\n");
|
975 |
|
|
printf ("p|profile displays function profiling\n");
|
976 |
|
|
printf ("d|debug # sets debug level (0-9)\n");
|
977 |
|
|
printf ("o|options displays available options\n");
|
978 |
|
|
printf ("g|generate generates verilog file\n");
|
979 |
|
|
}
|
980 |
|
|
}
|
981 |
|
|
|
982 |
879 |
markom |
/* Dispose memory */
|
983 |
|
|
for (i = 0; i < prof_nfuncs -1; i++)
|
984 |
|
|
if (func[i]) free_func (func[i]);
|
985 |
|
|
|
986 |
|
|
fclose (flog);
|
987 |
|
|
}
|
988 |
|
|
|