OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [tilepro/] [gen-mul-tables.cc] - Blame information for rev 709

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 709 jeremybenn
/* Multiply table generator for tile.
2
   Copyright (C) 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Walter Lee (walt@tilera.com)
5
 
6
   This file is part of GCC.
7
 
8
   GCC is free software; you can redistribute it and/or modify it
9
   under the terms of the GNU General Public License as published
10
   by the Free Software Foundation; either version 3, or (at your
11
   option) any later version.
12
 
13
   GCC is distributed in the hope that it will be useful, but WITHOUT
14
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16
   License for more details.
17
 
18
   You should have received a copy of the GNU General Public License
19
   along with GCC; see the file COPYING3.  If not see
20
   <http://www.gnu.org/licenses/>.  */
21
 
22
/* This program creates a table used to compile multiply by constant
23
   efficiently.
24
 
25
   This program should be compiled by a c++ compiler.  If it's
26
   compiled with with -DTILEPRO, it generates the multiply table for
27
   TILEPro; otherwise it generates the multiply table for TILE-Gx.
28
   Running the program produces the table in stdout.
29
 
30
   The program works by generating every possible combination of up to
31
   MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
32
   shift) and computing the multiplier computed by those instructions.
33
   For example,
34
 
35
   s2a r2,r1,r1
36
   s2a r3,r2,r2
37
 
38
   multiplies r1 by 25.
39
 
40
   There are usually multiple instruction sequences to multiply by a
41
   given constant. This program keeps only the cheapest one.
42
   "Cheapest" is defined first by the minimum theoretical schedule
43
   length, and if those are equal then by the number of instructions,
44
   and if those are equal then by which instructions we "prefer"
45
   (e.g. because we think one might take infinitesimally less power
46
   than another, or simply because we arbitrarily pick one to be more
47
   canonical).
48
 
49
   Once this program has determined the best instruction sequence for
50
   each multiplier, it emits them in a table sorted by the multiplier
51
   so the user can binary-search it to look for a match.  The table is
52
   pruned based on various criteria to keep its sizes reasonable.  */
53
 
54
#include <stdio.h>
55
#include <stdlib.h>
56
#include <assert.h>
57
#include <string.h>
58
#define __STDC_LIMIT_MACROS
59
#include <stdint.h>
60
 
61
#include <map>
62
 
63
#ifdef TILEPRO
64
 
65
/* The string representing the architecture.  */
66
#define ARCH "tilepro"
67
 
68
/* The type for the multiplication.  */
69
typedef int MUL_TYPE;
70
 
71
#else
72
 
73
/* The string representing the architecture.  */
74
#define ARCH "tilegx"
75
 
76
/* The type for the multiplication.  */
77
typedef long long MUL_TYPE;
78
 
79
#endif
80
 
81
/* Longest instruction sequence this will produce. With the current
82
   stupid algorithm runtime grows very quickly with this number.  */
83
#define MAX_INSTRUCTIONS 4
84
 
85
/* Maximum number of subexpressions in the expression DAG being
86
   generated.  This is the same as the number of instructions, except
87
   that zero and the original register we'd like to multiply by a
88
   constant are also thrown into the mix.  */
89
#define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
90
 
91
#define MIN(x, y)  ((x) <= (y) ? (x) : (y))
92
#define MAX(x, y)  ((x) >= (y) ? (x) : (y))
93
 
94
/* For this program a unary op is one which has only one nonconstant
95
   operand.  So shift left by 5 is considered unary.  */
96
typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
97
typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
98
 
99
/* This describes an operation like 'add two registers' or 'left-shift
100
   by 7'.
101
 
102
   We call something a 'unary' operator if it only takes in one
103
   register as input, even though it might have an implicit second
104
   constant operand.  Currently this is used for left-shift by
105
   constant.  */
106
class Operator
107
{
108
public:
109
  /* Construct for a binary operator.  */
110
  Operator (const char *pattern, const char *name, binary_op_func func,
111
            int cost)
112
    : m_pattern (pattern), m_name (name), m_top_index (-1),
113
      m_unary_func (0), m_binary_func (func), m_cost (cost),
114
      m_rhs_if_unary (0)
115
  {
116
  }
117
 
118
  /* Construct for a unary operator.  */
119
  Operator (const char *pattern, const char *name, unary_op_func func,
120
            int rhs_if_unary, int cost)
121
    : m_pattern (pattern), m_name (name), m_top_index (-1),
122
      m_unary_func (func), m_binary_func (0), m_cost (cost),
123
      m_rhs_if_unary (rhs_if_unary)
124
  {
125
  }
126
 
127
  bool is_unary () const
128
  {
129
    return m_binary_func == NULL;
130
  }
131
 
132
  /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3.  */
133
  const char *m_pattern;
134
 
135
  /* Name of the opcode for this operation, e.g. add.  */
136
  const char *m_name;
137
 
138
  /* We don't have enough bits in our output representation to store
139
     the original insn_code value, so we store a compressed form
140
     instead.  These values are decoded back into insn_code via the
141
     machine-generated multiply_insn_seq_decode_opcode lookup
142
     table.  */
143
  int m_top_index;
144
 
145
  /* Unary operator to apply, or NULL if this is a binary operator.  */
146
  unary_op_func m_unary_func;
147
 
148
  /* Binary operator to apply, or NULL if this is a unary operator.  */
149
  binary_op_func m_binary_func;
150
 
151
  /* Function of how expensive we consider this operator. Higher is
152
     worse.  */
153
  int m_cost;
154
 
155
  /* the RHS value to write into the C file if unary; used for shift
156
     count.  */
157
  int m_rhs_if_unary;
158
};
159
 
160
 
161
/* An entry in an expression DAG.  */
162
class Expr
163
{
164
public:
165
  Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
166
    m_critical_path_length (0)
167
  {
168
  }
169
 
170
  /* The operator being applied to the operands.  */
171
  const Operator *m_op;
172
 
173
  /* The index of the left-hand operand in the array of subexpressions
174
     already computed.  */
175
  int m_lhs;
176
 
177
  /* For binary ops ,this is the index of the left-hand operand in the
178
     array of subexpressions already computed. For unary ops, it is
179
     specific to the op (e.g. it might hold a constant shift
180
     count).  */
181
  int m_rhs;
182
 
183
  /* The multiplier produced by this expression tree. For example, for
184
     the tree ((x << 5) + x), the value would be 33.  */
185
  MUL_TYPE m_produced_val;
186
 
187
  /* How far is this expression from the root, i.e. how many cycles
188
     minimum will it take to compute this?  */
189
  int m_critical_path_length;
190
};
191
 
192
 
193
/* Make function pointers for the various linear operators we can
194
   apply to compute a multiplicative value.  */
195
 
196
static MUL_TYPE
197
add (MUL_TYPE n1, MUL_TYPE n2)
198
{
199
  return n1 + n2;
200
}
201
 
202
static MUL_TYPE
203
sub (MUL_TYPE n1, MUL_TYPE n2)
204
{
205
  return n1 - n2;
206
}
207
 
208
static MUL_TYPE
209
s1a (MUL_TYPE n1, MUL_TYPE n2)
210
{
211
  return n1 * 2 + n2;
212
}
213
 
214
static MUL_TYPE
215
s2a (MUL_TYPE n1, MUL_TYPE n2)
216
{
217
  return n1 * 4 + n2;
218
}
219
 
220
static MUL_TYPE
221
s3a (MUL_TYPE n1, MUL_TYPE n2)
222
{
223
  return n1 * 8 + n2;
224
}
225
 
226
#define SHIFT(count)                            \
227
static MUL_TYPE                                 \
228
shift##count(MUL_TYPE n)                        \
229
{                                               \
230
  return n << (count);                          \
231
}
232
 
233
SHIFT (1);
234
SHIFT (2);
235
SHIFT (3);
236
SHIFT (4);
237
SHIFT (5);
238
SHIFT (6);
239
SHIFT (7);
240
SHIFT (8);
241
SHIFT (9);
242
SHIFT (10);
243
SHIFT (11);
244
SHIFT (12);
245
SHIFT (13);
246
SHIFT (14);
247
SHIFT (15);
248
SHIFT (16);
249
SHIFT (17);
250
SHIFT (18);
251
SHIFT (19);
252
SHIFT (20);
253
SHIFT (21);
254
SHIFT (22);
255
SHIFT (23);
256
SHIFT (24);
257
SHIFT (25);
258
SHIFT (26);
259
SHIFT (27);
260
SHIFT (28);
261
SHIFT (29);
262
SHIFT (30);
263
SHIFT (31);
264
#ifndef TILEPRO
265
SHIFT (32);
266
SHIFT (33);
267
SHIFT (34);
268
SHIFT (35);
269
SHIFT (36);
270
SHIFT (37);
271
SHIFT (38);
272
SHIFT (39);
273
SHIFT (40);
274
SHIFT (41);
275
SHIFT (42);
276
SHIFT (43);
277
SHIFT (44);
278
SHIFT (45);
279
SHIFT (46);
280
SHIFT (47);
281
SHIFT (48);
282
SHIFT (49);
283
SHIFT (50);
284
SHIFT (51);
285
SHIFT (52);
286
SHIFT (53);
287
SHIFT (54);
288
SHIFT (55);
289
SHIFT (56);
290
SHIFT (57);
291
SHIFT (58);
292
SHIFT (59);
293
SHIFT (60);
294
SHIFT (61);
295
SHIFT (62);
296
SHIFT (63);
297
#endif
298
 
299
#ifdef TILEPRO
300
static Operator ops[] = {
301
  Operator ("CODE_FOR_addsi3", "add", add, 1040),
302
  Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
303
  Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
304
  Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
305
  Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
306
  /* Note: shl by 1 is not necessary, since adding a value to itself
307
     produces the same result. But the architecture team thinks
308
     left-shift may use slightly less power, so we might as well
309
     prefer it.  */
310
  Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
311
  Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
312
  Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
313
  Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
314
  Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
315
  Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
316
  Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
317
  Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
318
  Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
319
  Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
320
  Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
321
  Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
322
  Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
323
  Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
324
  Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
325
  Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
326
  Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
327
  Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
328
  Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
329
  Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
330
  Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
331
  Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
332
  Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
333
  Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
334
  Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
335
  Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
336
  Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
337
  Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
338
  Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
339
  Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
340
  Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
341
};
342
#else
343
static Operator ops[] = {
344
  Operator ("CODE_FOR_adddi3", "add", add, 1070),
345
  Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
346
  Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
347
  Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
348
  Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
349
  // Note: shl by 1 is not necessary, since adding a value to itself
350
  // produces the same result. But the architecture team thinks left-shift
351
  // may use slightly less power, so we might as well prefer it.
352
  Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
353
  Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
354
  Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
355
  Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
356
  Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
357
  Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
358
  Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
359
  Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
360
  Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
361
  Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
362
  Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
363
  Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
364
  Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
365
  Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
366
  Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
367
  Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
368
  Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
369
  Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
370
  Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
371
  Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
372
  Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
373
  Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
374
  Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
375
  Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
376
  Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
377
  Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
378
  Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
379
  Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
380
  Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
381
  Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
382
  Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
383
  Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
384
  Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
385
  Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
386
  Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
387
  Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
388
  Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
389
  Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
390
  Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
391
  Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
392
  Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
393
  Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
394
  Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
395
  Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
396
  Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
397
  Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
398
  Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
399
  Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
400
  Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
401
  Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
402
  Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
403
  Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
404
  Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
405
  Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
406
  Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
407
  Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
408
  Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
409
  Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
410
  Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
411
  Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
412
  Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
413
  Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
414
  Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
415
};
416
#endif
417
 
418
/* An ExpressionTree is an expression DAG.  */
419
class ExpressionTree
420
{
421
public:
422
  ExpressionTree () : m_num_vals (2)
423
  {
424
    m_exprs[0].m_produced_val = 0;
425
    m_exprs[1].m_produced_val = 1;
426
  }
427
 
428
  void copy_technique_from (ExpressionTree * other)
429
  {
430
    /* TODO: write this; other can compute the same products with less
431
       cost.  We update this ExpressionTree object because some int is
432
       already mapped to it.  */
433
  }
434
 
435
  int m_num_vals;
436
  Expr m_exprs[MAX_SUBEXPRS];
437
 
438
  int cost () const
439
  {
440
    int cost = 0;
441
    for (int j = 2; j < m_num_vals; j++)
442
        cost += m_exprs[j].m_op->m_cost;
443
      return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
444
  }
445
};
446
 
447
 
448
typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
449
 
450
 
451
static void
452
find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
453
{
454
  /* Don't look more if we can't add any new values to the expression
455
     tree.  */
456
  const int num_vals = s.m_num_vals;
457
  if (num_vals == MAX_SUBEXPRS)
458
    return;
459
 
460
  /* Grow array to make room for new values added as we loop.  */
461
  s.m_num_vals = num_vals + 1;
462
 
463
  const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
464
  const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
465
 
466
  for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
467
    {
468
      const Operator *const op = &ops[f];
469
 
470
      for (int i = 0; i < num_vals; i++)
471
        {
472
          /* Only allow zero as the first operand to sub, otherwise
473
             it is useless.  */
474
          if (i == 0 && op->m_binary_func != sub)
475
            continue;
476
 
477
          /* Unary ops don't actually use the second operand, so as a
478
             big hack we trick it into only looping once in the inner
479
             loop.  */
480
          const int j_end = op->is_unary () ? 2 : num_vals;
481
 
482
          /* We never allow zero as the second operand, as it is
483
             always useless.  */
484
          for (int j = 1; j < j_end; j++)
485
            {
486
              /* Does this expression use the immediately previous
487
                 expression?  */
488
              const bool uses_prev_value =
489
                (i == num_vals - 1
490
                 || (!op->is_unary () && j == num_vals - 1));
491
 
492
              if (!uses_prev_value)
493
                {
494
                  /* For search efficiency, prune redundant
495
                     instruction orderings.
496
 
497
                     This op does not take the immediately preceding
498
                     value as input, which means we could have done it
499
                     in the previous slot. If its opcode is less than
500
                     the previous instruction's, we'll declare this
501
                     instruction order non-canonical and not pursue
502
                     this branch of the search.  */
503
                  if (op->m_top_index < prev_top_index)
504
                    continue;
505
                }
506
 
507
              MUL_TYPE n;
508
              if (op->is_unary ())
509
                {
510
                  n = op->m_unary_func (s.m_exprs[i].m_produced_val);
511
                }
512
              else
513
                {
514
                  n = op->m_binary_func (s.m_exprs[i].m_produced_val,
515
                                         s.m_exprs[j].m_produced_val);
516
                }
517
 
518
              bool duplicate = false;
519
              for (int k = 0; k < num_vals; k++)
520
                if (n == s.m_exprs[j].m_produced_val)
521
                  {
522
                    duplicate = true;
523
                    break;
524
                  }
525
 
526
              if (duplicate)
527
                continue;
528
 
529
              /* See if we found the best solution for n.  */
530
              Expr *e = &s.m_exprs[num_vals];
531
              e->m_op = op;
532
              e->m_lhs = i;
533
              e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
534
              e->m_produced_val = n;
535
              e->m_critical_path_length =
536
                1 + MAX (s.m_exprs[i].m_critical_path_length,
537
                         s.m_exprs[j].m_critical_path_length);
538
 
539
              ExpressionTreeMap::iterator best (best_solution.find (n));
540
              if (best == best_solution.end ()
541
                  || (*best).second->cost () > s.cost ())
542
                best_solution[n] = new ExpressionTree (s);
543
 
544
              /* Recurse and find more.  */
545
              find_sequences (s, best_solution);
546
            }
547
        }
548
    }
549
 
550
  /* Restore old size.  */
551
  s.m_num_vals = num_vals;
552
}
553
 
554
 
555
/* For each insn_code used by an operator, choose a compact number so
556
   it takes less space in the output data structure. This prints out a
557
   lookup table used to map the compactified number back to an
558
   insn_code.  */
559
static void
560
create_insn_code_compression_table ()
561
{
562
  int next_index = 1;
563
 
564
  /* Entry 0 must hold CODE_FOR_nothing to mean "end of array".  */
565
  printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
566
          "  CODE_FOR_nothing /* must be first */ ", ARCH);
567
 
568
  for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
569
    {
570
      Operator *op = &ops[i];
571
      int index = -1;
572
 
573
      /* See if some previous Operator was using the same insn_code.
574
         If so, reuse its table entry.  */
575
      for (size_t j = 0; j < i; j++)
576
        {
577
          Operator *old = &ops[j];
578
          if (strcmp (old->m_pattern, op->m_pattern) == 0)
579
            {
580
              index = old->m_top_index;
581
              break;
582
            }
583
        }
584
 
585
      if (index == -1)
586
        {
587
          /* We need to make a new entry in the table.  */
588
          printf (",\n  %s", op->m_pattern);
589
          index = next_index++;
590
        }
591
 
592
      op->m_top_index = index;
593
    }
594
 
595
  printf ("\n};\n\n");
596
}
597
 
598
 
599
/* These are constants we've seen in code, that we want to keep table
600
   entries for.  */
601
static int multiply_constants_used[] = {
602
  -11796480,
603
  -27439,
604
  -25148,
605
  -22820,
606
  -21709,
607
  -20995,
608
  -20284,
609
  -20239,
610
  -19595,
611
  -19447,
612
  -19183,
613
  -19165,
614
  -18730,
615
  -17828,
616
  -17799,
617
  -17237,
618
  -17036,
619
  -16549,
620
  -16423,
621
  -16294,
622
  -16244,
623
  -16069,
624
  -15137,
625
  -15083,
626
  -15038,
627
  -14924,
628
  -14905,
629
  -14752,
630
  -14731,
631
  -14529,
632
  -14273,
633
  -14090,
634
  -14084,
635
  -14043,
636
  -13850,
637
  -13802,
638
  -13631,
639
  -13455,
640
  -13275,
641
  -12879,
642
  -12700,
643
  -12534,
644
  -12399,
645
  -12131,
646
  -12112,
647
  -12054,
648
  -12019,
649
  -11759,
650
  -11585,
651
  -11467,
652
  -11395,
653
  -11295,
654
  -11248,
655
  -11148,
656
  -11116,
657
  -11086,
658
  -11059,
659
  -11018,
660
  -10811,
661
  -10538,
662
  -10258,
663
  -10217,
664
  -10033,
665
  -9766,
666
  -9754,
667
  -9534,
668
  -9527,
669
  -9467,
670
  -9262,
671
  -9232,
672
  -9222,
673
  -9198,
674
  -9191,
675
  -9113,
676
  -8825,
677
  -8756,
678
  -8697,
679
  -8693,
680
  -8565,
681
  -8342,
682
  -8208,
683
  -8200,
684
  -8170,
685
  -8102,
686
  -7770,
687
  -7678,
688
  -7562,
689
  -7376,
690
  -7373,
691
  -7221,
692
  -7121,
693
  -6835,
694
  -6810,
695
  -6626,
696
  -6581,
697
  -6461,
698
  -6278,
699
  -6263,
700
  -6163,
701
  -6029,
702
  -5816,
703
  -5540,
704
  -5461,
705
  -5384,
706
  -5329,
707
  -4985,
708
  -4926,
709
  -4815,
710
  -4788,
711
  -4758,
712
  -4433,
713
  -4229,
714
  -4209,
715
  -4176,
716
  -4104,
717
  -4095,
718
  -4078,
719
  -3941,
720
  -3818,
721
  -3600,
722
  -3474,
723
  -3314,
724
  -3264,
725
  -3196,
726
  -3072,
727
  -2912,
728
  -2836,
729
  -2773,
730
  -2269,
731
  -2184,
732
  -2100,
733
  -1730,
734
  -1512,
735
  -1500,
736
  -1396,
737
  -1344,
738
  -1312,
739
  -1297,
740
  -1059,
741
  -1058,
742
  1027,
743
  1049,
744
  1059,
745
  1100,
746
  1104,
747
  1108,
748
  1136,
749
  1200,
750
  1204,
751
  1242,
752
  1292,
753
  1304,
754
  1312,
755
  1320,
756
  1336,
757
  1344,
758
  1348,
759
  1360,
760
  1364,
761
  1395,
762
  1448,
763
  1460,
764
  1461,
765
  1472,
766
  1488,
767
  1500,
768
  1512,
769
  1568,
770
  1576,
771
  1649,
772
  1664,
773
  1684,
774
  1696,
775
  1744,
776
  1812,
777
  1822,
778
  1884,
779
  1963,
780
  1978,
781
  2000,
782
  2012,
783
  2014,
784
  2037,
785
  2039,
786
  2100,
787
  2139,
788
  2144,
789
  2184,
790
  2237,
791
  2260,
792
  2320,
793
  2408,
794
  2446,
795
  2447,
796
  2499,
797
  2531,
798
  2578,
799
  2592,
800
  2611,
801
  2633,
802
  2704,
803
  2730,
804
  2773,
805
  2880,
806
  2896,
807
  2998,
808
  3000,
809
  3001,
810
  3021,
811
  3079,
812
  3112,
813
  3150,
814
  3179,
815
  3192,
816
  3240,
817
  3264,
818
  3271,
819
  3283,
820
  3328,
821
  3363,
822
  3367,
823
  3453,
824
  3529,
825
  3570,
826
  3580,
827
  3600,
828
  3624,
829
  3707,
830
  3783,
831
  3826,
832
  3897,
833
  3941,
834
  3962,
835
  3989,
836
  4000,
837
  4025,
838
  4073,
839
  4093,
840
  4099,
841
  4108,
842
  4184,
843
  4209,
844
  4369,
845
  4376,
846
  4416,
847
  4433,
848
  4434,
849
  4482,
850
  4582,
851
  4712,
852
  4717,
853
  4813,
854
  4815,
855
  4864,
856
  5000,
857
  5027,
858
  5040,
859
  5091,
860
  5195,
861
  5243,
862
  5260,
863
  5285,
864
  5329,
865
  5331,
866
  5350,
867
  5361,
868
  5387,
869
  5461,
870
  5492,
871
  5529,
872
  5573,
873
  5793,
874
  5819,
875
  5915,
876
  5946,
877
  5992,
878
  6000,
879
  6164,
880
  6205,
881
  6262,
882
  6263,
883
  6269,
884
  6270,
885
  6387,
886
  6400,
887
  6406,
888
  6476,
889
  6541,
890
  6565,
891
  6568,
892
  6626,
893
  6656,
894
  6732,
895
  6810,
896
  6817,
897
  6859,
898
  7040,
899
  7053,
900
  7141,
901
  7169,
902
  7221,
903
  7223,
904
  7274,
905
  7282,
906
  7350,
907
  7369,
908
  7373,
909
  7442,
910
  7447,
911
  7471,
912
  7518,
913
  7542,
914
  7566,
915
  7587,
916
  7663,
917
  7678,
918
  7682,
919
  7748,
920
  7752,
921
  7791,
922
  8000,
923
  8026,
924
  8048,
925
  8170,
926
  8203,
927
  8204,
928
  8290,
929
  8368,
930
  8520,
931
  8640,
932
  8666,
933
  8672,
934
  8697,
935
  8716,
936
  8728,
937
  8756,
938
  8820,
939
  8875,
940
  8918,
941
  8956,
942
  9058,
943
  9154,
944
  9175,
945
  9191,
946
  9217,
947
  9262,
948
  9321,
949
  9373,
950
  9434,
951
  9465,
952
  9514,
953
  9534,
954
  9633,
955
  9746,
956
  9810,
957
  9850,
958
  9947,
959
  9973,
960
  10000,
961
  10009,
962
  10033,
963
  10055,
964
  10217,
965
  10248,
966
  10298,
967
  10310,
968
  10323,
969
  10368,
970
  10438,
971
  10456,
972
  10486,
973
  10538,
974
  10664,
975
  10695,
976
  10700,
977
  10703,
978
  10832,
979
  10887,
980
  10935,
981
  10958,
982
  11018,
983
  11059,
984
  11061,
985
  11086,
986
  11116,
987
  11148,
988
  11190,
989
  11249,
990
  11314,
991
  11332,
992
  11363,
993
  11409,
994
  11415,
995
  11443,
996
  11467,
997
  11512,
998
  11522,
999
  11529,
1000
  11585,
1001
  11759,
1002
  11768,
1003
  11795,
1004
  11893,
1005
  11997,
1006
  12131,
1007
  12299,
1008
  12536,
1009
  12543,
1010
  12893,
1011
  12945,
1012
  12998,
1013
  13109,
1014
  13213,
1015
  13685,
1016
  13930,
1017
  14023,
1018
  14024,
1019
  14271,
1020
  14564,
1021
  14647,
1022
  15326,
1023
  15850,
1024
  15855,
1025
  15929,
1026
  16000,
1027
  16154,
1028
  16496,
1029
  16807,
1030
  16819,
1031
  16984,
1032
  17203,
1033
  17223,
1034
  17333,
1035
  17760,
1036
  17799,
1037
  17837,
1038
  18029,
1039
  18068,
1040
  18336,
1041
  18515,
1042
  19595,
1043
  20017,
1044
  20131,
1045
  20862,
1046
  20995,
1047
  21709,
1048
  22554,
1049
  25000,
1050
  25172,
1051
  25600,
1052
  25733,
1053
  27439,
1054
  38470,
1055
  46802,
1056
  50000,
1057
  11796480,
1058
  16843009,
1059
  23592960,
1060
};
1061
 
1062
 
1063
const int num_mult_constants_used =
1064
  (int)(sizeof multiply_constants_used
1065
        / sizeof multiply_constants_used[0]);
1066
 
1067
 
1068
#define XSIZE (sizeof multiply_constants_used / \
1069
               sizeof multiply_constants_used[0] + 32) / 32
1070
unsigned multiply_constants_avail[XSIZE];
1071
#undef XSIZE
1072
 
1073
 
1074
/* bsearch helper function.  */
1075
static int
1076
compare_constants (const void *key, const void *t)
1077
{
1078
  return (*(int*)key) - *((int*)t);
1079
}
1080
 
1081
 
1082
static int *
1083
find_mult_constants_used (int multiplier)
1084
{
1085
  return (int *) bsearch (&multiplier, multiply_constants_used,
1086
                          num_mult_constants_used,
1087
                          sizeof multiply_constants_used[0],
1088
                          compare_constants);
1089
}
1090
 
1091
 
1092
int num_ops (ExpressionTree *s)
1093
{
1094
  int n = 0;
1095
  for (int i = 0; i < s->m_num_vals; i++)
1096
    {
1097
      Expr *e = &s->m_exprs[i];
1098
      if (e->m_op != NULL)
1099
        n++;
1100
    }
1101
  return n;
1102
}
1103
 
1104
 
1105
#ifdef TILEPRO
1106
bool
1107
tilepro_emit (int multiplier, int num_ops)
1108
{
1109
  int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1110
 
1111
  /* Keep constants in range [-1024, 1024].  */
1112
  if (abs_multiplier <= 1024)
1113
    return true;
1114
 
1115
  /* Keep constants near powers of two.  */
1116
  int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1117
  int next_pow2 = prev_pow2 << 1;
1118
 
1119
  if ((abs_multiplier - prev_pow2 <= 10)
1120
      || (next_pow2 - abs_multiplier <= 10))
1121
    return true;
1122
 
1123
  /* Keep constants near powers of ten.  */
1124
  {
1125
    long long j = 1;
1126
    long long prev_pow10;
1127
    long long next_pow10;
1128
 
1129
    while (((j * 10) < abs_multiplier)
1130
           && (j < (j * 10)))
1131
      j = j * 10;
1132
 
1133
    prev_pow10 = j;
1134
    next_pow10 = j * 10;
1135
 
1136
    if ((abs_multiplier - prev_pow10 <= 10)
1137
        || (next_pow10 - abs_multiplier <= 10))
1138
      return true;
1139
  }
1140
 
1141
  /* Keep short sequences that have two or fewer ops.  */
1142
  if (num_ops <= 2)
1143
    return true;
1144
 
1145
  /* Keep constants that are mostly 0's or mostly 1's.  */
1146
  if (__builtin_popcount (multiplier) <= 2 ||
1147
      __builtin_popcount (multiplier) >= 30)
1148
    return true;
1149
 
1150
  /* Keep constants seen in actual code.  */
1151
  if ((find_mult_constants_used (multiplier)))
1152
    return true;
1153
 
1154
  return false;
1155
}
1156
#else
1157
bool
1158
tilegx_emit (long long multiplier, int num_ops)
1159
{
1160
  long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1161
 
1162
  /* Keep constants in range [-1024, 1024].  */
1163
  if (abs_multiplier <= 1024)
1164
    return true;
1165
 
1166
  /* Otherwise exclude sequences with four ops.  */
1167
  if (num_ops > 3)
1168
    return false;
1169
 
1170
  /* Keep constants near powers of two.  */
1171
  {
1172
    unsigned long long prev_pow2 =
1173
      1LL << (63 - __builtin_clzll (abs_multiplier));
1174
    unsigned long long next_pow2 = prev_pow2 << 1;
1175
 
1176
    /* handle overflow case. */
1177
    if (next_pow2 == 0)
1178
      {
1179
        if (prev_pow2 - abs_multiplier <= 10)
1180
          return true;
1181
      }
1182
    else if ((abs_multiplier - prev_pow2 <= 10)
1183
             || (next_pow2 - abs_multiplier <= 10))
1184
      return true;
1185
  }
1186
 
1187
  /* Keep constants near powers of ten.  */
1188
  {
1189
    long long j = 1;
1190
    long long prev_pow10;
1191
    long long next_pow10;
1192
 
1193
    while (((j * 10) < abs_multiplier)
1194
           && (j < (INTMAX_MAX / 10)))
1195
      j = j * 10;
1196
 
1197
    prev_pow10 = j;
1198
    next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10;
1199
 
1200
    if ((abs_multiplier - prev_pow10 <= 100)
1201
        || (next_pow10
1202
            && (next_pow10 - abs_multiplier <= 100)))
1203
      return true;
1204
  }
1205
 
1206
  if (num_ops <= 2)
1207
    return true;
1208
 
1209
  /* Keep constants that are mostly 0's or mostly 1's.  */
1210
  if (__builtin_popcountll (multiplier) <= 2 ||
1211
      __builtin_popcountll (multiplier) >= 62)
1212
    return true;
1213
 
1214
  /* Keep constants seen in actual code.  */
1215
  if (find_mult_constants_used (multiplier))
1216
    return true;
1217
 
1218
  return false;
1219
}
1220
#endif
1221
 
1222
 
1223
int
1224
main ()
1225
{
1226
  ExpressionTreeMap best_solution;
1227
  ExpressionTree s;
1228
 
1229
#ifdef TILEPRO
1230
  printf ("/* Constant multiply table for TILEPro.\n");
1231
#else
1232
  printf ("/* Constant multiply table for TILE-Gx.\n");
1233
#endif
1234
  printf ("   Copyright (C) 2011, 2012\n");
1235
  printf ("   Free Software Foundation, Inc.\n");
1236
  printf ("   Contributed by Walter Lee (walt@tilera.com)\n");
1237
  printf ("\n");
1238
  printf ("   This file is part of GCC.\n");
1239
  printf ("\n");
1240
  printf ("   GCC is free software; you can redistribute it and/or modify it\n");
1241
  printf ("   under the terms of the GNU General Public License as published\n");
1242
  printf ("   by the Free Software Foundation; either version 3, or (at your\n");
1243
  printf ("   option) any later version.\n");
1244
  printf ("\n");
1245
  printf ("   GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1246
  printf ("   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1247
  printf ("   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public\n");
1248
  printf ("   License for more details.\n");
1249
  printf ("\n");
1250
  printf ("   You should have received a copy of the GNU General Public License\n");
1251
  printf ("   along with GCC; see the file COPYING3.  If not see\n");
1252
  printf ("   <http://www.gnu.org/licenses/>.  */\n");
1253
  printf ("\n");
1254
  printf ("#include \"config.h\"\n");
1255
  printf ("#include \"system.h\"\n");
1256
  printf ("#include \"coretypes.h\"\n");
1257
  printf ("#include \"expr.h\"\n");
1258
  printf ("#include \"optabs.h\"\n");
1259
  printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1260
  create_insn_code_compression_table ();
1261
 
1262
  /* Try all combinations of operators and see what constants we
1263
     produce.  For each possible constant, record the most efficient
1264
     way to generate it.  */
1265
  find_sequences (s, best_solution);
1266
 
1267
  printf ("const struct %s_multiply_insn_seq "
1268
          "%s_multiply_insn_seq_table[] = {\n",
1269
          ARCH, ARCH);
1270
 
1271
  const char *comma_separator = "";
1272
 
1273
  ExpressionTreeMap::iterator i (best_solution.begin ());
1274
  for (; i != best_solution.end (); ++i)
1275
    {
1276
      ExpressionTree *s = (*i).second;
1277
      const MUL_TYPE n = (*i).first;
1278
 
1279
      if (n == 0 || n == 1)
1280
        {
1281
          /* Both of these require zero operations, so these entries
1282
             are bogus and should never be used.  */
1283
          continue;
1284
        }
1285
 
1286
      /* Prune the list of entries to keep the table to a reasonable
1287
         size.  */
1288
#ifdef TILEPRO
1289
      if (!tilepro_emit (n, num_ops (s)))
1290
        continue;
1291
#else
1292
      if (!tilegx_emit (n, num_ops (s)))
1293
        continue;
1294
#endif
1295
 
1296
      printf ("%s", comma_separator);
1297
 
1298
#ifdef TILEPRO
1299
      const MUL_TYPE int_min = INT32_MIN;
1300
#else
1301
      const MUL_TYPE int_min = INT64_MIN;
1302
#endif
1303
      if (n == int_min)
1304
        {
1305
          /* Handle C's woeful lack of unary negation. Without this,
1306
             printing out INT_MIN in decimal will yield an unsigned
1307
             int which could generate a compiler warning.  */
1308
#ifdef TILEPRO
1309
          printf ("  {%d - 1 /* 0x%x */ ,\n   {", n + 1,
1310
                  (unsigned) n);
1311
#else
1312
          printf ("  {%lldll - 1 /* 0x%llx */ ,\n   {", n + 1,
1313
                  (unsigned MUL_TYPE) n);
1314
#endif
1315
        }
1316
      else
1317
        {
1318
#ifdef TILEPRO
1319
          printf ("  {%d /* 0x%x */ ,\n   {", n, (unsigned) n);
1320
#else
1321
          printf ("  {%lldll /* 0x%llx */ ,\n   {", n, (unsigned MUL_TYPE) n);
1322
#endif
1323
        }
1324
 
1325
      bool first = true;
1326
      for (int j = 0; j < s->m_num_vals; j++)
1327
        {
1328
          Expr *e = &s->m_exprs[j];
1329
 
1330
          const Operator *op = e->m_op;
1331
          if (op == NULL)
1332
            continue;
1333
 
1334
          char buf[1024];
1335
          snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1336
                    first ? "" : "    ",
1337
                    op->m_top_index,
1338
                    e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1339
 
1340
          char opnd0[10];
1341
          if (e->m_lhs)
1342
            snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1343
          else
1344
            snprintf (opnd0, sizeof opnd0, "zero");
1345
 
1346
          printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1347
                  buf, op->m_name, j, opnd0,
1348
                  op->is_unary () ? "" : "r", e->m_rhs);
1349
 
1350
          first = false;
1351
        }
1352
      printf ("   }");
1353
      comma_separator = ",\n";
1354
    }
1355
 
1356
  printf ("\n};\n\n");
1357
  printf ("const int %s_multiply_insn_seq_table_size =\n"
1358
          "  (int) (sizeof %s_multiply_insn_seq_table\n"
1359
          "         / sizeof %s_multiply_insn_seq_table[0]);\n",
1360
          ARCH, ARCH, ARCH);
1361
 
1362
  return EXIT_SUCCESS;
1363
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.