OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [tree-ssa-math-opts.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Global, SSA-based optimizations using mathematical identities.
2
   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it
8
under the terms of the GNU General Public License as published by the
9
Free Software Foundation; either version 3, or (at your option) any
10
later version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
/* Currently, the only mini-pass in this file tries to CSE reciprocal
22
   operations.  These are common in sequences such as this one:
23
 
24
        modulus = sqrt(x*x + y*y + z*z);
25
        x = x / modulus;
26
        y = y / modulus;
27
        z = z / modulus;
28
 
29
   that can be optimized to
30
 
31
        modulus = sqrt(x*x + y*y + z*z);
32
        rmodulus = 1.0 / modulus;
33
        x = x * rmodulus;
34
        y = y * rmodulus;
35
        z = z * rmodulus;
36
 
37
   We do this for loop invariant divisors, and with this pass whenever
38
   we notice that a division has the same divisor multiple times.
39
 
40
   Of course, like in PRE, we don't insert a division if a dominator
41
   already has one.  However, this cannot be done as an extension of
42
   PRE for several reasons.
43
 
44
   First of all, with some experiments it was found out that the
45
   transformation is not always useful if there are only two divisions
46
   hy the same divisor.  This is probably because modern processors
47
   can pipeline the divisions; on older, in-order processors it should
48
   still be effective to optimize two divisions by the same number.
49
   We make this a param, and it shall be called N in the remainder of
50
   this comment.
51
 
52
   Second, if trapping math is active, we have less freedom on where
53
   to insert divisions: we can only do so in basic blocks that already
54
   contain one.  (If divisions don't trap, instead, we can insert
55
   divisions elsewhere, which will be in blocks that are common dominators
56
   of those that have the division).
57
 
58
   We really don't want to compute the reciprocal unless a division will
59
   be found.  To do this, we won't insert the division in a basic block
60
   that has less than N divisions *post-dominating* it.
61
 
62
   The algorithm constructs a subset of the dominator tree, holding the
63
   blocks containing the divisions and the common dominators to them,
64
   and walk it twice.  The first walk is in post-order, and it annotates
65
   each block with the number of divisions that post-dominate it: this
66
   gives information on where divisions can be inserted profitably.
67
   The second walk is in pre-order, and it inserts divisions as explained
68
   above, and replaces divisions by multiplications.
69
 
70
   In the best case, the cost of the pass is O(n_statements).  In the
71
   worst-case, the cost is due to creating the dominator tree subset,
72
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
73
   for n_statements / n_basic_blocks statements.  So, the amortized cost
74
   of creating the dominator tree subset is O(n_basic_blocks) and the
75
   worst-case cost of the pass is O(n_statements * n_basic_blocks).
76
 
77
   More practically, the cost will be small because there are few
78
   divisions, and they tend to be in the same basic block, so insert_bb
79
   is called very few times.
80
 
81
   If we did this using domwalk.c, an efficient implementation would have
82
   to work on all the variables in a single pass, because we could not
83
   work on just a subset of the dominator tree, as we do now, and the
84
   cost would also be something like O(n_statements * n_basic_blocks).
85
   The data structures would be more complex in order to work on all the
86
   variables in a single pass.  */
87
 
88
#include "config.h"
89
#include "system.h"
90
#include "coretypes.h"
91
#include "tm.h"
92
#include "flags.h"
93
#include "tree.h"
94
#include "tree-flow.h"
95
#include "real.h"
96
#include "timevar.h"
97
#include "tree-pass.h"
98
#include "alloc-pool.h"
99
#include "basic-block.h"
100
#include "target.h"
101
#include "diagnostic.h"
102
#include "rtl.h"
103
#include "expr.h"
104
#include "optabs.h"
105
 
106
/* This structure represents one basic block that either computes a
107
   division, or is a common dominator for basic block that compute a
108
   division.  */
109
struct occurrence {
110
  /* The basic block represented by this structure.  */
111
  basic_block bb;
112
 
113
  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
114
     inserted in BB.  */
115
  tree recip_def;
116
 
117
  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
118
     was inserted in BB.  */
119
  gimple recip_def_stmt;
120
 
121
  /* Pointer to a list of "struct occurrence"s for blocks dominated
122
     by BB.  */
123
  struct occurrence *children;
124
 
125
  /* Pointer to the next "struct occurrence"s in the list of blocks
126
     sharing a common dominator.  */
127
  struct occurrence *next;
128
 
129
  /* The number of divisions that are in BB before compute_merit.  The
130
     number of divisions that are in BB or post-dominate it after
131
     compute_merit.  */
132
  int num_divisions;
133
 
134
  /* True if the basic block has a division, false if it is a common
135
     dominator for basic blocks that do.  If it is false and trapping
136
     math is active, BB is not a candidate for inserting a reciprocal.  */
137
  bool bb_has_division;
138
};
139
 
140
 
141
/* The instance of "struct occurrence" representing the highest
142
   interesting block in the dominator tree.  */
143
static struct occurrence *occ_head;
144
 
145
/* Allocation pool for getting instances of "struct occurrence".  */
146
static alloc_pool occ_pool;
147
 
148
 
149
 
150
/* Allocate and return a new struct occurrence for basic block BB, and
151
   whose children list is headed by CHILDREN.  */
152
static struct occurrence *
153
occ_new (basic_block bb, struct occurrence *children)
154
{
155
  struct occurrence *occ;
156
 
157
  bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
158
  memset (occ, 0, sizeof (struct occurrence));
159
 
160
  occ->bb = bb;
161
  occ->children = children;
162
  return occ;
163
}
164
 
165
 
166
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
167
   list of "struct occurrence"s, one per basic block, having IDOM as
168
   their common dominator.
169
 
170
   We try to insert NEW_OCC as deep as possible in the tree, and we also
171
   insert any other block that is a common dominator for BB and one
172
   block already in the tree.  */
173
 
174
static void
175
insert_bb (struct occurrence *new_occ, basic_block idom,
176
           struct occurrence **p_head)
177
{
178
  struct occurrence *occ, **p_occ;
179
 
180
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
181
    {
182
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
183
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
184
      if (dom == bb)
185
        {
186
          /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
187
             from its list.  */
188
          *p_occ = occ->next;
189
          occ->next = new_occ->children;
190
          new_occ->children = occ;
191
 
192
          /* Try the next block (it may as well be dominated by BB).  */
193
        }
194
 
195
      else if (dom == occ_bb)
196
        {
197
          /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
198
          insert_bb (new_occ, dom, &occ->children);
199
          return;
200
        }
201
 
202
      else if (dom != idom)
203
        {
204
          gcc_assert (!dom->aux);
205
 
206
          /* There is a dominator between IDOM and BB, add it and make
207
             two children out of NEW_OCC and OCC.  First, remove OCC from
208
             its list.  */
209
          *p_occ = occ->next;
210
          new_occ->next = occ;
211
          occ->next = NULL;
212
 
213
          /* None of the previous blocks has DOM as a dominator: if we tail
214
             recursed, we would reexamine them uselessly. Just switch BB with
215
             DOM, and go on looking for blocks dominated by DOM.  */
216
          new_occ = occ_new (dom, new_occ);
217
        }
218
 
219
      else
220
        {
221
          /* Nothing special, go on with the next element.  */
222
          p_occ = &occ->next;
223
        }
224
    }
225
 
226
  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
227
  new_occ->next = *p_head;
228
  *p_head = new_occ;
229
}
230
 
231
/* Register that we found a division in BB.  */
232
 
233
static inline void
234
register_division_in (basic_block bb)
235
{
236
  struct occurrence *occ;
237
 
238
  occ = (struct occurrence *) bb->aux;
239
  if (!occ)
240
    {
241
      occ = occ_new (bb, NULL);
242
      insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
243
    }
244
 
245
  occ->bb_has_division = true;
246
  occ->num_divisions++;
247
}
248
 
249
 
250
/* Compute the number of divisions that postdominate each block in OCC and
251
   its children.  */
252
 
253
static void
254
compute_merit (struct occurrence *occ)
255
{
256
  struct occurrence *occ_child;
257
  basic_block dom = occ->bb;
258
 
259
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
260
    {
261
      basic_block bb;
262
      if (occ_child->children)
263
        compute_merit (occ_child);
264
 
265
      if (flag_exceptions)
266
        bb = single_noncomplex_succ (dom);
267
      else
268
        bb = dom;
269
 
270
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
271
        occ->num_divisions += occ_child->num_divisions;
272
    }
273
}
274
 
275
 
276
/* Return whether USE_STMT is a floating-point division by DEF.  */
277
static inline bool
278
is_division_by (gimple use_stmt, tree def)
279
{
280
  return is_gimple_assign (use_stmt)
281
         && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
282
         && gimple_assign_rhs2 (use_stmt) == def
283
         /* Do not recognize x / x as valid division, as we are getting
284
            confused later by replacing all immediate uses x in such
285
            a stmt.  */
286
         && gimple_assign_rhs1 (use_stmt) != def;
287
}
288
 
289
/* Walk the subset of the dominator tree rooted at OCC, setting the
290
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
291
   the given basic block.  The field may be left NULL, of course,
292
   if it is not possible or profitable to do the optimization.
293
 
294
   DEF_BSI is an iterator pointing at the statement defining DEF.
295
   If RECIP_DEF is set, a dominator already has a computation that can
296
   be used.  */
297
 
298
static void
299
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
300
                    tree def, tree recip_def, int threshold)
301
{
302
  tree type;
303
  gimple new_stmt;
304
  gimple_stmt_iterator gsi;
305
  struct occurrence *occ_child;
306
 
307
  if (!recip_def
308
      && (occ->bb_has_division || !flag_trapping_math)
309
      && occ->num_divisions >= threshold)
310
    {
311
      /* Make a variable with the replacement and substitute it.  */
312
      type = TREE_TYPE (def);
313
      recip_def = make_rename_temp (type, "reciptmp");
314
      new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
315
                                               build_one_cst (type), def);
316
 
317
      if (occ->bb_has_division)
318
        {
319
          /* Case 1: insert before an existing division.  */
320
          gsi = gsi_after_labels (occ->bb);
321
          while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
322
            gsi_next (&gsi);
323
 
324
          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
325
        }
326
      else if (def_gsi && occ->bb == def_gsi->bb)
327
        {
328
          /* Case 2: insert right after the definition.  Note that this will
329
             never happen if the definition statement can throw, because in
330
             that case the sole successor of the statement's basic block will
331
             dominate all the uses as well.  */
332
          gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
333
        }
334
      else
335
        {
336
          /* Case 3: insert in a basic block not containing defs/uses.  */
337
          gsi = gsi_after_labels (occ->bb);
338
          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
339
        }
340
 
341
      occ->recip_def_stmt = new_stmt;
342
    }
343
 
344
  occ->recip_def = recip_def;
345
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
346
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
347
}
348
 
349
 
350
/* Replace the division at USE_P with a multiplication by the reciprocal, if
351
   possible.  */
352
 
353
static inline void
354
replace_reciprocal (use_operand_p use_p)
355
{
356
  gimple use_stmt = USE_STMT (use_p);
357
  basic_block bb = gimple_bb (use_stmt);
358
  struct occurrence *occ = (struct occurrence *) bb->aux;
359
 
360
  if (optimize_bb_for_speed_p (bb)
361
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
362
    {
363
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
364
      SET_USE (use_p, occ->recip_def);
365
      fold_stmt_inplace (use_stmt);
366
      update_stmt (use_stmt);
367
    }
368
}
369
 
370
 
371
/* Free OCC and return one more "struct occurrence" to be freed.  */
372
 
373
static struct occurrence *
374
free_bb (struct occurrence *occ)
375
{
376
  struct occurrence *child, *next;
377
 
378
  /* First get the two pointers hanging off OCC.  */
379
  next = occ->next;
380
  child = occ->children;
381
  occ->bb->aux = NULL;
382
  pool_free (occ_pool, occ);
383
 
384
  /* Now ensure that we don't recurse unless it is necessary.  */
385
  if (!child)
386
    return next;
387
  else
388
    {
389
      while (next)
390
        next = free_bb (next);
391
 
392
      return child;
393
    }
394
}
395
 
396
 
397
/* Look for floating-point divisions among DEF's uses, and try to
398
   replace them by multiplications with the reciprocal.  Add
399
   as many statements computing the reciprocal as needed.
400
 
401
   DEF must be a GIMPLE register of a floating-point type.  */
402
 
403
static void
404
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
405
{
406
  use_operand_p use_p;
407
  imm_use_iterator use_iter;
408
  struct occurrence *occ;
409
  int count = 0, threshold;
410
 
411
  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
412
 
413
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
414
    {
415
      gimple use_stmt = USE_STMT (use_p);
416
      if (is_division_by (use_stmt, def))
417
        {
418
          register_division_in (gimple_bb (use_stmt));
419
          count++;
420
        }
421
    }
422
 
423
  /* Do the expensive part only if we can hope to optimize something.  */
424
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
425
  if (count >= threshold)
426
    {
427
      gimple use_stmt;
428
      for (occ = occ_head; occ; occ = occ->next)
429
        {
430
          compute_merit (occ);
431
          insert_reciprocals (def_gsi, occ, def, NULL, threshold);
432
        }
433
 
434
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
435
        {
436
          if (is_division_by (use_stmt, def))
437
            {
438
              FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
439
                replace_reciprocal (use_p);
440
            }
441
        }
442
    }
443
 
444
  for (occ = occ_head; occ; )
445
    occ = free_bb (occ);
446
 
447
  occ_head = NULL;
448
}
449
 
450
static bool
451
gate_cse_reciprocals (void)
452
{
453
  return optimize && flag_reciprocal_math;
454
}
455
 
456
/* Go through all the floating-point SSA_NAMEs, and call
457
   execute_cse_reciprocals_1 on each of them.  */
458
static unsigned int
459
execute_cse_reciprocals (void)
460
{
461
  basic_block bb;
462
  tree arg;
463
 
464
  occ_pool = create_alloc_pool ("dominators for recip",
465
                                sizeof (struct occurrence),
466
                                n_basic_blocks / 3 + 1);
467
 
468
  calculate_dominance_info (CDI_DOMINATORS);
469
  calculate_dominance_info (CDI_POST_DOMINATORS);
470
 
471
#ifdef ENABLE_CHECKING
472
  FOR_EACH_BB (bb)
473
    gcc_assert (!bb->aux);
474
#endif
475
 
476
  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
477
    if (gimple_default_def (cfun, arg)
478
        && FLOAT_TYPE_P (TREE_TYPE (arg))
479
        && is_gimple_reg (arg))
480
      execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
481
 
482
  FOR_EACH_BB (bb)
483
    {
484
      gimple_stmt_iterator gsi;
485
      gimple phi;
486
      tree def;
487
 
488
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
489
        {
490
          phi = gsi_stmt (gsi);
491
          def = PHI_RESULT (phi);
492
          if (FLOAT_TYPE_P (TREE_TYPE (def))
493
              && is_gimple_reg (def))
494
            execute_cse_reciprocals_1 (NULL, def);
495
        }
496
 
497
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
498
        {
499
          gimple stmt = gsi_stmt (gsi);
500
 
501
          if (gimple_has_lhs (stmt)
502
              && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
503
              && FLOAT_TYPE_P (TREE_TYPE (def))
504
              && TREE_CODE (def) == SSA_NAME)
505
            execute_cse_reciprocals_1 (&gsi, def);
506
        }
507
 
508
      if (optimize_bb_for_size_p (bb))
509
        continue;
510
 
511
      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
512
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
513
        {
514
          gimple stmt = gsi_stmt (gsi);
515
          tree fndecl;
516
 
517
          if (is_gimple_assign (stmt)
518
              && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
519
            {
520
              tree arg1 = gimple_assign_rhs2 (stmt);
521
              gimple stmt1;
522
 
523
              if (TREE_CODE (arg1) != SSA_NAME)
524
                continue;
525
 
526
              stmt1 = SSA_NAME_DEF_STMT (arg1);
527
 
528
              if (is_gimple_call (stmt1)
529
                  && gimple_call_lhs (stmt1)
530
                  && (fndecl = gimple_call_fndecl (stmt1))
531
                  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
532
                      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
533
                {
534
                  enum built_in_function code;
535
                  bool md_code, fail;
536
                  imm_use_iterator ui;
537
                  use_operand_p use_p;
538
 
539
                  code = DECL_FUNCTION_CODE (fndecl);
540
                  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
541
 
542
                  fndecl = targetm.builtin_reciprocal (code, md_code, false);
543
                  if (!fndecl)
544
                    continue;
545
 
546
                  /* Check that all uses of the SSA name are divisions,
547
                     otherwise replacing the defining statement will do
548
                     the wrong thing.  */
549
                  fail = false;
550
                  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
551
                    {
552
                      gimple stmt2 = USE_STMT (use_p);
553
                      if (is_gimple_debug (stmt2))
554
                        continue;
555
                      if (!is_gimple_assign (stmt2)
556
                          || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
557
                          || gimple_assign_rhs1 (stmt2) == arg1
558
                          || gimple_assign_rhs2 (stmt2) != arg1)
559
                        {
560
                          fail = true;
561
                          break;
562
                        }
563
                    }
564
                  if (fail)
565
                    continue;
566
 
567
                  gimple_replace_lhs (stmt1, arg1);
568
                  gimple_call_set_fndecl (stmt1, fndecl);
569
                  update_stmt (stmt1);
570
 
571
                  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
572
                    {
573
                      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
574
                      fold_stmt_inplace (stmt);
575
                      update_stmt (stmt);
576
                    }
577
                }
578
            }
579
        }
580
    }
581
 
582
  free_dominance_info (CDI_DOMINATORS);
583
  free_dominance_info (CDI_POST_DOMINATORS);
584
  free_alloc_pool (occ_pool);
585
  return 0;
586
}
587
 
588
struct gimple_opt_pass pass_cse_reciprocals =
589
{
590
 {
591
  GIMPLE_PASS,
592
  "recip",                              /* name */
593
  gate_cse_reciprocals,                 /* gate */
594
  execute_cse_reciprocals,              /* execute */
595
  NULL,                                 /* sub */
596
  NULL,                                 /* next */
597
  0,                                     /* static_pass_number */
598
  TV_NONE,                              /* tv_id */
599
  PROP_ssa,                             /* properties_required */
600
  0,                                     /* properties_provided */
601
  0,                                     /* properties_destroyed */
602
  0,                                     /* todo_flags_start */
603
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
604
    | TODO_verify_stmts                /* todo_flags_finish */
605
 }
606
};
607
 
608
/* Records an occurrence at statement USE_STMT in the vector of trees
609
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
610
   is not yet initialized.  Returns true if the occurrence was pushed on
611
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
612
   statements in the vector.  */
613
 
614
static bool
615
maybe_record_sincos (VEC(gimple, heap) **stmts,
616
                     basic_block *top_bb, gimple use_stmt)
617
{
618
  basic_block use_bb = gimple_bb (use_stmt);
619
  if (*top_bb
620
      && (*top_bb == use_bb
621
          || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
622
    VEC_safe_push (gimple, heap, *stmts, use_stmt);
623
  else if (!*top_bb
624
           || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
625
    {
626
      VEC_safe_push (gimple, heap, *stmts, use_stmt);
627
      *top_bb = use_bb;
628
    }
629
  else
630
    return false;
631
 
632
  return true;
633
}
634
 
635
/* Look for sin, cos and cexpi calls with the same argument NAME and
636
   create a single call to cexpi CSEing the result in this case.
637
   We first walk over all immediate uses of the argument collecting
638
   statements that we can CSE in a vector and in a second pass replace
639
   the statement rhs with a REALPART or IMAGPART expression on the
640
   result of the cexpi call we insert before the use statement that
641
   dominates all other candidates.  */
642
 
643
static void
644
execute_cse_sincos_1 (tree name)
645
{
646
  gimple_stmt_iterator gsi;
647
  imm_use_iterator use_iter;
648
  tree fndecl, res, type;
649
  gimple def_stmt, use_stmt, stmt;
650
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
651
  VEC(gimple, heap) *stmts = NULL;
652
  basic_block top_bb = NULL;
653
  int i;
654
 
655
  type = TREE_TYPE (name);
656
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
657
    {
658
      if (gimple_code (use_stmt) != GIMPLE_CALL
659
          || !gimple_call_lhs (use_stmt)
660
          || !(fndecl = gimple_call_fndecl (use_stmt))
661
          || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
662
        continue;
663
 
664
      switch (DECL_FUNCTION_CODE (fndecl))
665
        {
666
        CASE_FLT_FN (BUILT_IN_COS):
667
          seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
668
          break;
669
 
670
        CASE_FLT_FN (BUILT_IN_SIN):
671
          seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
672
          break;
673
 
674
        CASE_FLT_FN (BUILT_IN_CEXPI):
675
          seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
676
          break;
677
 
678
        default:;
679
        }
680
    }
681
 
682
  if (seen_cos + seen_sin + seen_cexpi <= 1)
683
    {
684
      VEC_free(gimple, heap, stmts);
685
      return;
686
    }
687
 
688
  /* Simply insert cexpi at the beginning of top_bb but not earlier than
689
     the name def statement.  */
690
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
691
  if (!fndecl)
692
    return;
693
  res = make_rename_temp (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
694
  stmt = gimple_build_call (fndecl, 1, name);
695
  gimple_call_set_lhs (stmt, res);
696
 
697
  def_stmt = SSA_NAME_DEF_STMT (name);
698
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
699
      && gimple_code (def_stmt) != GIMPLE_PHI
700
      && gimple_bb (def_stmt) == top_bb)
701
    {
702
      gsi = gsi_for_stmt (def_stmt);
703
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
704
    }
705
  else
706
    {
707
      gsi = gsi_after_labels (top_bb);
708
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
709
    }
710
  update_stmt (stmt);
711
 
712
  /* And adjust the recorded old call sites.  */
713
  for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
714
    {
715
      tree rhs = NULL;
716
      fndecl = gimple_call_fndecl (use_stmt);
717
 
718
      switch (DECL_FUNCTION_CODE (fndecl))
719
        {
720
        CASE_FLT_FN (BUILT_IN_COS):
721
          rhs = fold_build1 (REALPART_EXPR, type, res);
722
          break;
723
 
724
        CASE_FLT_FN (BUILT_IN_SIN):
725
          rhs = fold_build1 (IMAGPART_EXPR, type, res);
726
          break;
727
 
728
        CASE_FLT_FN (BUILT_IN_CEXPI):
729
          rhs = res;
730
          break;
731
 
732
        default:;
733
          gcc_unreachable ();
734
        }
735
 
736
        /* Replace call with a copy.  */
737
        stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
738
 
739
        gsi = gsi_for_stmt (use_stmt);
740
        gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
741
        gsi_remove (&gsi, true);
742
    }
743
 
744
  VEC_free(gimple, heap, stmts);
745
}
746
 
747
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
748
   on the SSA_NAME argument of each of them.  */
749
 
750
static unsigned int
751
execute_cse_sincos (void)
752
{
753
  basic_block bb;
754
 
755
  calculate_dominance_info (CDI_DOMINATORS);
756
 
757
  FOR_EACH_BB (bb)
758
    {
759
      gimple_stmt_iterator gsi;
760
 
761
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
762
        {
763
          gimple stmt = gsi_stmt (gsi);
764
          tree fndecl;
765
 
766
          if (is_gimple_call (stmt)
767
              && gimple_call_lhs (stmt)
768
              && (fndecl = gimple_call_fndecl (stmt))
769
              && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
770
            {
771
              tree arg;
772
 
773
              switch (DECL_FUNCTION_CODE (fndecl))
774
                {
775
                CASE_FLT_FN (BUILT_IN_COS):
776
                CASE_FLT_FN (BUILT_IN_SIN):
777
                CASE_FLT_FN (BUILT_IN_CEXPI):
778
                  arg = gimple_call_arg (stmt, 0);
779
                  if (TREE_CODE (arg) == SSA_NAME)
780
                    execute_cse_sincos_1 (arg);
781
                  break;
782
 
783
                default:;
784
                }
785
            }
786
        }
787
    }
788
 
789
  free_dominance_info (CDI_DOMINATORS);
790
  return 0;
791
}
792
 
793
static bool
794
gate_cse_sincos (void)
795
{
796
  /* Make sure we have either sincos or cexp.  */
797
  return (TARGET_HAS_SINCOS
798
          || TARGET_C99_FUNCTIONS)
799
         && optimize;
800
}
801
 
802
struct gimple_opt_pass pass_cse_sincos =
803
{
804
 {
805
  GIMPLE_PASS,
806
  "sincos",                             /* name */
807
  gate_cse_sincos,                      /* gate */
808
  execute_cse_sincos,                   /* execute */
809
  NULL,                                 /* sub */
810
  NULL,                                 /* next */
811
  0,                                     /* static_pass_number */
812
  TV_NONE,                              /* tv_id */
813
  PROP_ssa,                             /* properties_required */
814
  0,                                     /* properties_provided */
815
  0,                                     /* properties_destroyed */
816
  0,                                     /* todo_flags_start */
817
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
818
    | TODO_verify_stmts                 /* todo_flags_finish */
819
 }
820
};
821
 
822
/* A symbolic number is used to detect byte permutation and selection
823
   patterns.  Therefore the field N contains an artificial number
824
   consisting of byte size markers:
825
 
826
 
827
   1..size - byte contains the content of the byte
828
   number indexed with that value minus one  */
829
 
830
struct symbolic_number {
831
  unsigned HOST_WIDEST_INT n;
832
  int size;
833
};
834
 
835
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
836
   number N.  Return false if the requested operation is not permitted
837
   on a symbolic number.  */
838
 
839
static inline bool
840
do_shift_rotate (enum tree_code code,
841
                 struct symbolic_number *n,
842
                 int count)
843
{
844
  if (count % 8 != 0)
845
    return false;
846
 
847
  /* Zero out the extra bits of N in order to avoid them being shifted
848
     into the significant bits.  */
849
  if (n->size < (int)sizeof (HOST_WIDEST_INT))
850
    n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
851
 
852
  switch (code)
853
    {
854
    case LSHIFT_EXPR:
855
      n->n <<= count;
856
      break;
857
    case RSHIFT_EXPR:
858
      n->n >>= count;
859
      break;
860
    case LROTATE_EXPR:
861
      n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
862
      break;
863
    case RROTATE_EXPR:
864
      n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
865
      break;
866
    default:
867
      return false;
868
    }
869
  return true;
870
}
871
 
872
/* Perform sanity checking for the symbolic number N and the gimple
873
   statement STMT.  */
874
 
875
static inline bool
876
verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
877
{
878
  tree lhs_type;
879
 
880
  lhs_type = gimple_expr_type (stmt);
881
 
882
  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
883
    return false;
884
 
885
  if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
886
    return false;
887
 
888
  return true;
889
}
890
 
891
/* find_bswap_1 invokes itself recursively with N and tries to perform
892
   the operation given by the rhs of STMT on the result.  If the
893
   operation could successfully be executed the function returns the
894
   tree expression of the source operand and NULL otherwise.  */
895
 
896
static tree
897
find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
898
{
899
  enum tree_code code;
900
  tree rhs1, rhs2 = NULL;
901
  gimple rhs1_stmt, rhs2_stmt;
902
  tree source_expr1;
903
  enum gimple_rhs_class rhs_class;
904
 
905
  if (!limit || !is_gimple_assign (stmt))
906
    return NULL_TREE;
907
 
908
  rhs1 = gimple_assign_rhs1 (stmt);
909
 
910
  if (TREE_CODE (rhs1) != SSA_NAME)
911
    return NULL_TREE;
912
 
913
  code = gimple_assign_rhs_code (stmt);
914
  rhs_class = gimple_assign_rhs_class (stmt);
915
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
916
 
917
  if (rhs_class == GIMPLE_BINARY_RHS)
918
    rhs2 = gimple_assign_rhs2 (stmt);
919
 
920
  /* Handle unary rhs and binary rhs with integer constants as second
921
     operand.  */
922
 
923
  if (rhs_class == GIMPLE_UNARY_RHS
924
      || (rhs_class == GIMPLE_BINARY_RHS
925
          && TREE_CODE (rhs2) == INTEGER_CST))
926
    {
927
      if (code != BIT_AND_EXPR
928
          && code != LSHIFT_EXPR
929
          && code != RSHIFT_EXPR
930
          && code != LROTATE_EXPR
931
          && code != RROTATE_EXPR
932
          && code != NOP_EXPR
933
          && code != CONVERT_EXPR)
934
        return NULL_TREE;
935
 
936
      source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
937
 
938
      /* If find_bswap_1 returned NULL STMT is a leaf node and we have
939
         to initialize the symbolic number.  */
940
      if (!source_expr1)
941
        {
942
          /* Set up the symbolic number N by setting each byte to a
943
             value between 1 and the byte size of rhs1.  The highest
944
             order byte is set to n->size and the lowest order
945
             byte to 1.  */
946
          n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
947
          if (n->size % BITS_PER_UNIT != 0)
948
            return NULL_TREE;
949
          n->size /= BITS_PER_UNIT;
950
          n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
951
                  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
952
 
953
          if (n->size < (int)sizeof (HOST_WIDEST_INT))
954
            n->n &= ((unsigned HOST_WIDEST_INT)1 <<
955
                     (n->size * BITS_PER_UNIT)) - 1;
956
 
957
          source_expr1 = rhs1;
958
        }
959
 
960
      switch (code)
961
        {
962
        case BIT_AND_EXPR:
963
          {
964
            int i;
965
            unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
966
            unsigned HOST_WIDEST_INT tmp = val;
967
 
968
            /* Only constants masking full bytes are allowed.  */
969
            for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
970
              if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
971
                return NULL_TREE;
972
 
973
            n->n &= val;
974
          }
975
          break;
976
        case LSHIFT_EXPR:
977
        case RSHIFT_EXPR:
978
        case LROTATE_EXPR:
979
        case RROTATE_EXPR:
980
          if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
981
            return NULL_TREE;
982
          break;
983
        CASE_CONVERT:
984
          {
985
            int type_size;
986
 
987
            type_size = TYPE_PRECISION (gimple_expr_type (stmt));
988
            if (type_size % BITS_PER_UNIT != 0)
989
              return NULL_TREE;
990
 
991
            if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
992
              {
993
                /* If STMT casts to a smaller type mask out the bits not
994
                   belonging to the target type.  */
995
                n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
996
              }
997
            n->size = type_size / BITS_PER_UNIT;
998
          }
999
          break;
1000
        default:
1001
          return NULL_TREE;
1002
        };
1003
      return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1004
    }
1005
 
1006
  /* Handle binary rhs.  */
1007
 
1008
  if (rhs_class == GIMPLE_BINARY_RHS)
1009
    {
1010
      struct symbolic_number n1, n2;
1011
      tree source_expr2;
1012
 
1013
      if (code != BIT_IOR_EXPR)
1014
        return NULL_TREE;
1015
 
1016
      if (TREE_CODE (rhs2) != SSA_NAME)
1017
        return NULL_TREE;
1018
 
1019
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1020
 
1021
      switch (code)
1022
        {
1023
        case BIT_IOR_EXPR:
1024
          source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1025
 
1026
          if (!source_expr1)
1027
            return NULL_TREE;
1028
 
1029
          source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1030
 
1031
          if (source_expr1 != source_expr2
1032
              || n1.size != n2.size)
1033
            return NULL_TREE;
1034
 
1035
          n->size = n1.size;
1036
          n->n = n1.n | n2.n;
1037
 
1038
          if (!verify_symbolic_number_p (n, stmt))
1039
            return NULL_TREE;
1040
 
1041
          break;
1042
        default:
1043
          return NULL_TREE;
1044
        }
1045
      return source_expr1;
1046
    }
1047
  return NULL_TREE;
1048
}
1049
 
1050
/* Check if STMT completes a bswap implementation consisting of ORs,
1051
   SHIFTs and ANDs.  Return the source tree expression on which the
1052
   byte swap is performed and NULL if no bswap was found.  */
1053
 
1054
static tree
1055
find_bswap (gimple stmt)
1056
{
1057
/* The number which the find_bswap result should match in order to
1058
   have a full byte swap.  The number is shifted to the left according
1059
   to the size of the symbolic number before using it.  */
1060
  unsigned HOST_WIDEST_INT cmp =
1061
    sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1062
    (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1063
 
1064
  struct symbolic_number n;
1065
  tree source_expr;
1066
 
1067
  /* The last parameter determines the depth search limit.  It usually
1068
     correlates directly to the number of bytes to be touched.  We
1069
     increase that number by one here in order to also cover signed ->
1070
     unsigned conversions of the src operand as can be seen in
1071
     libgcc.  */
1072
  source_expr =  find_bswap_1 (stmt, &n,
1073
                               TREE_INT_CST_LOW (
1074
                                 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);
1075
 
1076
  if (!source_expr)
1077
    return NULL_TREE;
1078
 
1079
  /* Zero out the extra bits of N and CMP.  */
1080
  if (n.size < (int)sizeof (HOST_WIDEST_INT))
1081
    {
1082
      unsigned HOST_WIDEST_INT mask =
1083
        ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1084
 
1085
      n.n &= mask;
1086
      cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1087
    }
1088
 
1089
  /* A complete byte swap should make the symbolic number to start
1090
     with the largest digit in the highest order byte.  */
1091
  if (cmp != n.n)
1092
    return NULL_TREE;
1093
 
1094
  return source_expr;
1095
}
1096
 
1097
/* Find manual byte swap implementations and turn them into a bswap
1098
   builtin invokation.  */
1099
 
1100
static unsigned int
1101
execute_optimize_bswap (void)
1102
{
1103
  basic_block bb;
1104
  bool bswap32_p, bswap64_p;
1105
  bool changed = false;
1106
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1107
 
1108
  if (BITS_PER_UNIT != 8)
1109
    return 0;
1110
 
1111
  if (sizeof (HOST_WIDEST_INT) < 8)
1112
    return 0;
1113
 
1114
  bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
1115
               && optab_handler (bswap_optab, SImode)->insn_code !=
1116
               CODE_FOR_nothing);
1117
  bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
1118
               && (optab_handler (bswap_optab, DImode)->insn_code !=
1119
                   CODE_FOR_nothing
1120
                   || (bswap32_p && word_mode == SImode)));
1121
 
1122
  if (!bswap32_p && !bswap64_p)
1123
    return 0;
1124
 
1125
  /* Determine the argument type of the builtins.  The code later on
1126
     assumes that the return and argument type are the same.  */
1127
  if (bswap32_p)
1128
    {
1129
      tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
1130
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1131
    }
1132
 
1133
  if (bswap64_p)
1134
    {
1135
      tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
1136
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1137
    }
1138
 
1139
  FOR_EACH_BB (bb)
1140
    {
1141
      gimple_stmt_iterator gsi;
1142
 
1143
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1144
        {
1145
          gimple stmt = gsi_stmt (gsi);
1146
          tree bswap_src, bswap_type;
1147
          tree bswap_tmp;
1148
          tree fndecl = NULL_TREE;
1149
          int type_size;
1150
          gimple call;
1151
 
1152
          if (!is_gimple_assign (stmt)
1153
              || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1154
            continue;
1155
 
1156
          type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1157
 
1158
          switch (type_size)
1159
            {
1160
            case 32:
1161
              if (bswap32_p)
1162
                {
1163
                  fndecl = built_in_decls[BUILT_IN_BSWAP32];
1164
                  bswap_type = bswap32_type;
1165
                }
1166
              break;
1167
            case 64:
1168
              if (bswap64_p)
1169
                {
1170
                  fndecl = built_in_decls[BUILT_IN_BSWAP64];
1171
                  bswap_type = bswap64_type;
1172
                }
1173
              break;
1174
            default:
1175
              continue;
1176
            }
1177
 
1178
          if (!fndecl)
1179
            continue;
1180
 
1181
          bswap_src = find_bswap (stmt);
1182
 
1183
          if (!bswap_src)
1184
            continue;
1185
 
1186
          changed = true;
1187
 
1188
          bswap_tmp = bswap_src;
1189
 
1190
          /* Convert the src expression if necessary.  */
1191
          if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1192
            {
1193
              gimple convert_stmt;
1194
 
1195
              bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1196
              add_referenced_var (bswap_tmp);
1197
              bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1198
 
1199
              convert_stmt = gimple_build_assign_with_ops (
1200
                               CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1201
              gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1202
            }
1203
 
1204
          call = gimple_build_call (fndecl, 1, bswap_tmp);
1205
 
1206
          bswap_tmp = gimple_assign_lhs (stmt);
1207
 
1208
          /* Convert the result if necessary.  */
1209
          if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1210
            {
1211
              gimple convert_stmt;
1212
 
1213
              bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1214
              add_referenced_var (bswap_tmp);
1215
              bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1216
              convert_stmt = gimple_build_assign_with_ops (
1217
                               CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1218
              gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1219
            }
1220
 
1221
          gimple_call_set_lhs (call, bswap_tmp);
1222
 
1223
          if (dump_file)
1224
            {
1225
              fprintf (dump_file, "%d bit bswap implementation found at: ",
1226
                       (int)type_size);
1227
              print_gimple_stmt (dump_file, stmt, 0, 0);
1228
            }
1229
 
1230
          gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1231
          gsi_remove (&gsi, true);
1232
        }
1233
    }
1234
 
1235
  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1236
          | TODO_verify_stmts : 0);
1237
}
1238
 
1239
static bool
1240
gate_optimize_bswap (void)
1241
{
1242
  return flag_expensive_optimizations && optimize;
1243
}
1244
 
1245
struct gimple_opt_pass pass_optimize_bswap =
1246
{
1247
 {
1248
  GIMPLE_PASS,
1249
  "bswap",                              /* name */
1250
  gate_optimize_bswap,                  /* gate */
1251
  execute_optimize_bswap,               /* execute */
1252
  NULL,                                 /* sub */
1253
  NULL,                                 /* next */
1254
  0,                                     /* static_pass_number */
1255
  TV_NONE,                              /* tv_id */
1256
  PROP_ssa,                             /* properties_required */
1257
  0,                                     /* properties_provided */
1258
  0,                                     /* properties_destroyed */
1259
  0,                                     /* todo_flags_start */
1260
 
1261
 }
1262
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.