OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [tree-ssa-math-opts.c] - Blame information for rev 856

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Global, SSA-based optimizations using mathematical identities.
2
   Copyright (C) 2005, 2007 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it
7
under the terms of the GNU General Public License as published by the
8
Free Software Foundation; either version 3, or (at your option) any
9
later version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING3.  If not see
18
<http://www.gnu.org/licenses/>.  */
19
 
20
/* Currently, the only mini-pass in this file tries to CSE reciprocal
21
   operations.  These are common in sequences such as this one:
22
 
23
        modulus = sqrt(x*x + y*y + z*z);
24
        x = x / modulus;
25
        y = y / modulus;
26
        z = z / modulus;
27
 
28
   that can be optimized to
29
 
30
        modulus = sqrt(x*x + y*y + z*z);
31
        rmodulus = 1.0 / modulus;
32
        x = x * rmodulus;
33
        y = y * rmodulus;
34
        z = z * rmodulus;
35
 
36
   We do this for loop invariant divisors, and with this pass whenever
37
   we notice that a division has the same divisor multiple times.
38
 
39
   Of course, like in PRE, we don't insert a division if a dominator
40
   already has one.  However, this cannot be done as an extension of
41
   PRE for several reasons.
42
 
43
   First of all, with some experiments it was found out that the
44
   transformation is not always useful if there are only two divisions
45
   hy the same divisor.  This is probably because modern processors
46
   can pipeline the divisions; on older, in-order processors it should
47
   still be effective to optimize two divisions by the same number.
48
   We make this a param, and it shall be called N in the remainder of
49
   this comment.
50
 
51
   Second, if trapping math is active, we have less freedom on where
52
   to insert divisions: we can only do so in basic blocks that already
53
   contain one.  (If divisions don't trap, instead, we can insert
54
   divisions elsewhere, which will be in blocks that are common dominators
55
   of those that have the division).
56
 
57
   We really don't want to compute the reciprocal unless a division will
58
   be found.  To do this, we won't insert the division in a basic block
59
   that has less than N divisions *post-dominating* it.
60
 
61
   The algorithm constructs a subset of the dominator tree, holding the
62
   blocks containing the divisions and the common dominators to them,
63
   and walk it twice.  The first walk is in post-order, and it annotates
64
   each block with the number of divisions that post-dominate it: this
65
   gives information on where divisions can be inserted profitably.
66
   The second walk is in pre-order, and it inserts divisions as explained
67
   above, and replaces divisions by multiplications.
68
 
69
   In the best case, the cost of the pass is O(n_statements).  In the
70
   worst-case, the cost is due to creating the dominator tree subset,
71
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
72
   for n_statements / n_basic_blocks statements.  So, the amortized cost
73
   of creating the dominator tree subset is O(n_basic_blocks) and the
74
   worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
 
76
   More practically, the cost will be small because there are few
77
   divisions, and they tend to be in the same basic block, so insert_bb
78
   is called very few times.
79
 
80
   If we did this using domwalk.c, an efficient implementation would have
81
   to work on all the variables in a single pass, because we could not
82
   work on just a subset of the dominator tree, as we do now, and the
83
   cost would also be something like O(n_statements * n_basic_blocks).
84
   The data structures would be more complex in order to work on all the
85
   variables in a single pass.  */
86
 
87
#include "config.h"
88
#include "system.h"
89
#include "coretypes.h"
90
#include "tm.h"
91
#include "flags.h"
92
#include "tree.h"
93
#include "tree-flow.h"
94
#include "real.h"
95
#include "timevar.h"
96
#include "tree-pass.h"
97
#include "alloc-pool.h"
98
#include "basic-block.h"
99
#include "target.h"
100
 
101
 
102
/* This structure represents one basic block that either computes a
103
   division, or is a common dominator for basic block that compute a
104
   division.  */
105
struct occurrence {
106
  /* The basic block represented by this structure.  */
107
  basic_block bb;
108
 
109
  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
110
     inserted in BB.  */
111
  tree recip_def;
112
 
113
  /* If non-NULL, the MODIFY_EXPR for a reciprocal computation that
114
     was inserted in BB.  */
115
  tree recip_def_stmt;
116
 
117
  /* Pointer to a list of "struct occurrence"s for blocks dominated
118
     by BB.  */
119
  struct occurrence *children;
120
 
121
  /* Pointer to the next "struct occurrence"s in the list of blocks
122
     sharing a common dominator.  */
123
  struct occurrence *next;
124
 
125
  /* The number of divisions that are in BB before compute_merit.  The
126
     number of divisions that are in BB or post-dominate it after
127
     compute_merit.  */
128
  int num_divisions;
129
 
130
  /* True if the basic block has a division, false if it is a common
131
     dominator for basic blocks that do.  If it is false and trapping
132
     math is active, BB is not a candidate for inserting a reciprocal.  */
133
  bool bb_has_division;
134
};
135
 
136
 
137
/* The instance of "struct occurrence" representing the highest
138
   interesting block in the dominator tree.  */
139
static struct occurrence *occ_head;
140
 
141
/* Allocation pool for getting instances of "struct occurrence".  */
142
static alloc_pool occ_pool;
143
 
144
 
145
 
146
/* Allocate and return a new struct occurrence for basic block BB, and
147
   whose children list is headed by CHILDREN.  */
148
static struct occurrence *
149
occ_new (basic_block bb, struct occurrence *children)
150
{
151
  struct occurrence *occ;
152
 
153
  occ = bb->aux = pool_alloc (occ_pool);
154
  memset (occ, 0, sizeof (struct occurrence));
155
 
156
  occ->bb = bb;
157
  occ->children = children;
158
  return occ;
159
}
160
 
161
 
162
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
163
   list of "struct occurrence"s, one per basic block, having IDOM as
164
   their common dominator.
165
 
166
   We try to insert NEW_OCC as deep as possible in the tree, and we also
167
   insert any other block that is a common dominator for BB and one
168
   block already in the tree.  */
169
 
170
static void
171
insert_bb (struct occurrence *new_occ, basic_block idom,
172
           struct occurrence **p_head)
173
{
174
  struct occurrence *occ, **p_occ;
175
 
176
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
177
    {
178
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
179
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
180
      if (dom == bb)
181
        {
182
          /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
183
             from its list.  */
184
          *p_occ = occ->next;
185
          occ->next = new_occ->children;
186
          new_occ->children = occ;
187
 
188
          /* Try the next block (it may as well be dominated by BB).  */
189
        }
190
 
191
      else if (dom == occ_bb)
192
        {
193
          /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
194
          insert_bb (new_occ, dom, &occ->children);
195
          return;
196
        }
197
 
198
      else if (dom != idom)
199
        {
200
          gcc_assert (!dom->aux);
201
 
202
          /* There is a dominator between IDOM and BB, add it and make
203
             two children out of NEW_OCC and OCC.  First, remove OCC from
204
             its list.  */
205
          *p_occ = occ->next;
206
          new_occ->next = occ;
207
          occ->next = NULL;
208
 
209
          /* None of the previous blocks has DOM as a dominator: if we tail
210
             recursed, we would reexamine them uselessly. Just switch BB with
211
             DOM, and go on looking for blocks dominated by DOM.  */
212
          new_occ = occ_new (dom, new_occ);
213
        }
214
 
215
      else
216
        {
217
          /* Nothing special, go on with the next element.  */
218
          p_occ = &occ->next;
219
        }
220
    }
221
 
222
  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
223
  new_occ->next = *p_head;
224
  *p_head = new_occ;
225
}
226
 
227
/* Register that we found a division in BB.  */
228
 
229
static inline void
230
register_division_in (basic_block bb)
231
{
232
  struct occurrence *occ;
233
 
234
  occ = (struct occurrence *) bb->aux;
235
  if (!occ)
236
    {
237
      occ = occ_new (bb, NULL);
238
      insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
239
    }
240
 
241
  occ->bb_has_division = true;
242
  occ->num_divisions++;
243
}
244
 
245
 
246
/* Compute the number of divisions that postdominate each block in OCC and
247
   its children.  */
248
 
249
static void
250
compute_merit (struct occurrence *occ)
251
{
252
  struct occurrence *occ_child;
253
  basic_block dom = occ->bb;
254
 
255
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
256
    {
257
      basic_block bb;
258
      if (occ_child->children)
259
        compute_merit (occ_child);
260
 
261
      if (flag_exceptions)
262
        bb = single_noncomplex_succ (dom);
263
      else
264
        bb = dom;
265
 
266
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
267
        occ->num_divisions += occ_child->num_divisions;
268
    }
269
}
270
 
271
 
272
/* Return whether USE_STMT is a floating-point division by DEF.  */
273
static inline bool
274
is_division_by (tree use_stmt, tree def)
275
{
276
  return TREE_CODE (use_stmt) == MODIFY_EXPR
277
         && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
278
         && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def;
279
}
280
 
281
/* Walk the subset of the dominator tree rooted at OCC, setting the
282
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
283
   the given basic block.  The field may be left NULL, of course,
284
   if it is not possible or profitable to do the optimization.
285
 
286
   DEF_BSI is an iterator pointing at the statement defining DEF.
287
   If RECIP_DEF is set, a dominator already has a computation that can
288
   be used.  */
289
 
290
static void
291
insert_reciprocals (block_stmt_iterator *def_bsi, struct occurrence *occ,
292
                    tree def, tree recip_def, int threshold)
293
{
294
  tree type, new_stmt;
295
  block_stmt_iterator bsi;
296
  struct occurrence *occ_child;
297
 
298
  if (!recip_def
299
      && (occ->bb_has_division || !flag_trapping_math)
300
      && occ->num_divisions >= threshold)
301
    {
302
      /* Make a variable with the replacement and substitute it.  */
303
      type = TREE_TYPE (def);
304
      recip_def = make_rename_temp (type, "reciptmp");
305
      new_stmt = build2 (MODIFY_EXPR, void_type_node, recip_def,
306
                         fold_build2 (RDIV_EXPR, type, build_one_cst (type),
307
                                      def));
308
 
309
 
310
      if (occ->bb_has_division)
311
        {
312
          /* Case 1: insert before an existing division.  */
313
          bsi = bsi_after_labels (occ->bb);
314
          while (!bsi_end_p (bsi) && !is_division_by (bsi_stmt (bsi), def))
315
            bsi_next (&bsi);
316
 
317
          bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
318
        }
319
      else if (def_bsi && occ->bb == def_bsi->bb)
320
        {
321
          /* Case 2: insert right after the definition.  Note that this will
322
             never happen if the definition statement can throw, because in
323
             that case the sole successor of the statement's basic block will
324
             dominate all the uses as well.  */
325
          bsi_insert_after (def_bsi, new_stmt, BSI_NEW_STMT);
326
        }
327
      else
328
        {
329
          /* Case 3: insert in a basic block not containing defs/uses.  */
330
          bsi = bsi_after_labels (occ->bb);
331
          bsi_insert_before (&bsi, new_stmt, BSI_SAME_STMT);
332
        }
333
 
334
      occ->recip_def_stmt = new_stmt;
335
    }
336
 
337
  occ->recip_def = recip_def;
338
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
339
    insert_reciprocals (def_bsi, occ_child, def, recip_def, threshold);
340
}
341
 
342
 
343
/* Replace the division at USE_P with a multiplication by the reciprocal, if
344
   possible.  */
345
 
346
static inline void
347
replace_reciprocal (use_operand_p use_p)
348
{
349
  tree use_stmt = USE_STMT (use_p);
350
  basic_block bb = bb_for_stmt (use_stmt);
351
  struct occurrence *occ = (struct occurrence *) bb->aux;
352
 
353
  if (occ->recip_def && use_stmt != occ->recip_def_stmt)
354
    {
355
      TREE_SET_CODE (TREE_OPERAND (use_stmt, 1), MULT_EXPR);
356
      SET_USE (use_p, occ->recip_def);
357
      fold_stmt_inplace (use_stmt);
358
      update_stmt (use_stmt);
359
    }
360
}
361
 
362
 
363
/* Free OCC and return one more "struct occurrence" to be freed.  */
364
 
365
static struct occurrence *
366
free_bb (struct occurrence *occ)
367
{
368
  struct occurrence *child, *next;
369
 
370
  /* First get the two pointers hanging off OCC.  */
371
  next = occ->next;
372
  child = occ->children;
373
  occ->bb->aux = NULL;
374
  pool_free (occ_pool, occ);
375
 
376
  /* Now ensure that we don't recurse unless it is necessary.  */
377
  if (!child)
378
    return next;
379
  else
380
    {
381
      while (next)
382
        next = free_bb (next);
383
 
384
      return child;
385
    }
386
}
387
 
388
 
389
/* Look for floating-point divisions among DEF's uses, and try to
390
   replace them by multiplications with the reciprocal.  Add
391
   as many statements computing the reciprocal as needed.
392
 
393
   DEF must be a GIMPLE register of a floating-point type.  */
394
 
395
static void
396
execute_cse_reciprocals_1 (block_stmt_iterator *def_bsi, tree def)
397
{
398
  use_operand_p use_p;
399
  imm_use_iterator use_iter;
400
  struct occurrence *occ;
401
  int count = 0, threshold;
402
 
403
  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
404
 
405
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
406
    {
407
      tree use_stmt = USE_STMT (use_p);
408
      if (is_division_by (use_stmt, def))
409
        {
410
          register_division_in (bb_for_stmt (use_stmt));
411
          count++;
412
        }
413
    }
414
 
415
  /* Do the expensive part only if we can hope to optimize something.  */
416
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
417
  if (count >= threshold)
418
    {
419
      tree use_stmt;
420
      for (occ = occ_head; occ; occ = occ->next)
421
        {
422
          compute_merit (occ);
423
          insert_reciprocals (def_bsi, occ, def, NULL, threshold);
424
        }
425
 
426
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
427
        {
428
          if (is_division_by (use_stmt, def))
429
            {
430
              FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
431
                replace_reciprocal (use_p);
432
            }
433
        }
434
    }
435
 
436
  for (occ = occ_head; occ; )
437
    occ = free_bb (occ);
438
 
439
  occ_head = NULL;
440
}
441
 
442
 
443
static bool
444
gate_cse_reciprocals (void)
445
{
446
  return optimize && !optimize_size && flag_unsafe_math_optimizations;
447
}
448
 
449
 
450
/* Go through all the floating-point SSA_NAMEs, and call
451
   execute_cse_reciprocals_1 on each of them.  */
452
static unsigned int
453
execute_cse_reciprocals (void)
454
{
455
  basic_block bb;
456
  tree arg;
457
 
458
  occ_pool = create_alloc_pool ("dominators for recip",
459
                                sizeof (struct occurrence),
460
                                n_basic_blocks / 3 + 1);
461
 
462
  calculate_dominance_info (CDI_DOMINATORS);
463
  calculate_dominance_info (CDI_POST_DOMINATORS);
464
 
465
#ifdef ENABLE_CHECKING
466
  FOR_EACH_BB (bb)
467
    gcc_assert (!bb->aux);
468
#endif
469
 
470
  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = TREE_CHAIN (arg))
471
    if (default_def (arg)
472
        && FLOAT_TYPE_P (TREE_TYPE (arg))
473
        && is_gimple_reg (arg))
474
      execute_cse_reciprocals_1 (NULL, default_def (arg));
475
 
476
  FOR_EACH_BB (bb)
477
    {
478
      block_stmt_iterator bsi;
479
      tree phi, def;
480
 
481
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
482
        {
483
          def = PHI_RESULT (phi);
484
          if (FLOAT_TYPE_P (TREE_TYPE (def))
485
              && is_gimple_reg (def))
486
            execute_cse_reciprocals_1 (NULL, def);
487
        }
488
 
489
      for (bsi = bsi_after_labels (bb); !bsi_end_p (bsi); bsi_next (&bsi))
490
        {
491
          tree stmt = bsi_stmt (bsi);
492
          if (TREE_CODE (stmt) == MODIFY_EXPR
493
              && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
494
              && FLOAT_TYPE_P (TREE_TYPE (def))
495
              && TREE_CODE (def) == SSA_NAME)
496
            execute_cse_reciprocals_1 (&bsi, def);
497
        }
498
    }
499
 
500
  free_dominance_info (CDI_DOMINATORS);
501
  free_dominance_info (CDI_POST_DOMINATORS);
502
  free_alloc_pool (occ_pool);
503
  return 0;
504
}
505
 
506
struct tree_opt_pass pass_cse_reciprocals =
507
{
508
  "recip",                              /* name */
509
  gate_cse_reciprocals,                 /* gate */
510
  execute_cse_reciprocals,              /* execute */
511
  NULL,                                 /* sub */
512
  NULL,                                 /* next */
513
  0,                                     /* static_pass_number */
514
  0,                                     /* tv_id */
515
  PROP_ssa,                             /* properties_required */
516
  0,                                     /* properties_provided */
517
  0,                                     /* properties_destroyed */
518
  0,                                     /* todo_flags_start */
519
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
520
    | TODO_verify_stmts,                /* todo_flags_finish */
521
 
522
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.