/* gcc/tree-ssa-threadupdate.c from gcc-4.5.1 (OpenRISC gnu-stable tree,
   https://opencores.org/ocsvn/openrisc/openrisc/trunk).  */
/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
   Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "flags.h"
#include "rtl.h"
#include "tm_p.h"
#include "ggc.h"
#include "basic-block.h"
#include "output.h"
#include "expr.h"
#include "function.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "cfgloop.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.  Block duplication can be further minimized by using
   B instead of creating B' for one destination if all edges into B are
   going to be threaded to a successor of B.

   We further reduce the number of edges and statements we create by
   not copying all the outgoing edges and the control statement in
   step #1.  We instead create a template block without the outgoing
   edges and duplicate the template.  */
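
/* A hedged source-level illustration (hypothetical code, not taken from
   GCC): both tests below branch on the same condition, so once the first
   test has been taken, the outcome of the second is known on that path.
   The edge into the second test can therefore be threaded straight to its
   destination, with the intervening side effect (x *= 2) duplicated onto
   the threaded path:

     if (flag)        // A: FLAG is known nonzero on this path
       x += 1;
     x *= 2;          // B: side effect that must be preserved
     if (flag)        // B's test is redundant when reached from A
       x -= 3;        // C: the threaded edge reaches here directly
*/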


/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
     its single successor.  */
  edge outgoing_edge;

  /* A list of incoming edges which we want to thread to
     OUTGOING_EDGE->dest.  */
  struct el *incoming_edges;

  /* Flag indicating whether or not we should create a duplicate block
     for this thread destination.  This is only true if we are threading
     all incoming edges and thus are using BB itself as a duplicate block.  */
  bool do_not_duplicate;
};
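
/* Hedged illustration of how the two structures above cooperate (the
   snippet is expository, not code from this pass): one redirection_data
   entry exists per unique outgoing edge, and each incoming edge threaded
   to it is prepended to that entry's list in O(1):

     struct el *node = XNEW (struct el);
     node->e = incoming_edge;
     node->next = rd->incoming_edges;
     rd->incoming_edges = node;

   lookup_redirection_data below performs this prepend for real.  */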

/* Main data structure to hold information for duplicates of BB.  */
static htab_t redirection_data;

/* Data structure of information to pass to hash table traversal routines.  */
struct local_info
{
  /* The current block we are working on.  */
  basic_block bb;

  /* A template copy of BB with no outgoing edges or control statement that
     we use for creating copies.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static VEC(edge,heap) *threaded_edges;
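
/* Hedged illustration (expository, not code from this pass): the pairs are
   stored flat, so element I is an original incoming edge and element I+1
   is the outgoing edge it should be threaded to.  Registering and decoding
   therefore look like

     VEC_safe_push (edge, heap, threaded_edges, e);     // original_edge
     VEC_safe_push (edge, heap, threaded_edges, e2);    // target_edge

     for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
       {
         edge e = VEC_index (edge, threaded_edges, i);
         edge e2 = VEC_index (edge, threaded_edges, i + 1);
         ...
       }

   register_jump_thread and mark_threaded_blocks below implement exactly
   these two halves.  */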


/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;


/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
        remove_edge (e);
      else
        ei_next (&ei);
    }
}

/* Create a duplicate of BB which only reaches the destination of the edge
   stored in RD.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL, NULL);

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_block->frequency = 0;
  rd->dup_block->count = 0;

  /* The call to duplicate_block will copy everything, including the
     useless COND_EXPR or SWITCH_EXPR at the end of BB.  We just remove
     the useless COND_EXPR or SWITCH_EXPR here rather than having a
     specialized block copier.  We also remove all outgoing edges
     from the duplicate block.  The appropriate edge will be created
     later.  */
  remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
}

/* Hashing and equality routines for our hash table.  */
static hashval_t
redirection_data_hash (const void *p)
{
  edge e = ((const struct redirection_data *)p)->outgoing_edge;
  return e->dest->index;
}

static int
redirection_data_eq (const void *p1, const void *p2)
{
  edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
  edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;

  return e1 == e2;
}

/* Given an outgoing edge E, look up and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert)
{
  void **slot;
  struct redirection_data *elt;

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->outgoing_edge = e;
  elt->dup_block = NULL;
  elt->do_not_duplicate = false;
  elt->incoming_edges = NULL;

  slot = htab_find_slot (redirection_data, elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = (void *)elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = incoming_edge;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore; we will extract the
         relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = (struct redirection_data *) *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
         to the list of incoming edges associated with E.  */
      if (insert)
        {
          struct el *el = XNEW (struct el);
          el->next = elt->incoming_edges;
          el->e = incoming_edge;
          elt->incoming_edges = el;
        }

      return elt;
    }
}
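
/* Hedged usage sketch (E and E2 are assumed variables for illustration):
   the two call patterns thread_block uses below.

     lookup_redirection_data (e2, e, INSERT);
       creates the entry for outgoing edge E2 on first sight and prepends
       incoming edge E to its list.

     lookup_redirection_data (e2, NULL, NO_INSERT)->do_not_duplicate = true;
       assumes the entry already exists; with NO_INSERT the function
       returns NULL when it does not, so this form is only safe after a
       prior INSERT.  */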

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd)
{
  edge e = make_edge (rd->dup_block, rd->outgoing_edge->dest, EDGE_FALLTHRU);
  gimple_stmt_iterator gsi;

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = rd->dup_block->count;
  e->aux = rd->outgoing_edge->aux;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      source_location locus;
      int indx = rd->outgoing_edge->dest_idx;

      locus = gimple_phi_arg_location (phi, indx);
      add_phi_arg (phi, gimple_phi_arg_def (phi, indx), e, locus);
    }
}

/* Hash table traversal callback routine to create duplicate blocks.  */

static int
create_duplicates (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* If this entry should not have a duplicate created, then there's
     nothing to do.  */
  if (rd->do_not_duplicate)
    return 1;

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading (local_info->bb, rd);
      local_info->template_block = rd->dup_block;

      /* We do not create any outgoing edges for the template.  We will
         take care of that in a later traversal.  That way we do not
         create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
         block.  */
      create_edge_and_update_destination_phis (rd);
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

static int
fixup_template_block (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* If this is the template block, then create its outgoing edges
     and halt the hash table traversal.  */
  if (rd->dup_block && rd->dup_block == local_info->template_block)
    {
      create_edge_and_update_destination_phis (rd);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

static int
redirect_edges (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this
     hash table entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;

      /* Go ahead and free this element from the list.  Doing this now
         avoids the need for another list walk when we destroy the hash
         table.  */
      next = el->next;
      free (el);

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
         to clear it will cause all kinds of unpleasant problems later.  */
      e->aux = NULL;

      thread_stats.num_threaded_edges++;

      if (rd->dup_block)
        {
          edge e2;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                     e->src->index, e->dest->index, rd->dup_block->index);

          rd->dup_block->count += e->count;
          rd->dup_block->frequency += EDGE_FREQUENCY (e);
          EDGE_SUCC (rd->dup_block, 0)->count += e->count;
          /* Redirect the incoming edge to the appropriate duplicate
             block.  */
          e2 = redirect_edge_and_branch (e, rd->dup_block);
          gcc_assert (e == e2);
          flush_pending_stmts (e2);
        }
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                     e->src->index, e->dest->index, local_info->bb->index);

          /* We are using BB as the duplicate.  Remove the unnecessary
             outgoing edges and statements from BB.  */
          remove_ctrl_stmt_and_useless_edges (local_info->bb,
                                              rd->outgoing_edge->dest);

          /* Fixup the flags on the single remaining edge.  */
          single_succ_edge (local_info->bb)->flags
            &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
          single_succ_edge (local_info->bb)->flags |= EDGE_FALLTHRU;

          /* And adjust count and frequency on BB.  */
          local_info->bb->count = e->count;
          local_info->bb->frequency = EDGE_FREQUENCY (e);
        }
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
             || is_gimple_debug (gsi_stmt (gsi))
             || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}
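
/* Hedged illustration (hypothetical GIMPLE, not from a real dump): the
   predicate above accepts a block such as

     <L1>:
       if (a_1 > b_2)
         goto <L2>;
       else
         goto <L3>;

   but rejects it as soon as any real statement, say "c_3 = a_1 + b_2;",
   precedes the conditional.  */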

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.  */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  struct local_info local_info;
  struct loop *loop = bb->loop_father;

  /* ALL indicates whether or not all incoming edges into BB should
     be threaded to a duplicate of BB.  */
  bool all = true;

  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data = htab_create (EDGE_COUNT (bb->succs),
                                  redirection_data_hash,
                                  redirection_data_eq,
                                  free);

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);
      e2 = (edge) e->aux;

      if (e2 && loop_exit_edge_p (loop, e2))
        {
          loop->header = NULL;
          loop->latch = NULL;
        }
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      e2 = (edge) e->aux;

      if (!e2
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges.  */
          || (noloop_only
              && bb == bb->loop_father->header
              && !loop_exit_edge_p (bb->loop_father, e2)))
        {
          all = false;
          continue;
        }

      update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
                                       e->count, (edge) e->aux);

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table.  */
      lookup_redirection_data (e2, e, INSERT);
    }

  /* If we are going to thread all incoming edges to an outgoing edge, then
     BB will become unreachable.  Rather than just throwing it away, use
     it for one of the duplicates.  Mark the first incoming edge with the
     DO_NOT_DUPLICATE attribute.  */
  if (all)
    {
      edge e = (edge) EDGE_PRED (bb, 0)->aux;
      lookup_redirection_data (e, NULL, NO_INSERT)->do_not_duplicate = true;
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  htab_traverse (redirection_data, create_duplicates, &local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  htab_traverse (redirection_data, fixup_template_block, &local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  htab_traverse (redirection_data, redirect_edges, &local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  htab_delete (redirection_data);
  redirection_data = NULL;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}

/* Threads edge E through E->dest to the edge E->aux.  Returns the copy
   of E->dest created during threading, or E->dest if it was not necessary
   to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  edge eto = (edge) e->aux;
  struct redirection_data rd;

  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
         control statements at its end, and successors except for ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  rd.outgoing_edge = eto;

  create_block_for_threading (bb, &rd);
  create_edge_and_update_destination_phis (&rd);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
             e->src->index, e->dest->index, rd.dup_block->index);

  rd.dup_block->count = e->count;
  rd.dup_block->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_block)->count = e->count;
  redirect_edge_and_branch (e, rd.dup_block);
  flush_pending_stmts (e);

  return rd.dup_block;
}

/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
          && bb != dbds_ce_stop);
}

/* Evaluates the dominance relationship of the latch of LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

#ifdef ENABLE_CHECKING
  /* This function assumes BB is a successor of LOOP->header.  */
    {
      bool ok = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->src == loop->header)
            {
              ok = true;
              break;
            }
        }

      gcc_assert (ok);
    }
#endif

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
                                bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
        if (e->src == loop->header)
          {
            free (bblocks);
            return DOMST_NONDOMINATING;
          }
        if (e->src == bb)
          bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop
     (although there are cases where it might be appropriate, it is difficult
     to decide, and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which updating of
     the loop-carried information should also be feasible:

     1) Propagation of the latch edge to a block that dominates the latch
        block of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      tgt_edge = (edge) latch->aux;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If the latch is not threaded, and there is a header
                 edge that is not threaded, we would create a loop
                 with multiple entries.  */
              goto fail;
            }

          tgt_edge = (edge) e->aux;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create a loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      /* First handle the case where the latch edge is redirected.  */
      loop->latch = thread_single_edge (latch);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case where entry edges are redirected to the new
         entry block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      e->aux = NULL;
    }
  return false;
}

/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCKS set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

  for (i = 0; i < VEC_length (edge, threaded_edges); i += 2)
    {
      edge e = VEC_index (edge, threaded_edges, i);
      edge e2 = VEC_index (edge, threaded_edges, i + 1);

      e->aux = e2;
      bitmap_set_bit (tmp, e->dest->index);
    }

  /* If optimizing for size, only thread through a block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
        {
          bb = BASIC_BLOCK (i);
          if (EDGE_COUNT (bb->preds) > 1
              && !redirection_block_p (bb))
            {
              FOR_EACH_EDGE (e, ei, bb->preds)
                e->aux = NULL;
            }
          else
            bitmap_set_bit (threaded_blocks, i);
        }
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  BITMAP_FREE (tmp);
}


/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each edge pair recorded in THREADED_EDGES.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;
  loop_iterator li;

  /* We must know about loops in order to preserve them.  */
  gcc_assert (current_loops != NULL);

  if (threaded_edges == NULL)
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);

      if (EDGE_COUNT (bb->preds) > 0)
        retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  statistics_counter_event (cfun, "Jumps threaded",
                            thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  VEC_free (edge, heap, threaded_edges);
  threaded_edges = NULL;

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   E is the edge we can thread, E2 is the new target edge, i.e., we
   are effectively recording that E->dest can be changed to E2->dest
   after fixing the SSA graph.  */

void
register_jump_thread (edge e, edge e2)
{
  if (threaded_edges == NULL)
    threaded_edges = VEC_alloc (edge, heap, 10);

  VEC_safe_push (edge, heap, threaded_edges, e);
  VEC_safe_push (edge, heap, threaded_edges, e2);
}
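
/* Hedged usage sketch (hypothetical client code, E and E2 assumed to be
   edges discovered by a pass): opportunities are queued first and then
   applied in one batch.

     register_jump_thread (e, e2);          // record: E->dest becomes E2->dest
     ...
     if (thread_through_all_blocks (false))
       {
         // The CFG changed; per the contract above, the caller must now
         // recompute dominance info and rewrite duplicated SSA_NAMEs
         // back into SSA form.
       }

   In GCC 4.5, passes such as DOM (tree-ssa-dom.c) and VRP drive jump
   threading this way.  */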
