gcc/tree-ssa-threadupdate.c (openrisc/trunk/gnu-dev/or1k-gcc, rev 722)

/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "cfgloop.h"

/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   Block duplication can be further minimized by using B instead of
   creating B' for one destination if all edges into B are going to be
   threaded to a successor of B.  We had code to do this at one time, but
   I'm not convinced it is correct with the changes to avoid mucking up
   the loop structure (which may cancel threading requests, thus a block
   which we thought was going to become unreachable may still be reachable).
   This code was also going to get ugly with the introduction of the ability
   for a single jump thread request to bypass multiple blocks.

   We further reduce the number of edges and statements we create by
   not copying all the outgoing edges and the control statement in
   step #1.  We instead create a template block without the outgoing
   edges and duplicate the template.  */
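
/* For example (an editorial illustration, not part of the original
   file): given the partial CFG

       A ---> B ---> C
               \
                \--> D

   where the condition in B is known to send every path from A to C,
   the steps above produce a duplicate B' that carries B's side effects
   and has the single out-edge B'->C, and then redirect A to it:

       A ---> B' ---> C
              B  ---> C
               \
                \--> D

   The conditional branch in B is no longer executed on paths from A.  */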


/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};
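
/* An editorial sketch (EL_PREPEND is hypothetical and not part of this
   file): the incoming edges form a simple cons list, so recording one
   more edge is a constant-time prepend, exactly what
   lookup_redirection_data below does inline.  */

static ATTRIBUTE_UNUSED struct el *
el_prepend (edge e, struct el *head)
{
  struct el *n = XNEW (struct el);
  n->e = e;
  n->next = head;
  return n;
}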

/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
     its single successor.  */
  edge outgoing_edge;
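
  /* When the jump thread passes through a joiner block, the edge from B
     into the block being threaded through; otherwise NULL.  See
     lookup_redirection_data.  */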
  edge intermediate_edge;

  /* A list of incoming edges which we want to thread to
     OUTGOING_EDGE->dest.  */
  struct el *incoming_edges;
};

/* Main data structure to hold information for duplicates of BB.  */
static htab_t redirection_data;

/* Data structure of information to pass to hash table traversal routines.  */
struct local_info
{
  /* The current block we are working on.  */
  basic_block bb;

  /* A template copy of BB with no outgoing edges or control statement that
     we use for creating copies.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge triples
   (incoming_edge, target_edge, final_target_edge); the third entry
   is NULL unless the request threads through a joiner block.  */
static VEC(edge,heap) *threaded_edges;
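
/* For example, a call register_jump_thread (e, e2, NULL) appends E, E2
   and a NULL third entry; mark_threaded_blocks below consumes the
   vector three elements at a time.  */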

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_TARGET(E) ((edge *)(E)->aux)[0]
#define THREAD_TARGET2(E) ((edge *)(E)->aux)[1]
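
/* An editorial illustration (FIRST_EDGE and FINAL_EDGE_OR_NULL are
   hypothetical names): the AUX field always points to a two-element
   array of edges,

     edge *x = XNEWVEC (edge, 2);
     e->aux = x;
     THREAD_TARGET (e) = first_edge;
     THREAD_TARGET2 (e) = final_edge_or_null;

   where THREAD_TARGET2 is NULL unless the request threads through a
   joiner block.  mark_threaded_blocks and
   create_edge_and_update_destination_phis allocate AUX this way.  */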

/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;


/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
        remove_edge (e);
      else
        ei_next (&ei);
    }
}

/* Create a duplicate of BB.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_block->frequency = 0;
  rd->dup_block->count = 0;
}

/* Hashing and equality routines for our hash table.  */
static hashval_t
redirection_data_hash (const void *p)
{
  edge e = ((const struct redirection_data *)p)->outgoing_edge;
  return e->dest->index;
}

static int
redirection_data_eq (const void *p1, const void *p2)
{
  edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
  edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;
  edge e3 = ((const struct redirection_data *)p1)->intermediate_edge;
  edge e4 = ((const struct redirection_data *)p2)->intermediate_edge;

  return e1 == e2 && e3 == e4;
}
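
/* Note that hashing only on OUTGOING_EDGE->dest means entries that
   differ just in INTERMEDIATE_EDGE share a bucket; the equality test
   above distinguishes them.  */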
242
 
243
/* Given an outgoing edge E lookup and return its entry in our hash table.
244
 
245
   If INSERT is true, then we insert the entry into the hash table if
246
   it is not already present.  INCOMING_EDGE is added to the list of incoming
247
   edges associated with E in the hash table.  */
248
 
249
static struct redirection_data *
250
lookup_redirection_data (edge e, enum insert_option insert)
251
{
252
  void **slot;
253
  struct redirection_data *elt;
254
 
255
 /* Build a hash table element so we can see if E is already
256
     in the table.  */
257
  elt = XNEW (struct redirection_data);
258
  elt->intermediate_edge = THREAD_TARGET2 (e) ? THREAD_TARGET (e) : NULL;
259
  elt->outgoing_edge = THREAD_TARGET2 (e) ? THREAD_TARGET2 (e)
260
                                          : THREAD_TARGET (e);
261
  elt->dup_block = NULL;
262
  elt->incoming_edges = NULL;
263
 
264
  slot = htab_find_slot (redirection_data, elt, insert);
265
 
266
  /* This will only happen if INSERT is false and the entry is not
267
     in the hash table.  */
268
  if (slot == NULL)
269
    {
270
      free (elt);
271
      return NULL;
272
    }
273
 
274
  /* This will only happen if E was not in the hash table and
275
     INSERT is true.  */
276
  if (*slot == NULL)
277
    {
278
      *slot = (void *)elt;
279
      elt->incoming_edges = XNEW (struct el);
280
      elt->incoming_edges->e = e;
281
      elt->incoming_edges->next = NULL;
282
      return elt;
283
    }
284
  /* E was in the hash table.  */
285
  else
286
    {
287
      /* Free ELT as we do not need it anymore, we will extract the
288
         relevant entry from the hash table itself.  */
289
      free (elt);
290
 
291
      /* Get the entry stored in the hash table.  */
292
      elt = (struct redirection_data *) *slot;
293
 
294
      /* If insertion was requested, then we need to add INCOMING_EDGE
295
         to the list of incoming edges associated with E.  */
296
      if (insert)
297
        {
298
          struct el *el = XNEW (struct el);
299
          el->next = elt->incoming_edges;
300
          el->e = e;
301
          elt->incoming_edges = el;
302
        }
303
 
304
      return elt;
305
    }
306
}

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
{
  gimple_stmt_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      source_location locus = gimple_phi_arg_location (phi, src_indx);
      add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
                                         basic_block bb)
{
  edge e = make_edge (bb, rd->outgoing_edge->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  if (rd->outgoing_edge->aux)
    {
      e->aux = (edge *) XNEWVEC (edge, 2);
      THREAD_TARGET(e) = THREAD_TARGET (rd->outgoing_edge);
      THREAD_TARGET2(e) = THREAD_TARGET2 (rd->outgoing_edge);
    }
  else
    {
      e->aux = NULL;
    }

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->outgoing_edge, e);
}

/* Wire up the outgoing edges from the duplicate block and
   update any PHIs as needed.  */
static void
fix_duplicate_block_edges (struct redirection_data *rd,
                           struct local_info *local_info)
{
  /* If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (THREAD_TARGET2 (rd->incoming_edges->e))
    {
      edge victim;
      edge e2;
      edge e = rd->incoming_edges->e;

      /* This updates the PHIs at the destination of the duplicate
         block.  */
      update_destination_phis (local_info->bb, rd->dup_block);

      /* Find the edge from the duplicate block to the block we're
         threading through.  That's the edge we want to redirect.  */
      victim = find_edge (rd->dup_block, THREAD_TARGET (e)->dest);
      e2 = redirect_edge_and_branch (victim, THREAD_TARGET2 (e)->dest);

      /* If we redirected the edge, then we need to copy PHI arguments
         at the target.  If the edge already existed (e2 != victim case),
         then the PHIs in the target already have the correct arguments.  */
      if (e2 == victim)
        copy_phi_args (e2->dest, THREAD_TARGET2 (e), e2);
    }
  else
    {
      remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
      create_edge_and_update_destination_phis (rd, rd->dup_block);
    }
}

/* Hash table traversal callback routine to create duplicate blocks.  */

static int
create_duplicates (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading (local_info->bb, rd);
      local_info->template_block = rd->dup_block;

      /* We do not create any outgoing edges for the template.  We will
         take care of that in a later traversal.  That way we do not
         create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
         block.  */
      fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

static int
fixup_template_block (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* If this is the template block, halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_block && rd->dup_block == local_info->template_block)
    {
      fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}
472
 
473
/* Hash table traversal callback to redirect each incoming edge
474
   associated with this hash table element to its new destination.  */
475
 
476
static int
477
redirect_edges (void **slot, void *data)
478
{
479
  struct redirection_data *rd = (struct redirection_data *) *slot;
480
  struct local_info *local_info = (struct local_info *)data;
481
  struct el *next, *el;
482
 
483
  /* Walk over all the incoming edges associated associated with this
484
     hash table entry.  */
485
  for (el = rd->incoming_edges; el; el = next)
486
    {
487
      edge e = el->e;
488
 
489
      /* Go ahead and free this element from the list.  Doing this now
490
         avoids the need for another list walk when we destroy the hash
491
         table.  */
492
      next = el->next;
493
      free (el);
494
 
495
      thread_stats.num_threaded_edges++;
496
      /* If we are threading through a joiner block, then we have to
497
         find the edge we want to redirect and update some PHI nodes.  */
498
      if (THREAD_TARGET2 (e))
499
        {
500
          edge e2;
501
 
502
          /* We want to redirect the incoming edge to the joiner block (E)
503
             to instead reach the duplicate of the joiner block.  */
504
          e2 = redirect_edge_and_branch (e, rd->dup_block);
505
          flush_pending_stmts (e2);
506
        }
507
      else if (rd->dup_block)
508
        {
509
          edge e2;
510
 
511
          if (dump_file && (dump_flags & TDF_DETAILS))
512
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
513
                     e->src->index, e->dest->index, rd->dup_block->index);
514
 
515
          rd->dup_block->count += e->count;
516
 
517
          /* Excessive jump threading may make frequencies large enough so
518
             the computation overflows.  */
519
          if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
520
            rd->dup_block->frequency += EDGE_FREQUENCY (e);
521
          EDGE_SUCC (rd->dup_block, 0)->count += e->count;
522
          /* Redirect the incoming edge to the appropriate duplicate
523
             block.  */
524
          e2 = redirect_edge_and_branch (e, rd->dup_block);
525
          gcc_assert (e == e2);
526
          flush_pending_stmts (e2);
527
        }
528
 
529
      /* Go ahead and clear E->aux.  It's not needed anymore and failure
530
         to clear it will cause all kinds of unpleasant problems later.  */
531
      free (e->aux);
532
      e->aux = NULL;
533
 
534
    }
535
 
536
  /* Indicate that we actually threaded one or more jumps.  */
537
  if (rd->incoming_edges)
538
    local_info->jumps_threaded = true;
539
 
540
  return 1;
541
}

/* Return true if this block has no executable statements other than
   a simple control flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
             || is_gimple_debug (gsi_stmt (gsi))
             || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.  */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  struct local_info local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data = htab_create (EDGE_COUNT (bb->succs),
                                  redirection_data_hash,
                                  redirection_data_eq,
                                  free);

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);

      if (e->aux)
        e2 = THREAD_TARGET (e);
      else
        e2 = NULL;

      if (e2 && loop_exit_edge_p (loop, e2))
        {
          loop->header = NULL;
          loop->latch = NULL;
        }
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
        continue;

      if (THREAD_TARGET2 (e))
        e2 = THREAD_TARGET2 (e);
      else
        e2 = THREAD_TARGET (e);

      if (!e2
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges.  */
          || (noloop_only
              && bb == bb->loop_father->header
              && (!loop_exit_edge_p (bb->loop_father, e2)
                  || THREAD_TARGET2 (e))))
        continue;

      if (e->dest == e2->src)
        update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
                                         e->count, THREAD_TARGET (e));

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  htab_traverse (redirection_data, create_duplicates, &local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  htab_traverse (redirection_data, fixup_template_block, &local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  htab_traverse (redirection_data, redirect_edges, &local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  htab_delete (redirection_data);
  redirection_data = NULL;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}

/* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
   copy of E->dest created during threading, or E->dest if it was not necessary
   to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  edge eto = THREAD_TARGET (e);
  struct redirection_data rd;

  free (e->aux);
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
         control statement at its end, and all successor edges except ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  rd.outgoing_edge = eto;

  create_block_for_threading (bb, &rd);
  remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_block);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
             e->src->index, e->dest->index, rd.dup_block->index);

  rd.dup_block->count = e->count;
  rd.dup_block->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_block)->count = e->count;
  redirect_edge_and_branch (e, rd.dup_block);
  flush_pending_stmts (e);

  return rd.dup_block;
}

/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
          && bb != dbds_ce_stop);
}

/* Evaluates the dominance relationship of latch of the LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
    {
      bool ok = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          if (e->src == loop->header)
            {
              ok = true;
              break;
            }
        }

      if (!ok)
        return DOMST_NONDOMINATING;
    }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
                                bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
        if (e->src == loop->header)
          {
            free (bblocks);
            return DOMST_NONDOMINATING;
          }
        if (e->src == bb)
          bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      if (THREAD_TARGET2 (latch))
        goto fail;
      tgt_edge = THREAD_TARGET (latch);
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If latch is not threaded, and there is a header
                 edge that is not threaded, we would create a loop
                 with multiple entries.  */
              goto fail;
            }

          if (THREAD_TARGET2 (e))
            goto fail;
          tgt_edge = THREAD_TARGET (e);
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create a loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      /* First handle the case where the latch edge is redirected.  */
      loop->latch = thread_single_edge (latch);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case where entry edges are redirected to the new
         entry block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      free (e->aux);
      e->aux = NULL;
    }
  return false;
}

/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCKS set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

  for (i = 0; i < VEC_length (edge, threaded_edges); i += 3)
    {
      edge e = VEC_index (edge, threaded_edges, i);
      edge *x = (edge *) XNEWVEC (edge, 2);

      e->aux = x;
      THREAD_TARGET (e) = VEC_index (edge, threaded_edges, i + 1);
      THREAD_TARGET2 (e) = VEC_index (edge, threaded_edges, i + 2);
      bitmap_set_bit (tmp, e->dest->index);
    }

  /* If optimizing for size, only thread through a block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
        {
          bb = BASIC_BLOCK (i);
          if (EDGE_COUNT (bb->preds) > 1
              && !redirection_block_p (bb))
            {
              FOR_EACH_EDGE (e, ei, bb->preds)
                {
                  free (e->aux);
                  e->aux = NULL;
                }
            }
          else
            bitmap_set_bit (threaded_blocks, i);
        }
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  BITMAP_FREE (tmp);
}


/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each edge triple recorded in THREADED_EDGES.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;
  loop_iterator li;

  /* We must know about loops in order to preserve them.  */
  gcc_assert (current_loops != NULL);

  if (threaded_edges == NULL)
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);

      if (EDGE_COUNT (bb->preds) > 0)
        retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  statistics_counter_event (cfun, "Jumps threaded",
                            thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  VEC_free (edge, heap, threaded_edges);
  threaded_edges = NULL;

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}

/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   E is the edge we can thread, E2 is the new target edge, i.e., we
   are effectively recording that E->dest can be changed to E2->dest
   after fixing the SSA graph.  E3, if non-NULL, is the final target
   edge when the thread passes through an intermediate (joiner)
   block.  */

void
register_jump_thread (edge e, edge e2, edge e3)
{
  /* This can occur if we're jumping to a constant address or
     something similar.  Just get out now.  */
  if (e2 == NULL)
    return;

  if (threaded_edges == NULL)
    threaded_edges = VEC_alloc (edge, heap, 15);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && e->dest != e2->src)
    fprintf (dump_file,
             "  Registering jump thread around one or more intermediate blocks\n");

  VEC_safe_push (edge, heap, threaded_edges, e);
  VEC_safe_push (edge, heap, threaded_edges, e2);
  VEC_safe_push (edge, heap, threaded_edges, e3);
}
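
/* An editorial usage sketch (EXAMPLE_CLIENT is hypothetical; real
   clients are passes such as DOM and VRP): each opportunity is
   registered as it is discovered, and the CFG is updated once at the
   end.  Pass NULL for E3 when no joiner block is involved.  */

static void ATTRIBUTE_UNUSED
example_client (edge e, edge e2)
{
  register_jump_thread (e, e2, NULL);

  if (thread_through_all_blocks (false))
    {
      /* Jumps were threaded; dominance information and SSA form must
         now be repaired by the caller, per the function comment on
         thread_through_all_blocks above.  */
    }
}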
