OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [bb-reorder.c] - Blame information for rev 827

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Basic block reordering routines for the GNU compiler.
2
   Copyright (C) 2000, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3
 
4
   This file is part of GCC.
5
 
6
   GCC is free software; you can redistribute it and/or modify it
7
   under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
 
11
   GCC is distributed in the hope that it will be useful, but WITHOUT
12
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14
   License for more details.
15
 
16
   You should have received a copy of the GNU General Public License
17
   along with GCC; see the file COPYING3.  If not see
18
   <http://www.gnu.org/licenses/>.  */
19
 
20
/* This (greedy) algorithm constructs traces in several rounds.
21
   The construction starts from "seeds".  The seed for the first round
22
   is the entry point of function.  When there are more than one seed
23
   that one is selected first that has the lowest key in the heap
24
   (see function bb_to_key).  Then the algorithm repeatedly adds the most
25
   probable successor to the end of a trace.  Finally it connects the traces.
26
 
27
   There are two parameters: Branch Threshold and Exec Threshold.
28
   If the edge to a successor of the actual basic block is lower than
29
   Branch Threshold or the frequency of the successor is lower than
30
   Exec Threshold the successor will be the seed in one of the next rounds.
31
   Each round has these parameters lower than the previous one.
32
   The last round has to have these parameters set to zero
33
   so that the remaining blocks are picked up.
34
 
35
   The algorithm selects the most probable successor from all unvisited
36
   successors and successors that have been added to this trace.
37
   The other successors (that has not been "sent" to the next round) will be
38
   other seeds for this round and the secondary traces will start in them.
39
   If the successor has not been visited in this trace it is added to the trace
40
   (however, there is some heuristic for simple branches).
41
   If the successor has been visited in this trace the loop has been found.
42
   If the loop has many iterations the loop is rotated so that the
43
   source block of the most probable edge going out from the loop
44
   is the last block of the trace.
45
   If the loop has few iterations and there is no edge from the last block of
46
   the loop going out from loop the loop header is duplicated.
47
   Finally, the construction of the trace is terminated.
48
 
49
   When connecting traces it first checks whether there is an edge from the
50
   last block of one trace to the first block of another trace.
51
   When there are still some unconnected traces it checks whether there exists
52
   a basic block BB such that BB is a successor of the last bb of one trace
53
   and BB is a predecessor of the first block of another trace. In this case,
54
   BB is duplicated and the traces are connected through this duplicate.
55
   The rest of traces are simply connected so there will be a jump to the
56
   beginning of the rest of trace.
57
 
58
 
59
   References:
60
 
61
   "Software Trace Cache"
62
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
63
   http://citeseer.nj.nec.com/15361.html
64
 
65
*/
66
 
67
#include "config.h"
68
#include "system.h"
69
#include "coretypes.h"
70
#include "tm.h"
71
#include "rtl.h"
72
#include "regs.h"
73
#include "flags.h"
74
#include "timevar.h"
75
#include "output.h"
76
#include "cfglayout.h"
77
#include "fibheap.h"
78
#include "target.h"
79
#include "function.h"
80
#include "tm_p.h"
81
#include "obstack.h"
82
#include "expr.h"
83
#include "params.h"
84
#include "toplev.h"
85
#include "tree-pass.h"
86
 
87
#ifndef HAVE_conditional_execution
88
#define HAVE_conditional_execution 0
89
#endif
90
 
91
/* The number of rounds.  In most cases there will only be 4 rounds, but
92
   when partitioning hot and cold basic blocks into separate sections of
93
   the .o file there will be an extra round.*/
94
#define N_ROUNDS 5
95
 
96
/* Stubs in case we don't have a return insn.
97
   We have to check at runtime too, not only compiletime.  */
98
 
99
#ifndef HAVE_return
100
#define HAVE_return 0
101
#define gen_return() NULL_RTX
102
#endif
103
 
104
 
105
/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
106
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
107
 
108
/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
109
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
110
 
111
/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
112
   block the edge destination is not duplicated while connecting traces.  */
113
#define DUPLICATION_THRESHOLD 100
114
 
115
/* Length of unconditional jump instruction.  */
116
static int uncond_jump_length;
117
 
118
/* Structure to hold needed information for each basic block.  */
119
typedef struct bbro_basic_block_data_def
120
{
121
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
122
  int start_of_trace;
123
 
124
  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
125
  int end_of_trace;
126
 
127
  /* Which trace is the bb in?  */
128
  int in_trace;
129
 
130
  /* Which heap is BB in (if any)?  */
131
  fibheap_t heap;
132
 
133
  /* Which heap node is BB in (if any)?  */
134
  fibnode_t node;
135
} bbro_basic_block_data;
136
 
137
/* The current size of the following dynamic array.  */
138
static int array_size;
139
 
140
/* The array which holds needed information for basic blocks.  */
141
static bbro_basic_block_data *bbd;
142
 
143
/* To avoid frequent reallocation the size of arrays is greater than needed,
144
   the number of elements is (not less than) 1.25 * size_wanted.  */
145
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
146
 
147
/* Free the memory and set the pointer to NULL.  */
148
#define FREE(P) (gcc_assert (P), free (P), P = 0)
149
 
150
/* Structure for holding information about a trace.  */
151
struct trace
152
{
153
  /* First and last basic block of the trace.  */
154
  basic_block first, last;
155
 
156
  /* The round of the STC creation which this trace was found in.  */
157
  int round;
158
 
159
  /* The length (i.e. the number of basic blocks) of the trace.  */
160
  int length;
161
};
162
 
163
/* Maximum frequency and count of one of the entry blocks.  */
164
static int max_entry_frequency;
165
static gcov_type max_entry_count;
166
 
167
/* Local function prototypes.  */
168
static void find_traces (int *, struct trace *);
169
static basic_block rotate_loop (edge, struct trace *, int);
170
static void mark_bb_visited (basic_block, int);
171
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
172
                                 int, fibheap_t *, int);
173
static basic_block copy_bb (basic_block, edge, basic_block, int);
174
static fibheapkey_t bb_to_key (basic_block);
175
static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
176
static void connect_traces (int, struct trace *);
177
static bool copy_bb_p (basic_block, int);
178
static int get_uncond_jump_length (void);
179
static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
180
static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
181
                                                                  int *,
182
                                                                  int *);
183
static void add_labels_and_missing_jumps (edge *, int);
184
static void add_reg_crossing_jump_notes (void);
185
static void fix_up_fall_thru_edges (void);
186
static void fix_edges_for_rarely_executed_code (edge *, int);
187
static void fix_crossing_conditional_branches (void);
188
static void fix_crossing_unconditional_branches (void);
189
 
190
/* Check to see if bb should be pushed into the next round of trace
191
   collections or not.  Reasons for pushing the block forward are 1).
192
   If the block is cold, we are doing partitioning, and there will be
193
   another round (cold partition blocks are not supposed to be
194
   collected into traces until the very last round); or 2). There will
195
   be another round, and the basic block is not "hot enough" for the
196
   current round of trace collection.  */
197
 
198
static bool
199
push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
200
                      int exec_th, gcov_type count_th)
201
{
202
  bool there_exists_another_round;
203
  bool block_not_hot_enough;
204
 
205
  there_exists_another_round = round < number_of_rounds - 1;
206
 
207
  block_not_hot_enough = (bb->frequency < exec_th
208
                          || bb->count < count_th
209
                          || probably_never_executed_bb_p (bb));
210
 
211
  if (there_exists_another_round
212
      && block_not_hot_enough)
213
    return true;
214
  else
215
    return false;
216
}
217
 
218
/* Find the traces for Software Trace Cache.  Chain each trace through
219
   RBI()->next.  Store the number of traces to N_TRACES and description of
220
   traces to TRACES.  */
221
 
222
static void
223
find_traces (int *n_traces, struct trace *traces)
224
{
225
  int i;
226
  int number_of_rounds;
227
  edge e;
228
  edge_iterator ei;
229
  fibheap_t heap;
230
 
231
  /* Add one extra round of trace collection when partitioning hot/cold
232
     basic blocks into separate sections.  The last round is for all the
233
     cold blocks (and ONLY the cold blocks).  */
234
 
235
  number_of_rounds = N_ROUNDS - 1;
236
 
237
  /* Insert entry points of function into heap.  */
238
  heap = fibheap_new ();
239
  max_entry_frequency = 0;
240
  max_entry_count = 0;
241
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
242
    {
243
      bbd[e->dest->index].heap = heap;
244
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
245
                                                    e->dest);
246
      if (e->dest->frequency > max_entry_frequency)
247
        max_entry_frequency = e->dest->frequency;
248
      if (e->dest->count > max_entry_count)
249
        max_entry_count = e->dest->count;
250
    }
251
 
252
  /* Find the traces.  */
253
  for (i = 0; i < number_of_rounds; i++)
254
    {
255
      gcov_type count_threshold;
256
 
257
      if (dump_file)
258
        fprintf (dump_file, "STC - round %d\n", i + 1);
259
 
260
      if (max_entry_count < INT_MAX / 1000)
261
        count_threshold = max_entry_count * exec_threshold[i] / 1000;
262
      else
263
        count_threshold = max_entry_count / 1000 * exec_threshold[i];
264
 
265
      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
266
                           max_entry_frequency * exec_threshold[i] / 1000,
267
                           count_threshold, traces, n_traces, i, &heap,
268
                           number_of_rounds);
269
    }
270
  fibheap_delete (heap);
271
 
272
  if (dump_file)
273
    {
274
      for (i = 0; i < *n_traces; i++)
275
        {
276
          basic_block bb;
277
          fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
278
                   traces[i].round + 1);
279
          for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
280
            fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
281
          fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
282
        }
283
      fflush (dump_file);
284
    }
285
}
286
 
287
/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
288
   (with sequential number TRACE_N).  */
289
 
290
static basic_block
291
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
292
{
293
  basic_block bb;
294
 
295
  /* Information about the best end (end after rotation) of the loop.  */
296
  basic_block best_bb = NULL;
297
  edge best_edge = NULL;
298
  int best_freq = -1;
299
  gcov_type best_count = -1;
300
  /* The best edge is preferred when its destination is not visited yet
301
     or is a start block of some trace.  */
302
  bool is_preferred = false;
303
 
304
  /* Find the most frequent edge that goes out from current trace.  */
305
  bb = back_edge->dest;
306
  do
307
    {
308
      edge e;
309
      edge_iterator ei;
310
 
311
      FOR_EACH_EDGE (e, ei, bb->succs)
312
        if (e->dest != EXIT_BLOCK_PTR
313
            && e->dest->il.rtl->visited != trace_n
314
            && (e->flags & EDGE_CAN_FALLTHRU)
315
            && !(e->flags & EDGE_COMPLEX))
316
        {
317
          if (is_preferred)
318
            {
319
              /* The best edge is preferred.  */
320
              if (!e->dest->il.rtl->visited
321
                  || bbd[e->dest->index].start_of_trace >= 0)
322
                {
323
                  /* The current edge E is also preferred.  */
324
                  int freq = EDGE_FREQUENCY (e);
325
                  if (freq > best_freq || e->count > best_count)
326
                    {
327
                      best_freq = freq;
328
                      best_count = e->count;
329
                      best_edge = e;
330
                      best_bb = bb;
331
                    }
332
                }
333
            }
334
          else
335
            {
336
              if (!e->dest->il.rtl->visited
337
                  || bbd[e->dest->index].start_of_trace >= 0)
338
                {
339
                  /* The current edge E is preferred.  */
340
                  is_preferred = true;
341
                  best_freq = EDGE_FREQUENCY (e);
342
                  best_count = e->count;
343
                  best_edge = e;
344
                  best_bb = bb;
345
                }
346
              else
347
                {
348
                  int freq = EDGE_FREQUENCY (e);
349
                  if (!best_edge || freq > best_freq || e->count > best_count)
350
                    {
351
                      best_freq = freq;
352
                      best_count = e->count;
353
                      best_edge = e;
354
                      best_bb = bb;
355
                    }
356
                }
357
            }
358
        }
359
      bb = bb->aux;
360
    }
361
  while (bb != back_edge->dest);
362
 
363
  if (best_bb)
364
    {
365
      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
366
         the trace.  */
367
      if (back_edge->dest == trace->first)
368
        {
369
          trace->first = best_bb->aux;
370
        }
371
      else
372
        {
373
          basic_block prev_bb;
374
 
375
          for (prev_bb = trace->first;
376
               prev_bb->aux != back_edge->dest;
377
               prev_bb = prev_bb->aux)
378
            ;
379
          prev_bb->aux = best_bb->aux;
380
 
381
          /* Try to get rid of uncond jump to cond jump.  */
382
          if (single_succ_p (prev_bb))
383
            {
384
              basic_block header = single_succ (prev_bb);
385
 
386
              /* Duplicate HEADER if it is a small block containing cond jump
387
                 in the end.  */
388
              if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
389
                  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
390
                                     NULL_RTX))
391
                copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
392
            }
393
        }
394
    }
395
  else
396
    {
397
      /* We have not found suitable loop tail so do no rotation.  */
398
      best_bb = back_edge->src;
399
    }
400
  best_bb->aux = NULL;
401
  return best_bb;
402
}
403
 
404
/* This function marks BB that it was visited in trace number TRACE.  */
405
 
406
static void
407
mark_bb_visited (basic_block bb, int trace)
408
{
409
  bb->il.rtl->visited = trace;
410
  if (bbd[bb->index].heap)
411
    {
412
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
413
      bbd[bb->index].heap = NULL;
414
      bbd[bb->index].node = NULL;
415
    }
416
}
417
 
418
/* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
419
   not include basic blocks their probability is lower than BRANCH_TH or their
420
   frequency is lower than EXEC_TH into traces (or count is lower than
421
   COUNT_TH).  It stores the new traces into TRACES and modifies the number of
422
   traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
423
   expects that starting basic blocks are in *HEAP and at the end it deletes
424
   *HEAP and stores starting points for the next round into new *HEAP.  */
425
 
426
static void
427
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
428
                     struct trace *traces, int *n_traces, int round,
429
                     fibheap_t *heap, int number_of_rounds)
430
{
431
  /* Heap for discarded basic blocks which are possible starting points for
432
     the next round.  */
433
  fibheap_t new_heap = fibheap_new ();
434
 
435
  while (!fibheap_empty (*heap))
436
    {
437
      basic_block bb;
438
      struct trace *trace;
439
      edge best_edge, e;
440
      fibheapkey_t key;
441
      edge_iterator ei;
442
 
443
      bb = fibheap_extract_min (*heap);
444
      bbd[bb->index].heap = NULL;
445
      bbd[bb->index].node = NULL;
446
 
447
      if (dump_file)
448
        fprintf (dump_file, "Getting bb %d\n", bb->index);
449
 
450
      /* If the BB's frequency is too low send BB to the next round.  When
451
         partitioning hot/cold blocks into separate sections, make sure all
452
         the cold blocks (and ONLY the cold blocks) go into the (extra) final
453
         round.  */
454
 
455
      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
456
                                count_th))
457
        {
458
          int key = bb_to_key (bb);
459
          bbd[bb->index].heap = new_heap;
460
          bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
461
 
462
          if (dump_file)
463
            fprintf (dump_file,
464
                     "  Possible start point of next round: %d (key: %d)\n",
465
                     bb->index, key);
466
          continue;
467
        }
468
 
469
      trace = traces + *n_traces;
470
      trace->first = bb;
471
      trace->round = round;
472
      trace->length = 0;
473
      bbd[bb->index].in_trace = *n_traces;
474
      (*n_traces)++;
475
 
476
      do
477
        {
478
          int prob, freq;
479
          bool ends_in_call;
480
 
481
          /* The probability and frequency of the best edge.  */
482
          int best_prob = INT_MIN / 2;
483
          int best_freq = INT_MIN / 2;
484
 
485
          best_edge = NULL;
486
          mark_bb_visited (bb, *n_traces);
487
          trace->length++;
488
 
489
          if (dump_file)
490
            fprintf (dump_file, "Basic block %d was visited in trace %d\n",
491
                     bb->index, *n_traces - 1);
492
 
493
          ends_in_call = block_ends_with_call_p (bb);
494
 
495
          /* Select the successor that will be placed after BB.  */
496
          FOR_EACH_EDGE (e, ei, bb->succs)
497
            {
498
              gcc_assert (!(e->flags & EDGE_FAKE));
499
 
500
              if (e->dest == EXIT_BLOCK_PTR)
501
                continue;
502
 
503
              if (e->dest->il.rtl->visited
504
                  && e->dest->il.rtl->visited != *n_traces)
505
                continue;
506
 
507
              if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
508
                continue;
509
 
510
              prob = e->probability;
511
              freq = e->dest->frequency;
512
 
513
              /* The only sensible preference for a call instruction is the
514
                 fallthru edge.  Don't bother selecting anything else.  */
515
              if (ends_in_call)
516
                {
517
                  if (e->flags & EDGE_CAN_FALLTHRU)
518
                    {
519
                      best_edge = e;
520
                      best_prob = prob;
521
                      best_freq = freq;
522
                    }
523
                  continue;
524
                }
525
 
526
              /* Edge that cannot be fallthru or improbable or infrequent
527
                 successor (i.e. it is unsuitable successor).  */
528
              if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
529
                  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
530
                  || e->count < count_th)
531
                continue;
532
 
533
              /* If partitioning hot/cold basic blocks, don't consider edges
534
                 that cross section boundaries.  */
535
 
536
              if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
537
                                 best_edge))
538
                {
539
                  best_edge = e;
540
                  best_prob = prob;
541
                  best_freq = freq;
542
                }
543
            }
544
 
545
          /* If the best destination has multiple predecessors, and can be
546
             duplicated cheaper than a jump, don't allow it to be added
547
             to a trace.  We'll duplicate it when connecting traces.  */
548
          if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
549
              && copy_bb_p (best_edge->dest, 0))
550
            best_edge = NULL;
551
 
552
          /* Add all non-selected successors to the heaps.  */
553
          FOR_EACH_EDGE (e, ei, bb->succs)
554
            {
555
              if (e == best_edge
556
                  || e->dest == EXIT_BLOCK_PTR
557
                  || e->dest->il.rtl->visited)
558
                continue;
559
 
560
              key = bb_to_key (e->dest);
561
 
562
              if (bbd[e->dest->index].heap)
563
                {
564
                  /* E->DEST is already in some heap.  */
565
                  if (key != bbd[e->dest->index].node->key)
566
                    {
567
                      if (dump_file)
568
                        {
569
                          fprintf (dump_file,
570
                                   "Changing key for bb %d from %ld to %ld.\n",
571
                                   e->dest->index,
572
                                   (long) bbd[e->dest->index].node->key,
573
                                   key);
574
                        }
575
                      fibheap_replace_key (bbd[e->dest->index].heap,
576
                                           bbd[e->dest->index].node, key);
577
                    }
578
                }
579
              else
580
                {
581
                  fibheap_t which_heap = *heap;
582
 
583
                  prob = e->probability;
584
                  freq = EDGE_FREQUENCY (e);
585
 
586
                  if (!(e->flags & EDGE_CAN_FALLTHRU)
587
                      || (e->flags & EDGE_COMPLEX)
588
                      || prob < branch_th || freq < exec_th
589
                      || e->count < count_th)
590
                    {
591
                      /* When partitioning hot/cold basic blocks, make sure
592
                         the cold blocks (and only the cold blocks) all get
593
                         pushed to the last round of trace collection.  */
594
 
595
                      if (push_to_next_round_p (e->dest, round,
596
                                                number_of_rounds,
597
                                                exec_th, count_th))
598
                        which_heap = new_heap;
599
                    }
600
 
601
                  bbd[e->dest->index].heap = which_heap;
602
                  bbd[e->dest->index].node = fibheap_insert (which_heap,
603
                                                                key, e->dest);
604
 
605
                  if (dump_file)
606
                    {
607
                      fprintf (dump_file,
608
                               "  Possible start of %s round: %d (key: %ld)\n",
609
                               (which_heap == new_heap) ? "next" : "this",
610
                               e->dest->index, (long) key);
611
                    }
612
 
613
                }
614
            }
615
 
616
          if (best_edge) /* Suitable successor was found.  */
617
            {
618
              if (best_edge->dest->il.rtl->visited == *n_traces)
619
                {
620
                  /* We do nothing with one basic block loops.  */
621
                  if (best_edge->dest != bb)
622
                    {
623
                      if (EDGE_FREQUENCY (best_edge)
624
                          > 4 * best_edge->dest->frequency / 5)
625
                        {
626
                          /* The loop has at least 4 iterations.  If the loop
627
                             header is not the first block of the function
628
                             we can rotate the loop.  */
629
 
630
                          if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
631
                            {
632
                              if (dump_file)
633
                                {
634
                                  fprintf (dump_file,
635
                                           "Rotating loop %d - %d\n",
636
                                           best_edge->dest->index, bb->index);
637
                                }
638
                              bb->aux = best_edge->dest;
639
                              bbd[best_edge->dest->index].in_trace =
640
                                                             (*n_traces) - 1;
641
                              bb = rotate_loop (best_edge, trace, *n_traces);
642
                            }
643
                        }
644
                      else
645
                        {
646
                          /* The loop has less than 4 iterations.  */
647
 
648
                          if (single_succ_p (bb)
649
                              && copy_bb_p (best_edge->dest, !optimize_size))
650
                            {
651
                              bb = copy_bb (best_edge->dest, best_edge, bb,
652
                                            *n_traces);
653
                              trace->length++;
654
                            }
655
                        }
656
                    }
657
 
658
                  /* Terminate the trace.  */
659
                  break;
660
                }
661
              else
662
                {
663
                  /* Check for a situation
664
 
665
                    A
666
                   /|
667
                  B |
668
                   \|
669
                    C
670
 
671
                  where
672
                  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
673
                    >= EDGE_FREQUENCY (AC).
674
                  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
675
                  Best ordering is then A B C.
676
 
677
                  This situation is created for example by:
678
 
679
                  if (A) B;
680
                  C;
681
 
682
                  */
683
 
684
                  FOR_EACH_EDGE (e, ei, bb->succs)
685
                    if (e != best_edge
686
                        && (e->flags & EDGE_CAN_FALLTHRU)
687
                        && !(e->flags & EDGE_COMPLEX)
688
                        && !e->dest->il.rtl->visited
689
                        && single_pred_p (e->dest)
690
                        && !(e->flags & EDGE_CROSSING)
691
                        && single_succ_p (e->dest)
692
                        && (single_succ_edge (e->dest)->flags
693
                            & EDGE_CAN_FALLTHRU)
694
                        && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
695
                        && single_succ (e->dest) == best_edge->dest
696
                        && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
697
                      {
698
                        best_edge = e;
699
                        if (dump_file)
700
                          fprintf (dump_file, "Selecting BB %d\n",
701
                                   best_edge->dest->index);
702
                        break;
703
                      }
704
 
705
                  bb->aux = best_edge->dest;
706
                  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
707
                  bb = best_edge->dest;
708
                }
709
            }
710
        }
711
      while (best_edge);
712
      trace->last = bb;
713
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
714
      bbd[trace->last->index].end_of_trace = *n_traces - 1;
715
 
716
      /* The trace is terminated so we have to recount the keys in heap
717
         (some block can have a lower key because now one of its predecessors
718
         is an end of the trace).  */
719
      FOR_EACH_EDGE (e, ei, bb->succs)
720
        {
721
          if (e->dest == EXIT_BLOCK_PTR
722
              || e->dest->il.rtl->visited)
723
            continue;
724
 
725
          if (bbd[e->dest->index].heap)
726
            {
727
              key = bb_to_key (e->dest);
728
              if (key != bbd[e->dest->index].node->key)
729
                {
730
                  if (dump_file)
731
                    {
732
                      fprintf (dump_file,
733
                               "Changing key for bb %d from %ld to %ld.\n",
734
                               e->dest->index,
735
                               (long) bbd[e->dest->index].node->key, key);
736
                    }
737
                  fibheap_replace_key (bbd[e->dest->index].heap,
738
                                       bbd[e->dest->index].node,
739
                                       key);
740
                }
741
            }
742
        }
743
    }
744
 
745
  fibheap_delete (*heap);
746
 
747
  /* "Return" the new heap.  */
748
  *heap = new_heap;
749
}
750
 
751
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
752
   it to trace after BB, mark OLD_BB visited and update pass' data structures
753
   (TRACE is a number of trace which OLD_BB is duplicated to).  */
754
 
755
static basic_block
756
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
757
{
758
  basic_block new_bb;
759
 
760
  new_bb = duplicate_block (old_bb, e, bb);
761
  BB_COPY_PARTITION (new_bb, old_bb);
762
 
763
  gcc_assert (e->dest == new_bb);
764
  gcc_assert (!e->dest->il.rtl->visited);
765
 
766
  if (dump_file)
767
    fprintf (dump_file,
768
             "Duplicated bb %d (created bb %d)\n",
769
             old_bb->index, new_bb->index);
770
  new_bb->il.rtl->visited = trace;
771
  new_bb->aux = bb->aux;
772
  bb->aux = new_bb;
773
 
774
  if (new_bb->index >= array_size || last_basic_block > array_size)
775
    {
776
      int i;
777
      int new_size;
778
 
779
      new_size = MAX (last_basic_block, new_bb->index + 1);
780
      new_size = GET_ARRAY_SIZE (new_size);
781
      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
782
      for (i = array_size; i < new_size; i++)
783
        {
784
          bbd[i].start_of_trace = -1;
785
          bbd[i].in_trace = -1;
786
          bbd[i].end_of_trace = -1;
787
          bbd[i].heap = NULL;
788
          bbd[i].node = NULL;
789
        }
790
      array_size = new_size;
791
 
792
      if (dump_file)
793
        {
794
          fprintf (dump_file,
795
                   "Growing the dynamic array to %d elements.\n",
796
                   array_size);
797
        }
798
    }
799
 
800
  bbd[new_bb->index].in_trace = trace;
801
 
802
  return new_bb;
803
}
804
 
805
/* Compute and return the key (for the heap) of the basic block BB.  */
806
 
807
static fibheapkey_t
808
bb_to_key (basic_block bb)
809
{
810
  edge e;
811
  edge_iterator ei;
812
  int priority = 0;
813
 
814
  /* Do not start in probably never executed blocks.  */
815
 
816
  if (BB_PARTITION (bb) == BB_COLD_PARTITION
817
      || probably_never_executed_bb_p (bb))
818
    return BB_FREQ_MAX;
819
 
820
  /* Prefer blocks whose predecessor is an end of some trace
821
     or whose predecessor edge is EDGE_DFS_BACK.  */
822
  FOR_EACH_EDGE (e, ei, bb->preds)
823
    {
824
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
825
          || (e->flags & EDGE_DFS_BACK))
826
        {
827
          int edge_freq = EDGE_FREQUENCY (e);
828
 
829
          if (edge_freq > priority)
830
            priority = edge_freq;
831
        }
832
    }
833
 
834
  if (priority)
835
    /* The block with priority should have significantly lower key.  */
836
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
837
  return -bb->frequency;
838
}
839
 
840
/* Return true when the edge E from basic block BB is better than the temporary
841
   best edge (details are in function).  The probability of edge E is PROB. The
842
   frequency of the successor is FREQ.  The current best probability is
843
   BEST_PROB, the best frequency is BEST_FREQ.
844
   The edge is considered to be equivalent when PROB does not differ much from
845
   BEST_PROB; similarly for frequency.  */
846
 
847
static bool
848
better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
849
               int best_freq, edge cur_best_edge)
850
{
851
  bool is_better_edge;
852
 
853
  /* The BEST_* values do not have to be best, but can be a bit smaller than
854
     maximum values.  */
855
  int diff_prob = best_prob / 10;
856
  int diff_freq = best_freq / 10;
857
 
858
  if (prob > best_prob + diff_prob)
859
    /* The edge has higher probability than the temporary best edge.  */
860
    is_better_edge = true;
861
  else if (prob < best_prob - diff_prob)
862
    /* The edge has lower probability than the temporary best edge.  */
863
    is_better_edge = false;
864
  else if (freq < best_freq - diff_freq)
865
    /* The edge and the temporary best edge  have almost equivalent
866
       probabilities.  The higher frequency of a successor now means
867
       that there is another edge going into that successor.
868
       This successor has lower frequency so it is better.  */
869
    is_better_edge = true;
870
  else if (freq > best_freq + diff_freq)
871
    /* This successor has higher frequency so it is worse.  */
872
    is_better_edge = false;
873
  else if (e->dest->prev_bb == bb)
874
    /* The edges have equivalent probabilities and the successors
875
       have equivalent frequencies.  Select the previous successor.  */
876
    is_better_edge = true;
877
  else
878
    is_better_edge = false;
879
 
880
  /* If we are doing hot/cold partitioning, make sure that we always favor
881
     non-crossing edges over crossing edges.  */
882
 
883
  if (!is_better_edge
884
      && flag_reorder_blocks_and_partition
885
      && cur_best_edge
886
      && (cur_best_edge->flags & EDGE_CROSSING)
887
      && !(e->flags & EDGE_CROSSING))
888
    is_better_edge = true;
889
 
890
  return is_better_edge;
891
}
892
 
893
/* Connect traces in array TRACES, N_TRACES is the count of traces.  */
894
 
895
static void
896
connect_traces (int n_traces, struct trace *traces)
897
{
898
  int i;
899
  bool *connected;
900
  bool two_passes;
901
  int last_trace;
902
  int current_pass;
903
  int current_partition;
904
  int freq_threshold;
905
  gcov_type count_threshold;
906
 
907
  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
908
  if (max_entry_count < INT_MAX / 1000)
909
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
910
  else
911
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
912
 
913
  connected = XCNEWVEC (bool, n_traces);
914
  last_trace = -1;
915
  current_pass = 1;
916
  current_partition = BB_PARTITION (traces[0].first);
917
  two_passes = false;
918
 
919
  if (flag_reorder_blocks_and_partition)
920
    for (i = 0; i < n_traces && !two_passes; i++)
921
      if (BB_PARTITION (traces[0].first)
922
          != BB_PARTITION (traces[i].first))
923
        two_passes = true;
924
 
925
  for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
926
    {
927
      int t = i;
928
      int t2;
929
      edge e, best;
930
      int best_len;
931
 
932
      if (i >= n_traces)
933
        {
934
          gcc_assert (two_passes && current_pass == 1);
935
          i = 0;
936
          t = i;
937
          current_pass = 2;
938
          if (current_partition == BB_HOT_PARTITION)
939
            current_partition = BB_COLD_PARTITION;
940
          else
941
            current_partition = BB_HOT_PARTITION;
942
        }
943
 
944
      if (connected[t])
945
        continue;
946
 
947
      if (two_passes
948
          && BB_PARTITION (traces[t].first) != current_partition)
949
        continue;
950
 
951
      connected[t] = true;
952
 
953
      /* Find the predecessor traces.  */
954
      for (t2 = t; t2 > 0;)
955
        {
956
          edge_iterator ei;
957
          best = NULL;
958
          best_len = 0;
959
          FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
960
            {
961
              int si = e->src->index;
962
 
963
              if (e->src != ENTRY_BLOCK_PTR
964
                  && (e->flags & EDGE_CAN_FALLTHRU)
965
                  && !(e->flags & EDGE_COMPLEX)
966
                  && bbd[si].end_of_trace >= 0
967
                  && !connected[bbd[si].end_of_trace]
968
                  && (BB_PARTITION (e->src) == current_partition)
969
                  && (!best
970
                      || e->probability > best->probability
971
                      || (e->probability == best->probability
972
                          && traces[bbd[si].end_of_trace].length > best_len)))
973
                {
974
                  best = e;
975
                  best_len = traces[bbd[si].end_of_trace].length;
976
                }
977
            }
978
          if (best)
979
            {
980
              best->src->aux = best->dest;
981
              t2 = bbd[best->src->index].end_of_trace;
982
              connected[t2] = true;
983
 
984
              if (dump_file)
985
                {
986
                  fprintf (dump_file, "Connection: %d %d\n",
987
                           best->src->index, best->dest->index);
988
                }
989
            }
990
          else
991
            break;
992
        }
993
 
994
      if (last_trace >= 0)
995
        traces[last_trace].last->aux = traces[t2].first;
996
      last_trace = t;
997
 
998
      /* Find the successor traces.  */
999
      while (1)
1000
        {
1001
          /* Find the continuation of the chain.  */
1002
          edge_iterator ei;
1003
          best = NULL;
1004
          best_len = 0;
1005
          FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1006
            {
1007
              int di = e->dest->index;
1008
 
1009
              if (e->dest != EXIT_BLOCK_PTR
1010
                  && (e->flags & EDGE_CAN_FALLTHRU)
1011
                  && !(e->flags & EDGE_COMPLEX)
1012
                  && bbd[di].start_of_trace >= 0
1013
                  && !connected[bbd[di].start_of_trace]
1014
                  && (BB_PARTITION (e->dest) == current_partition)
1015
                  && (!best
1016
                      || e->probability > best->probability
1017
                      || (e->probability == best->probability
1018
                          && traces[bbd[di].start_of_trace].length > best_len)))
1019
                {
1020
                  best = e;
1021
                  best_len = traces[bbd[di].start_of_trace].length;
1022
                }
1023
            }
1024
 
1025
          if (best)
1026
            {
1027
              if (dump_file)
1028
                {
1029
                  fprintf (dump_file, "Connection: %d %d\n",
1030
                           best->src->index, best->dest->index);
1031
                }
1032
              t = bbd[best->dest->index].start_of_trace;
1033
              traces[last_trace].last->aux = traces[t].first;
1034
              connected[t] = true;
1035
              last_trace = t;
1036
            }
1037
          else
1038
            {
1039
              /* Try to connect the traces by duplication of 1 block.  */
1040
              edge e2;
1041
              basic_block next_bb = NULL;
1042
              bool try_copy = false;
1043
 
1044
              FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1045
                if (e->dest != EXIT_BLOCK_PTR
1046
                    && (e->flags & EDGE_CAN_FALLTHRU)
1047
                    && !(e->flags & EDGE_COMPLEX)
1048
                    && (!best || e->probability > best->probability))
1049
                  {
1050
                    edge_iterator ei;
1051
                    edge best2 = NULL;
1052
                    int best2_len = 0;
1053
 
1054
                    /* If the destination is a start of a trace which is only
1055
                       one block long, then no need to search the successor
1056
                       blocks of the trace.  Accept it.  */
1057
                    if (bbd[e->dest->index].start_of_trace >= 0
1058
                        && traces[bbd[e->dest->index].start_of_trace].length
1059
                           == 1)
1060
                      {
1061
                        best = e;
1062
                        try_copy = true;
1063
                        continue;
1064
                      }
1065
 
1066
                    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1067
                      {
1068
                        int di = e2->dest->index;
1069
 
1070
                        if (e2->dest == EXIT_BLOCK_PTR
1071
                            || ((e2->flags & EDGE_CAN_FALLTHRU)
1072
                                && !(e2->flags & EDGE_COMPLEX)
1073
                                && bbd[di].start_of_trace >= 0
1074
                                && !connected[bbd[di].start_of_trace]
1075
                                && (BB_PARTITION (e2->dest) == current_partition)
1076
                                && (EDGE_FREQUENCY (e2) >= freq_threshold)
1077
                                && (e2->count >= count_threshold)
1078
                                && (!best2
1079
                                    || e2->probability > best2->probability
1080
                                    || (e2->probability == best2->probability
1081
                                        && traces[bbd[di].start_of_trace].length
1082
                                           > best2_len))))
1083
                          {
1084
                            best = e;
1085
                            best2 = e2;
1086
                            if (e2->dest != EXIT_BLOCK_PTR)
1087
                              best2_len = traces[bbd[di].start_of_trace].length;
1088
                            else
1089
                              best2_len = INT_MAX;
1090
                            next_bb = e2->dest;
1091
                            try_copy = true;
1092
                          }
1093
                      }
1094
                  }
1095
 
1096
              if (flag_reorder_blocks_and_partition)
1097
                try_copy = false;
1098
 
1099
              /* Copy tiny blocks always; copy larger blocks only when the
1100
                 edge is traversed frequently enough.  */
1101
              if (try_copy
1102
                  && copy_bb_p (best->dest,
1103
                                !optimize_size
1104
                                && EDGE_FREQUENCY (best) >= freq_threshold
1105
                                && best->count >= count_threshold))
1106
                {
1107
                  basic_block new_bb;
1108
 
1109
                  if (dump_file)
1110
                    {
1111
                      fprintf (dump_file, "Connection: %d %d ",
1112
                               traces[t].last->index, best->dest->index);
1113
                      if (!next_bb)
1114
                        fputc ('\n', dump_file);
1115
                      else if (next_bb == EXIT_BLOCK_PTR)
1116
                        fprintf (dump_file, "exit\n");
1117
                      else
1118
                        fprintf (dump_file, "%d\n", next_bb->index);
1119
                    }
1120
 
1121
                  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1122
                  traces[t].last = new_bb;
1123
                  if (next_bb && next_bb != EXIT_BLOCK_PTR)
1124
                    {
1125
                      t = bbd[next_bb->index].start_of_trace;
1126
                      traces[last_trace].last->aux = traces[t].first;
1127
                      connected[t] = true;
1128
                      last_trace = t;
1129
                    }
1130
                  else
1131
                    break;      /* Stop finding the successor traces.  */
1132
                }
1133
              else
1134
                break;  /* Stop finding the successor traces.  */
1135
            }
1136
        }
1137
    }
1138
 
1139
  if (dump_file)
1140
    {
1141
      basic_block bb;
1142
 
1143
      fprintf (dump_file, "Final order:\n");
1144
      for (bb = traces[0].first; bb; bb = bb->aux)
1145
        fprintf (dump_file, "%d ", bb->index);
1146
      fprintf (dump_file, "\n");
1147
      fflush (dump_file);
1148
    }
1149
 
1150
  FREE (connected);
1151
}
1152
 
1153
/* Return true when BB can and should be copied. CODE_MAY_GROW is true
1154
   when code size is allowed to grow by duplication.  */
1155
 
1156
static bool
1157
copy_bb_p (basic_block bb, int code_may_grow)
1158
{
1159
  int size = 0;
1160
  int max_size = uncond_jump_length;
1161
  rtx insn;
1162
 
1163
  if (!bb->frequency)
1164
    return false;
1165
  if (EDGE_COUNT (bb->preds) < 2)
1166
    return false;
1167
  if (!can_duplicate_block_p (bb))
1168
    return false;
1169
 
1170
  /* Avoid duplicating blocks which have many successors (PR/13430).  */
1171
  if (EDGE_COUNT (bb->succs) > 8)
1172
    return false;
1173
 
1174
  if (code_may_grow && maybe_hot_bb_p (bb))
1175
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1176
 
1177
  FOR_BB_INSNS (bb, insn)
1178
    {
1179
      if (INSN_P (insn))
1180
        size += get_attr_min_length (insn);
1181
    }
1182
 
1183
  if (size <= max_size)
1184
    return true;
1185
 
1186
  if (dump_file)
1187
    {
1188
      fprintf (dump_file,
1189
               "Block %d can't be copied because its size = %d.\n",
1190
               bb->index, size);
1191
    }
1192
 
1193
  return false;
1194
}
1195
 
1196
/* Return the length of unconditional jump instruction.  */
1197
 
1198
static int
1199
get_uncond_jump_length (void)
1200
{
1201
  rtx label, jump;
1202
  int length;
1203
 
1204
  label = emit_label_before (gen_label_rtx (), get_insns ());
1205
  jump = emit_jump_insn (gen_jump (label));
1206
 
1207
  length = get_attr_min_length (jump);
1208
 
1209
  delete_insn (jump);
1210
  delete_insn (label);
1211
  return length;
1212
}
1213
 
1214
/* Find the basic blocks that are rarely executed and need to be moved to
1215
   a separate section of the .o file (to cut down on paging and improve
1216
   cache locality).  */
1217
 
1218
static void
1219
find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
1220
                                                      int *n_crossing_edges,
1221
                                                      int *max_idx)
1222
{
1223
  basic_block bb;
1224
  bool has_hot_blocks = false;
1225
  edge e;
1226
  int i;
1227
  edge_iterator ei;
1228
 
1229
  /* Mark which partition (hot/cold) each basic block belongs in.  */
1230
 
1231
  FOR_EACH_BB (bb)
1232
    {
1233
      if (probably_never_executed_bb_p (bb))
1234
        BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1235
      else
1236
        {
1237
          BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1238
          has_hot_blocks = true;
1239
        }
1240
    }
1241
 
1242
  /* Mark every edge that crosses between sections.  */
1243
 
1244
  i = 0;
1245
  FOR_EACH_BB (bb)
1246
    FOR_EACH_EDGE (e, ei, bb->succs)
1247
    {
1248
      if (e->src != ENTRY_BLOCK_PTR
1249
          && e->dest != EXIT_BLOCK_PTR
1250
          && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1251
        {
1252
          e->flags |= EDGE_CROSSING;
1253
          if (i == *max_idx)
1254
            {
1255
              *max_idx *= 2;
1256
              crossing_edges = xrealloc (crossing_edges,
1257
                                         (*max_idx) * sizeof (edge));
1258
            }
1259
          crossing_edges[i++] = e;
1260
        }
1261
      else
1262
        e->flags &= ~EDGE_CROSSING;
1263
    }
1264
  *n_crossing_edges = i;
1265
}
1266
 
1267
/* If any destination of a crossing edge does not have a label, add label;
1268
   Convert any fall-through crossing edges (for blocks that do not contain
1269
   a jump) to unconditional jumps.  */
1270
 
1271
static void
1272
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
1273
{
1274
  int i;
1275
  basic_block src;
1276
  basic_block dest;
1277
  rtx label;
1278
  rtx barrier;
1279
  rtx new_jump;
1280
 
1281
  for (i=0; i < n_crossing_edges; i++)
1282
    {
1283
      if (crossing_edges[i])
1284
        {
1285
          src = crossing_edges[i]->src;
1286
          dest = crossing_edges[i]->dest;
1287
 
1288
          /* Make sure dest has a label.  */
1289
 
1290
          if (dest && (dest != EXIT_BLOCK_PTR))
1291
            {
1292
              label = block_label (dest);
1293
 
1294
              /* Make sure source block ends with a jump.  */
1295
 
1296
              if (src && (src != ENTRY_BLOCK_PTR))
1297
                {
1298
                  if (!JUMP_P (BB_END (src)))
1299
                    /* bb just falls through.  */
1300
                    {
1301
                      /* make sure there's only one successor */
1302
                      gcc_assert (single_succ_p (src));
1303
 
1304
                      /* Find label in dest block.  */
1305
                      label = block_label (dest);
1306
 
1307
                      new_jump = emit_jump_insn_after (gen_jump (label),
1308
                                                       BB_END (src));
1309
                      barrier = emit_barrier_after (new_jump);
1310
                      JUMP_LABEL (new_jump) = label;
1311
                      LABEL_NUSES (label) += 1;
1312
                      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
1313
                      /* Mark edge as non-fallthru.  */
1314
                      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
1315
                    } /* end: 'if (GET_CODE ... '  */
1316
                } /* end: 'if (src && src->index...'  */
1317
            } /* end: 'if (dest && dest->index...'  */
1318
        } /* end: 'if (crossing_edges[i]...'  */
1319
    } /* end for loop  */
1320
}
1321
 
1322
/* Find any bb's where the fall-through edge is a crossing edge (note that
1323
   these bb's must also contain a conditional jump; we've already
1324
   dealt with fall-through edges for blocks that didn't have a
1325
   conditional jump in the call to add_labels_and_missing_jumps).
1326
   Convert the fall-through edge to non-crossing edge by inserting a
1327
   new bb to fall-through into.  The new bb will contain an
1328
   unconditional jump (crossing edge) to the original fall through
1329
   destination.  */
1330
 
1331
static void
1332
fix_up_fall_thru_edges (void)
1333
{
1334
  basic_block cur_bb;
1335
  basic_block new_bb;
1336
  edge succ1;
1337
  edge succ2;
1338
  edge fall_thru;
1339
  edge cond_jump = NULL;
1340
  edge e;
1341
  bool cond_jump_crosses;
1342
  int invert_worked;
1343
  rtx old_jump;
1344
  rtx fall_thru_label;
1345
  rtx barrier;
1346
 
1347
  FOR_EACH_BB (cur_bb)
1348
    {
1349
      fall_thru = NULL;
1350
      if (EDGE_COUNT (cur_bb->succs) > 0)
1351
        succ1 = EDGE_SUCC (cur_bb, 0);
1352
      else
1353
        succ1 = NULL;
1354
 
1355
      if (EDGE_COUNT (cur_bb->succs) > 1)
1356
        succ2 = EDGE_SUCC (cur_bb, 1);
1357
      else
1358
        succ2 = NULL;
1359
 
1360
      /* Find the fall-through edge.  */
1361
 
1362
      if (succ1
1363
          && (succ1->flags & EDGE_FALLTHRU))
1364
        {
1365
          fall_thru = succ1;
1366
          cond_jump = succ2;
1367
        }
1368
      else if (succ2
1369
               && (succ2->flags & EDGE_FALLTHRU))
1370
        {
1371
          fall_thru = succ2;
1372
          cond_jump = succ1;
1373
        }
1374
 
1375
      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1376
        {
1377
          /* Check to see if the fall-thru edge is a crossing edge.  */
1378
 
1379
          if (fall_thru->flags & EDGE_CROSSING)
1380
            {
1381
              /* The fall_thru edge crosses; now check the cond jump edge, if
1382
                 it exists.  */
1383
 
1384
              cond_jump_crosses = true;
1385
              invert_worked  = 0;
1386
              old_jump = BB_END (cur_bb);
1387
 
1388
              /* Find the jump instruction, if there is one.  */
1389
 
1390
              if (cond_jump)
1391
                {
1392
                  if (!(cond_jump->flags & EDGE_CROSSING))
1393
                    cond_jump_crosses = false;
1394
 
1395
                  /* We know the fall-thru edge crosses; if the cond
1396
                     jump edge does NOT cross, and its destination is the
1397
                     next block in the bb order, invert the jump
1398
                     (i.e. fix it so the fall thru does not cross and
1399
                     the cond jump does).  */
1400
 
1401
                  if (!cond_jump_crosses
1402
                      && cur_bb->aux == cond_jump->dest)
1403
                    {
1404
                      /* Find label in fall_thru block. We've already added
1405
                         any missing labels, so there must be one.  */
1406
 
1407
                      fall_thru_label = block_label (fall_thru->dest);
1408
 
1409
                      if (old_jump && fall_thru_label)
1410
                        invert_worked = invert_jump (old_jump,
1411
                                                     fall_thru_label,0);
1412
                      if (invert_worked)
1413
                        {
1414
                          fall_thru->flags &= ~EDGE_FALLTHRU;
1415
                          cond_jump->flags |= EDGE_FALLTHRU;
1416
                          update_br_prob_note (cur_bb);
1417
                          e = fall_thru;
1418
                          fall_thru = cond_jump;
1419
                          cond_jump = e;
1420
                          cond_jump->flags |= EDGE_CROSSING;
1421
                          fall_thru->flags &= ~EDGE_CROSSING;
1422
                        }
1423
                    }
1424
                }
1425
 
1426
              if (cond_jump_crosses || !invert_worked)
1427
                {
1428
                  /* This is the case where both edges out of the basic
1429
                     block are crossing edges. Here we will fix up the
1430
                     fall through edge. The jump edge will be taken care
1431
                     of later.  */
1432
 
1433
                  new_bb = force_nonfallthru (fall_thru);
1434
 
1435
                  if (new_bb)
1436
                    {
1437
                      new_bb->aux = cur_bb->aux;
1438
                      cur_bb->aux = new_bb;
1439
 
1440
                      /* Make sure new fall-through bb is in same
1441
                         partition as bb it's falling through from.  */
1442
 
1443
                      BB_COPY_PARTITION (new_bb, cur_bb);
1444
                      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1445
                    }
1446
 
1447
                  /* Add barrier after new jump */
1448
 
1449
                  if (new_bb)
1450
                    {
1451
                      barrier = emit_barrier_after (BB_END (new_bb));
1452
                      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1453
                                                               barrier);
1454
                    }
1455
                  else
1456
                    {
1457
                      barrier = emit_barrier_after (BB_END (cur_bb));
1458
                      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
1459
                                                               barrier);
1460
                    }
1461
                }
1462
            }
1463
        }
1464
    }
1465
}
1466
 
1467
/* This function checks the destination blockof a "crossing jump" to
1468
   see if it has any crossing predecessors that begin with a code label
1469
   and end with an unconditional jump.  If so, it returns that predecessor
1470
   block.  (This is to avoid creating lots of new basic blocks that all
1471
   contain unconditional jumps to the same destination).  */
1472
 
1473
static basic_block
1474
find_jump_block (basic_block jump_dest)
1475
{
1476
  basic_block source_bb = NULL;
1477
  edge e;
1478
  rtx insn;
1479
  edge_iterator ei;
1480
 
1481
  FOR_EACH_EDGE (e, ei, jump_dest->preds)
1482
    if (e->flags & EDGE_CROSSING)
1483
      {
1484
        basic_block src = e->src;
1485
 
1486
        /* Check each predecessor to see if it has a label, and contains
1487
           only one executable instruction, which is an unconditional jump.
1488
           If so, we can use it.  */
1489
 
1490
        if (LABEL_P (BB_HEAD (src)))
1491
          for (insn = BB_HEAD (src);
1492
               !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1493
               insn = NEXT_INSN (insn))
1494
            {
1495
              if (INSN_P (insn)
1496
                  && insn == BB_END (src)
1497
                  && JUMP_P (insn)
1498
                  && !any_condjump_p (insn))
1499
                {
1500
                  source_bb = src;
1501
                  break;
1502
                }
1503
            }
1504
 
1505
        if (source_bb)
1506
          break;
1507
      }
1508
 
1509
  return source_bb;
1510
}
1511
 
1512
/* Find all BB's with conditional jumps that are crossing edges;
1513
   insert a new bb and make the conditional jump branch to the new
1514
   bb instead (make the new bb same color so conditional branch won't
1515
   be a 'crossing' edge).  Insert an unconditional jump from the
1516
   new bb to the original destination of the conditional jump.  */
1517
 
1518
static void
1519
fix_crossing_conditional_branches (void)
1520
{
1521
  basic_block cur_bb;
1522
  basic_block new_bb;
1523
  basic_block last_bb;
1524
  basic_block dest;
1525
  basic_block prev_bb;
1526
  edge succ1;
1527
  edge succ2;
1528
  edge crossing_edge;
1529
  edge new_edge;
1530
  rtx old_jump;
1531
  rtx set_src;
1532
  rtx old_label = NULL_RTX;
1533
  rtx new_label;
1534
  rtx new_jump;
1535
  rtx barrier;
1536
 
1537
 last_bb = EXIT_BLOCK_PTR->prev_bb;
1538
 
1539
  FOR_EACH_BB (cur_bb)
1540
    {
1541
      crossing_edge = NULL;
1542
      if (EDGE_COUNT (cur_bb->succs) > 0)
1543
        succ1 = EDGE_SUCC (cur_bb, 0);
1544
      else
1545
        succ1 = NULL;
1546
 
1547
      if (EDGE_COUNT (cur_bb->succs) > 1)
1548
        succ2 = EDGE_SUCC (cur_bb, 1);
1549
      else
1550
        succ2 = NULL;
1551
 
1552
      /* We already took care of fall-through edges, so only one successor
1553
         can be a crossing edge.  */
1554
 
1555
      if (succ1 && (succ1->flags & EDGE_CROSSING))
1556
        crossing_edge = succ1;
1557
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
1558
        crossing_edge = succ2;
1559
 
1560
      if (crossing_edge)
1561
        {
1562
          old_jump = BB_END (cur_bb);
1563
 
1564
          /* Check to make sure the jump instruction is a
1565
             conditional jump.  */
1566
 
1567
          set_src = NULL_RTX;
1568
 
1569
          if (any_condjump_p (old_jump))
1570
            {
1571
              if (GET_CODE (PATTERN (old_jump)) == SET)
1572
                set_src = SET_SRC (PATTERN (old_jump));
1573
              else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1574
                {
1575
                  set_src = XVECEXP (PATTERN (old_jump), 0,0);
1576
                  if (GET_CODE (set_src) == SET)
1577
                    set_src = SET_SRC (set_src);
1578
                  else
1579
                    set_src = NULL_RTX;
1580
                }
1581
            }
1582
 
1583
          if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1584
            {
1585
              if (GET_CODE (XEXP (set_src, 1)) == PC)
1586
                old_label = XEXP (set_src, 2);
1587
              else if (GET_CODE (XEXP (set_src, 2)) == PC)
1588
                old_label = XEXP (set_src, 1);
1589
 
1590
              /* Check to see if new bb for jumping to that dest has
1591
                 already been created; if so, use it; if not, create
1592
                 a new one.  */
1593
 
1594
              new_bb = find_jump_block (crossing_edge->dest);
1595
 
1596
              if (new_bb)
1597
                new_label = block_label (new_bb);
1598
              else
1599
                {
1600
                  /* Create new basic block to be dest for
1601
                     conditional jump.  */
1602
 
1603
                  new_bb = create_basic_block (NULL, NULL, last_bb);
1604
                  new_bb->aux = last_bb->aux;
1605
                  last_bb->aux = new_bb;
1606
                  prev_bb = last_bb;
1607
                  last_bb = new_bb;
1608
 
1609
                  /* Update register liveness information.  */
1610
 
1611
                  new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (&reg_obstack);
1612
                  new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (&reg_obstack);
1613
                  COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
1614
                                prev_bb->il.rtl->global_live_at_end);
1615
                  COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
1616
                                prev_bb->il.rtl->global_live_at_end);
1617
 
1618
                  /* Put appropriate instructions in new bb.  */
1619
 
1620
                  new_label = gen_label_rtx ();
1621
                  emit_label_before (new_label, BB_HEAD (new_bb));
1622
                  BB_HEAD (new_bb) = new_label;
1623
 
1624
                  if (GET_CODE (old_label) == LABEL_REF)
1625
                    {
1626
                      old_label = JUMP_LABEL (old_jump);
1627
                      new_jump = emit_jump_insn_after (gen_jump
1628
                                                       (old_label),
1629
                                                       BB_END (new_bb));
1630
                    }
1631
                  else
1632
                    {
1633
                      gcc_assert (HAVE_return
1634
                                  && GET_CODE (old_label) == RETURN);
1635
                      new_jump = emit_jump_insn_after (gen_return (),
1636
                                                       BB_END (new_bb));
1637
                    }
1638
 
1639
                  barrier = emit_barrier_after (new_jump);
1640
                  JUMP_LABEL (new_jump) = old_label;
1641
                  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1642
                                                           barrier);
1643
 
1644
                  /* Make sure new bb is in same partition as source
1645
                     of conditional branch.  */
1646
                  BB_COPY_PARTITION (new_bb, cur_bb);
1647
                }
1648
 
1649
              /* Make old jump branch to new bb.  */
1650
 
1651
              redirect_jump (old_jump, new_label, 0);
1652
 
1653
              /* Remove crossing_edge as predecessor of 'dest'.  */
1654
 
1655
              dest = crossing_edge->dest;
1656
 
1657
              redirect_edge_succ (crossing_edge, new_bb);
1658
 
1659
              /* Make a new edge from new_bb to old dest; new edge
1660
                 will be a successor for new_bb and a predecessor
1661
                 for 'dest'.  */
1662
 
1663
              if (EDGE_COUNT (new_bb->succs) == 0)
1664
                new_edge = make_edge (new_bb, dest, 0);
1665
              else
1666
                new_edge = EDGE_SUCC (new_bb, 0);
1667
 
1668
              crossing_edge->flags &= ~EDGE_CROSSING;
1669
              new_edge->flags |= EDGE_CROSSING;
1670
            }
1671
        }
1672
    }
1673
}
1674
 
1675
/* Find any unconditional branches that cross between hot and cold
1676
   sections.  Convert them into indirect jumps instead.  */
1677
 
1678
static void
1679
fix_crossing_unconditional_branches (void)
1680
{
1681
  basic_block cur_bb;
1682
  rtx last_insn;
1683
  rtx label;
1684
  rtx label_addr;
1685
  rtx indirect_jump_sequence;
1686
  rtx jump_insn = NULL_RTX;
1687
  rtx new_reg;
1688
  rtx cur_insn;
1689
  edge succ;
1690
 
1691
  FOR_EACH_BB (cur_bb)
1692
    {
1693
      last_insn = BB_END (cur_bb);
1694
 
1695
      if (EDGE_COUNT (cur_bb->succs) < 1)
1696
        continue;
1697
 
1698
      succ = EDGE_SUCC (cur_bb, 0);
1699
 
1700
      /* Check to see if bb ends in a crossing (unconditional) jump.  At
1701
         this point, no crossing jumps should be conditional.  */
1702
 
1703
      if (JUMP_P (last_insn)
1704
          && (succ->flags & EDGE_CROSSING))
1705
        {
1706
          rtx label2, table;
1707
 
1708
          gcc_assert (!any_condjump_p (last_insn));
1709
 
1710
          /* Make sure the jump is not already an indirect or table jump.  */
1711
 
1712
          if (!computed_jump_p (last_insn)
1713
              && !tablejump_p (last_insn, &label2, &table))
1714
            {
1715
              /* We have found a "crossing" unconditional branch.  Now
1716
                 we must convert it to an indirect jump.  First create
1717
                 reference of label, as target for jump.  */
1718
 
1719
              label = JUMP_LABEL (last_insn);
1720
              label_addr = gen_rtx_LABEL_REF (Pmode, label);
1721
              LABEL_NUSES (label) += 1;
1722
 
1723
              /* Get a register to use for the indirect jump.  */
1724
 
1725
              new_reg = gen_reg_rtx (Pmode);
1726
 
1727
              /* Generate indirect the jump sequence.  */
1728
 
1729
              start_sequence ();
1730
              emit_move_insn (new_reg, label_addr);
1731
              emit_indirect_jump (new_reg);
1732
              indirect_jump_sequence = get_insns ();
1733
              end_sequence ();
1734
 
1735
              /* Make sure every instruction in the new jump sequence has
1736
                 its basic block set to be cur_bb.  */
1737
 
1738
              for (cur_insn = indirect_jump_sequence; cur_insn;
1739
                   cur_insn = NEXT_INSN (cur_insn))
1740
                {
1741
                  if (!BARRIER_P (cur_insn))
1742
                    BLOCK_FOR_INSN (cur_insn) = cur_bb;
1743
                  if (JUMP_P (cur_insn))
1744
                    jump_insn = cur_insn;
1745
                }
1746
 
1747
              /* Insert the new (indirect) jump sequence immediately before
1748
                 the unconditional jump, then delete the unconditional jump.  */
1749
 
1750
              emit_insn_before (indirect_jump_sequence, last_insn);
1751
              delete_insn (last_insn);
1752
 
1753
              /* Make BB_END for cur_bb be the jump instruction (NOT the
1754
                 barrier instruction at the end of the sequence...).  */
1755
 
1756
              BB_END (cur_bb) = jump_insn;
1757
            }
1758
        }
1759
    }
1760
}
1761
 
1762
/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
1763
 
1764
static void
1765
add_reg_crossing_jump_notes (void)
1766
{
1767
  basic_block bb;
1768
  edge e;
1769
  edge_iterator ei;
1770
 
1771
  FOR_EACH_BB (bb)
1772
    FOR_EACH_EDGE (e, ei, bb->succs)
1773
      if ((e->flags & EDGE_CROSSING)
1774
          && JUMP_P (BB_END (e->src)))
1775
        REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
1776
                                                         NULL_RTX,
1777
                                                         REG_NOTES (BB_END
1778
                                                                  (e->src)));
1779
}
1780
 
1781
/* Hot and cold basic blocks are partitioned and put in separate
1782
   sections of the .o file, to reduce paging and improve cache
1783
   performance (hopefully).  This can result in bits of code from the
1784
   same function being widely separated in the .o file.  However this
1785
   is not obvious to the current bb structure.  Therefore we must take
1786
   care to ensure that: 1). There are no fall_thru edges that cross
1787
   between sections; 2). For those architectures which have "short"
1788
   conditional branches, all conditional branches that attempt to
1789
   cross between sections are converted to unconditional branches;
1790
   and, 3). For those architectures which have "short" unconditional
1791
   branches, all unconditional branches that attempt to cross between
1792
   sections are converted to indirect jumps.
1793
 
1794
   The code for fixing up fall_thru edges that cross between hot and
1795
   cold basic blocks does so by creating new basic blocks containing
1796
   unconditional branches to the appropriate label in the "other"
1797
   section.  The new basic block is then put in the same (hot or cold)
1798
   section as the original conditional branch, and the fall_thru edge
1799
   is modified to fall into the new basic block instead.  By adding
1800
   this level of indirection we end up with only unconditional branches
1801
   crossing between hot and cold sections.
1802
 
1803
   Conditional branches are dealt with by adding a level of indirection.
1804
   A new basic block is added in the same (hot/cold) section as the
1805
   conditional branch, and the conditional branch is retargeted to the
1806
   new basic block.  The new basic block contains an unconditional branch
1807
   to the original target of the conditional branch (in the other section).
1808
 
1809
   Unconditional branches are dealt with by converting them into
1810
   indirect jumps.  */
1811
 
1812
static void
1813
fix_edges_for_rarely_executed_code (edge *crossing_edges,
1814
                                    int n_crossing_edges)
1815
{
1816
  /* Make sure the source of any crossing edge ends in a jump and the
1817
     destination of any crossing edge has a label.  */
1818
 
1819
  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);
1820
 
1821
  /* Convert all crossing fall_thru edges to non-crossing fall
1822
     thrus to unconditional jumps (that jump to the original fall
1823
     thru dest).  */
1824
 
1825
  fix_up_fall_thru_edges ();
1826
 
1827
  /* If the architecture does not have conditional branches that can
1828
     span all of memory, convert crossing conditional branches into
1829
     crossing unconditional branches.  */
1830
 
1831
  if (!HAS_LONG_COND_BRANCH)
1832
    fix_crossing_conditional_branches ();
1833
 
1834
  /* If the architecture does not have unconditional branches that
1835
     can span all of memory, convert crossing unconditional branches
1836
     into indirect jumps.  Since adding an indirect jump also adds
1837
     a new register usage, update the register usage information as
1838
     well.  */
1839
 
1840
  if (!HAS_LONG_UNCOND_BRANCH)
1841
    {
1842
      fix_crossing_unconditional_branches ();
1843
      reg_scan (get_insns(), max_reg_num ());
1844
    }
1845
 
1846
  add_reg_crossing_jump_notes ();
1847
}
1848
 
1849
/* Verify, in the basic block chain, that there is at most one switch
1850
   between hot/cold partitions. This is modelled on
1851
   rtl_verify_flow_info_1, but it cannot go inside that function
1852
   because this condition will not be true until after
1853
   reorder_basic_blocks is called.  */
1854
 
1855
static void
1856
verify_hot_cold_block_grouping (void)
1857
{
1858
  basic_block bb;
1859
  int err = 0;
1860
  bool switched_sections = false;
1861
  int current_partition = 0;
1862
 
1863
  FOR_EACH_BB (bb)
1864
    {
1865
      if (!current_partition)
1866
        current_partition = BB_PARTITION (bb);
1867
      if (BB_PARTITION (bb) != current_partition)
1868
        {
1869
          if (switched_sections)
1870
            {
1871
              error ("multiple hot/cold transitions found (bb %i)",
1872
                     bb->index);
1873
              err = 1;
1874
            }
1875
          else
1876
            {
1877
              switched_sections = true;
1878
              current_partition = BB_PARTITION (bb);
1879
            }
1880
        }
1881
    }
1882
 
1883
  gcc_assert(!err);
1884
}
1885
 
1886
/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
1887
   the set of flags to pass to cfg_layout_initialize().  */
1888
 
1889
void
1890
reorder_basic_blocks (unsigned int flags)
1891
{
1892
  int n_traces;
1893
  int i;
1894
  struct trace *traces;
1895
 
1896
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1897
    return;
1898
 
1899
  if (targetm.cannot_modify_jumps_p ())
1900
    return;
1901
 
1902
  cfg_layout_initialize (flags);
1903
 
1904
  set_edge_can_fallthru_flag ();
1905
  mark_dfs_back_edges ();
1906
 
1907
  /* We are estimating the length of uncond jump insn only once since the code
1908
     for getting the insn length always returns the minimal length now.  */
1909
  if (uncond_jump_length == 0)
1910
    uncond_jump_length = get_uncond_jump_length ();
1911
 
1912
  /* We need to know some information for each basic block.  */
1913
  array_size = GET_ARRAY_SIZE (last_basic_block);
1914
  bbd = XNEWVEC (bbro_basic_block_data, array_size);
1915
  for (i = 0; i < array_size; i++)
1916
    {
1917
      bbd[i].start_of_trace = -1;
1918
      bbd[i].in_trace = -1;
1919
      bbd[i].end_of_trace = -1;
1920
      bbd[i].heap = NULL;
1921
      bbd[i].node = NULL;
1922
    }
1923
 
1924
  traces = XNEWVEC (struct trace, n_basic_blocks);
1925
  n_traces = 0;
1926
  find_traces (&n_traces, traces);
1927
  connect_traces (n_traces, traces);
1928
  FREE (traces);
1929
  FREE (bbd);
1930
 
1931
  if (dump_file)
1932
    dump_flow_info (dump_file, dump_flags);
1933
 
1934
  cfg_layout_finalize ();
1935
  if (flag_reorder_blocks_and_partition)
1936
    verify_hot_cold_block_grouping ();
1937
}
1938
 
1939
/* Determine which partition the first basic block in the function
1940
   belongs to, then find the first basic block in the current function
1941
   that belongs to a different section, and insert a
1942
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
1943
   instruction stream.  When writing out the assembly code,
1944
   encountering this note will make the compiler switch between the
1945
   hot and cold text sections.  */
1946
 
1947
static void
1948
insert_section_boundary_note (void)
1949
{
1950
  basic_block bb;
1951
  rtx new_note;
1952
  int first_partition = 0;
1953
 
1954
  if (flag_reorder_blocks_and_partition)
1955
    FOR_EACH_BB (bb)
1956
    {
1957
      if (!first_partition)
1958
        first_partition = BB_PARTITION (bb);
1959
      if (BB_PARTITION (bb) != first_partition)
1960
        {
1961
          new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
1962
                                       BB_HEAD (bb));
1963
          break;
1964
        }
1965
    }
1966
}
1967
 
1968
/* Duplicate the blocks containing computed gotos.  This basically unfactors
1969
   computed gotos that were factored early on in the compilation process to
1970
   speed up edge based data flow.  We used to not unfactoring them again,
1971
   which can seriously pessimize code with many computed jumps in the source
1972
   code, such as interpreters.  See e.g. PR15242.  */
1973
 
1974
static bool
1975
gate_duplicate_computed_gotos (void)
1976
{
1977
  return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
1978
}
1979
 
1980
 
1981
static unsigned int
1982
duplicate_computed_gotos (void)
1983
{
1984
  basic_block bb, new_bb;
1985
  bitmap candidates;
1986
  int max_size;
1987
 
1988
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
1989
    return 0;
1990
 
1991
  if (targetm.cannot_modify_jumps_p ())
1992
    return 0;
1993
 
1994
  cfg_layout_initialize (0);
1995
 
1996
  /* We are estimating the length of uncond jump insn only once
1997
     since the code for getting the insn length always returns
1998
     the minimal length now.  */
1999
  if (uncond_jump_length == 0)
2000
    uncond_jump_length = get_uncond_jump_length ();
2001
 
2002
  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2003
  candidates = BITMAP_ALLOC (NULL);
2004
 
2005
  /* Look for blocks that end in a computed jump, and see if such blocks
2006
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
2007
     mark it in the candidates.  */
2008
  FOR_EACH_BB (bb)
2009
    {
2010
      rtx insn;
2011
      edge e;
2012
      edge_iterator ei;
2013
      int size, all_flags;
2014
 
2015
      /* Build the reorder chain for the original order of blocks.  */
2016
      if (bb->next_bb != EXIT_BLOCK_PTR)
2017
        bb->aux = bb->next_bb;
2018
 
2019
      /* Obviously the block has to end in a computed jump.  */
2020
      if (!computed_jump_p (BB_END (bb)))
2021
        continue;
2022
 
2023
      /* Only consider blocks that can be duplicated.  */
2024
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2025
          || !can_duplicate_block_p (bb))
2026
        continue;
2027
 
2028
      /* Make sure that the block is small enough.  */
2029
      size = 0;
2030
      FOR_BB_INSNS (bb, insn)
2031
        if (INSN_P (insn))
2032
          {
2033
            size += get_attr_min_length (insn);
2034
            if (size > max_size)
2035
               break;
2036
          }
2037
      if (size > max_size)
2038
        continue;
2039
 
2040
      /* Final check: there must not be any incoming abnormal edges.  */
2041
      all_flags = 0;
2042
      FOR_EACH_EDGE (e, ei, bb->preds)
2043
        all_flags |= e->flags;
2044
      if (all_flags & EDGE_COMPLEX)
2045
        continue;
2046
 
2047
      bitmap_set_bit (candidates, bb->index);
2048
    }
2049
 
2050
  /* Nothing to do if there is no computed jump here.  */
2051
  if (bitmap_empty_p (candidates))
2052
    goto done;
2053
 
2054
  /* Duplicate computed gotos.  */
2055
  FOR_EACH_BB (bb)
2056
    {
2057
      if (bb->il.rtl->visited)
2058
        continue;
2059
 
2060
      bb->il.rtl->visited = 1;
2061
 
2062
      /* BB must have one outgoing edge.  That edge must not lead to
2063
         the exit block or the next block.
2064
         The destination must have more than one predecessor.  */
2065
      if (!single_succ_p (bb)
2066
          || single_succ (bb) == EXIT_BLOCK_PTR
2067
          || single_succ (bb) == bb->next_bb
2068
          || single_pred_p (single_succ (bb)))
2069
        continue;
2070
 
2071
      /* The successor block has to be a duplication candidate.  */
2072
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2073
        continue;
2074
 
2075
      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
2076
      new_bb->aux = bb->aux;
2077
      bb->aux = new_bb;
2078
      new_bb->il.rtl->visited = 1;
2079
    }
2080
 
2081
done:
2082
  cfg_layout_finalize ();
2083
 
2084
  BITMAP_FREE (candidates);
2085
  return 0;
2086
}
2087
 
2088
struct tree_opt_pass pass_duplicate_computed_gotos =
2089
{
2090
  "compgotos",                          /* name */
2091
  gate_duplicate_computed_gotos,        /* gate */
2092
  duplicate_computed_gotos,             /* execute */
2093
  NULL,                                 /* sub */
2094
  NULL,                                 /* next */
2095
  0,                                    /* static_pass_number */
2096
  TV_REORDER_BLOCKS,                    /* tv_id */
2097
  0,                                    /* properties_required */
2098
  0,                                    /* properties_provided */
2099
  0,                                    /* properties_destroyed */
2100
  0,                                    /* todo_flags_start */
2101
  TODO_dump_func,                       /* todo_flags_finish */
2102
 
2103
};
2104
 
2105
 
2106
/* This function is the main 'entrance' for the optimization that
2107
   partitions hot and cold basic blocks into separate sections of the
2108
   .o file (to improve performance and cache locality).  Ideally it
2109
   would be called after all optimizations that rearrange the CFG have
2110
   been called.  However part of this optimization may introduce new
2111
   register usage, so it must be called before register allocation has
2112
   occurred.  This means that this optimization is actually called
2113
   well before the optimization that reorders basic blocks (see
2114
   function above).
2115
 
2116
   This optimization checks the feedback information to determine
2117
   which basic blocks are hot/cold, updates flags on the basic blocks
2118
   to indicate which section they belong in.  This information is
2119
   later used for writing out sections in the .o file.  Because hot
2120
   and cold sections can be arbitrarily large (within the bounds of
2121
   memory), far beyond the size of a single function, it is necessary
2122
   to fix up all edges that cross section boundaries, to make sure the
2123
   instructions used can actually span the required distance.  The
2124
   fixes are described below.
2125
 
2126
   Fall-through edges must be changed into jumps; it is not safe or
2127
   legal to fall through across a section boundary.  Whenever a
2128
   fall-through edge crossing a section boundary is encountered, a new
2129
   basic block is inserted (in the same section as the fall-through
2130
   source), and the fall through edge is redirected to the new basic
2131
   block.  The new basic block contains an unconditional jump to the
2132
   original fall-through target.  (If the unconditional jump is
2133
   insufficient to cross section boundaries, that is dealt with a
2134
   little later, see below).
2135
 
2136
   In order to deal with architectures that have short conditional
2137
   branches (which cannot span all of memory) we take any conditional
2138
   jump that attempts to cross a section boundary and add a level of
2139
   indirection: it becomes a conditional jump to a new basic block, in
2140
   the same section.  The new basic block contains an unconditional
2141
   jump to the original target, in the other section.
2142
 
2143
   For those architectures whose unconditional branch is also
2144
   incapable of reaching all of memory, those unconditional jumps are
2145
   converted into indirect jumps, through a register.
2146
 
2147
   IMPORTANT NOTE: This optimization causes some messy interactions
2148
   with the cfg cleanup optimizations; those optimizations want to
2149
   merge blocks wherever possible, and to collapse indirect jump
2150
   sequences (change "A jumps to B jumps to C" directly into "A jumps
2151
   to C").  Those optimizations can undo the jump fixes that
2152
   partitioning is required to make (see above), in order to ensure
2153
   that jumps attempting to cross section boundaries are really able
2154
   to cover whatever distance the jump requires (on many architectures
2155
   conditional or unconditional jumps are not able to reach all of
2156
   memory).  Therefore tests have to be inserted into each such
2157
   optimization to make sure that it does not undo stuff necessary to
2158
   cross partition boundaries.  This would be much less of a problem
2159
   if we could perform this optimization later in the compilation, but
2160
   unfortunately the fact that we may need to create indirect jumps
2161
   (through registers) requires that this optimization be performed
2162
   before register allocation.  */
2163
 
2164
static void
2165
partition_hot_cold_basic_blocks (void)
2166
{
2167
  basic_block cur_bb;
2168
  edge *crossing_edges;
2169
  int n_crossing_edges;
2170
  int max_edges = 2 * last_basic_block;
2171
 
2172
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
2173
    return;
2174
 
2175
  crossing_edges = XCNEWVEC (edge, max_edges);
2176
 
2177
  cfg_layout_initialize (0);
2178
 
2179
  FOR_EACH_BB (cur_bb)
2180
    if (cur_bb->index >= NUM_FIXED_BLOCKS
2181
        && cur_bb->next_bb->index >= NUM_FIXED_BLOCKS)
2182
      cur_bb->aux = cur_bb->next_bb;
2183
 
2184
  find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
2185
                                                        &n_crossing_edges,
2186
                                                        &max_edges);
2187
 
2188
  if (n_crossing_edges > 0)
2189
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);
2190
 
2191
  free (crossing_edges);
2192
 
2193
  cfg_layout_finalize();
2194
}
2195
 
2196
static bool
2197
gate_handle_reorder_blocks (void)
2198
{
2199
  return (optimize > 0);
2200
}
2201
 
2202
 
2203
/* Reorder basic blocks.  */
2204
static unsigned int
2205
rest_of_handle_reorder_blocks (void)
2206
{
2207
  bool changed;
2208
  unsigned int liveness_flags;
2209
 
2210
  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2211
     splitting possibly introduced more crossjumping opportunities.  */
2212
  liveness_flags = (!HAVE_conditional_execution ? CLEANUP_UPDATE_LIFE : 0);
2213
  changed = cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);
2214
 
2215
  if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
2216
    {
2217
      timevar_push (TV_TRACER);
2218
      tracer (liveness_flags);
2219
      timevar_pop (TV_TRACER);
2220
    }
2221
 
2222
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
2223
    reorder_basic_blocks (liveness_flags);
2224
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
2225
      || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
2226
    changed |= cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);
2227
 
2228
  /* On conditional execution targets we can not update the life cheaply, so
2229
     we deffer the updating to after both cleanups.  This may lose some cases
2230
     but should not be terribly bad.  */
2231
  if (changed && HAVE_conditional_execution)
2232
    update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
2233
                      PROP_DEATH_NOTES);
2234
 
2235
  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
2236
  insert_section_boundary_note ();
2237
  return 0;
2238
}
2239
 
2240
struct tree_opt_pass pass_reorder_blocks =
2241
{
2242
  "bbro",                               /* name */
2243
  gate_handle_reorder_blocks,           /* gate */
2244
  rest_of_handle_reorder_blocks,        /* execute */
2245
  NULL,                                 /* sub */
2246
  NULL,                                 /* next */
2247
  0,                                    /* static_pass_number */
2248
  TV_REORDER_BLOCKS,                    /* tv_id */
2249
  0,                                    /* properties_required */
2250
  0,                                    /* properties_provided */
2251
  0,                                    /* properties_destroyed */
2252
  0,                                    /* todo_flags_start */
2253
  TODO_dump_func,                       /* todo_flags_finish */
2254
  'B'                                   /* letter */
2255
};
2256
 
2257
static bool
2258
gate_handle_partition_blocks (void)
2259
{
2260
  /* The optimization to partition hot/cold basic blocks into separate
2261
     sections of the .o file does not work well with linkonce or with
2262
     user defined section attributes.  Don't call it if either case
2263
     arises.  */
2264
 
2265
  return (flag_reorder_blocks_and_partition
2266
          && !DECL_ONE_ONLY (current_function_decl)
2267
          && !user_defined_section_attribute);
2268
}
2269
 
2270
/* Partition hot and cold basic blocks.  */
2271
static unsigned int
2272
rest_of_handle_partition_blocks (void)
2273
{
2274
  no_new_pseudos = 0;
2275
  partition_hot_cold_basic_blocks ();
2276
  allocate_reg_life_data ();
2277
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
2278
                    PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES);
2279
  no_new_pseudos = 1;
2280
  return 0;
2281
}
2282
 
2283
struct tree_opt_pass pass_partition_blocks =
2284
{
2285
  "bbpart",                             /* name */
2286
  gate_handle_partition_blocks,         /* gate */
2287
  rest_of_handle_partition_blocks,      /* execute */
2288
  NULL,                                 /* sub */
2289
  NULL,                                 /* next */
2290
  0,                                    /* static_pass_number */
2291
  TV_REORDER_BLOCKS,                    /* tv_id */
2292
  0,                                    /* properties_required */
2293
  0,                                    /* properties_provided */
2294
  0,                                    /* properties_destroyed */
2295
  0,                                    /* todo_flags_start */
2296
  TODO_dump_func,                       /* todo_flags_finish */
2297
 
2298
};
2299
 
2300
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.