/* Basic block reordering routines for the GNU compiler.
   Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first
   (see function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the actual basic block is
   lower than Branch Threshold, or the frequency of the successor is lower
   than Exec Threshold, the successor will be the seed in one of the next
   rounds.  Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero
   so that the remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round, and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the
   source block of the most probable edge going out of the loop
   is the last block of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, it first checks whether there is an edge from the
   last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there
   exists a basic block BB such that BB is a successor of the last bb of one
   trace and BB is a predecessor of the first block of another trace.  In this
   case, BB is duplicated and the traces are connected through this duplicate.
   The remaining traces are simply connected, so there will be a jump to the
   beginning of the rest of the trace.


   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html

*/
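
/* A rough sketch of the flow described above, in illustrative pseudocode
   only (the real work is done by find_traces, find_traces_1_round and
   connect_traces below):

     for each round (with decreasing branch/exec thresholds)
       while the round's heap is not empty
         bb = the block with the lowest key (see bb_to_key)
         if bb is too cold for this round (see push_to_next_round_p)
           move bb to the next round's heap
         else
           start a new trace at bb and repeatedly append the most
           probable suitable successor
     connect the traces (see connect_traces)  */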

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "flags.h"
#include "timevar.h"
#include "output.h"
#include "cfglayout.h"
#include "fibheap.h"
#include "target.h"
#include "function.h"
#include "tm_p.h"
#include "obstack.h"
#include "expr.h"
#include "params.h"
#include "toplev.h"
#include "tree-pass.h"

#ifndef HAVE_conditional_execution
#define HAVE_conditional_execution 0
#endif

/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
#define N_ROUNDS 5

/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */

#ifndef HAVE_return
#define HAVE_return 0
#define gen_return() NULL_RTX
#endif


/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};

/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
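
/* For orientation: REG_BR_PROB_BASE is 10000 here, so in round 0 an edge
   needs a probability of at least 400/1000 * 10000 = 4000 (i.e. 40%) and
   a successor frequency of at least 500/1000 of bb 0's to extend a trace;
   the final rounds drop both thresholds to zero so everything left over
   is picked up.  */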

/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
   block, the edge destination is not duplicated while connecting traces.  */
#define DUPLICATION_THRESHOLD 100

/* Length of unconditional jump instruction.  */
static int uncond_jump_length;

/* Structure to hold needed information for each basic block.  */
typedef struct bbro_basic_block_data_def
{
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
  int start_of_trace;

  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
  int end_of_trace;

  /* Which trace is the bb in?  */
  int in_trace;

  /* Which heap is BB in (if any)?  */
  fibheap_t heap;

  /* Which heap node is BB in (if any)?  */
  fibnode_t node;
} bbro_basic_block_data;

/* The current size of the following dynamic array.  */
static int array_size;

/* The array which holds needed information for basic blocks.  */
static bbro_basic_block_data *bbd;

/* To avoid frequent reallocation the size of arrays is greater than needed,
   the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
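
/* For example, GET_ARRAY_SIZE (8) = ((8 / 4) + 1) * 5 = 15, which is not
   less than 1.25 * 8 = 10; the "+ 1" keeps the result above 1.25 * X
   despite the truncating integer division.  */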

/* Free the memory and set the pointer to NULL.  */
#define FREE(P) (gcc_assert (P), free (P), P = 0)

/* Structure for holding information about a trace.  */
struct trace
{
  /* First and last basic block of the trace.  */
  basic_block first, last;

  /* The round of the STC creation which this trace was found in.  */
  int round;

  /* The length (i.e. the number of basic blocks) of the trace.  */
  int length;
};

/* Maximum frequency and count of one of the entry blocks.  */
static int max_entry_frequency;
static gcov_type max_entry_count;

/* Local function prototypes.  */
static void find_traces (int *, struct trace *);
static basic_block rotate_loop (edge, struct trace *, int);
static void mark_bb_visited (basic_block, int);
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
                                 int, fibheap_t *, int);
static basic_block copy_bb (basic_block, edge, basic_block, int);
static fibheapkey_t bb_to_key (basic_block);
static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
static void connect_traces (int, struct trace *);
static bool copy_bb_p (basic_block, int);
static int get_uncond_jump_length (void);
static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
                                                                  int *,
                                                                  int *);
static void add_labels_and_missing_jumps (edge *, int);
static void add_reg_crossing_jump_notes (void);
static void fix_up_fall_thru_edges (void);
static void fix_edges_for_rarely_executed_code (edge *, int);
static void fix_crossing_conditional_branches (void);
static void fix_crossing_unconditional_branches (void);

/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are:
   (1) the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be collected
   into traces until the very last round); or (2) there will be another
   round, and the basic block is not "hot enough" for the current round
   of trace collection.  */

static bool
push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
                      int exec_th, gcov_type count_th)
{
  bool there_exists_another_round;
  bool block_not_hot_enough;

  there_exists_another_round = round < number_of_rounds - 1;

  block_not_hot_enough = (bb->frequency < exec_th
                          || bb->count < count_th
                          || probably_never_executed_bb_p (bb));

  if (there_exists_another_round
      && block_not_hot_enough)
    return true;
  else
    return false;
}
218
 
219
/* Find the traces for Software Trace Cache.  Chain each trace through
220
   RBI()->next.  Store the number of traces to N_TRACES and description of
221
   traces to TRACES.  */
222
 
223
static void
224
find_traces (int *n_traces, struct trace *traces)
225
{
226
  int i;
227
  int number_of_rounds;
228
  edge e;
229
  edge_iterator ei;
230
  fibheap_t heap;
231
 
232
  /* Add one extra round of trace collection when partitioning hot/cold
233
     basic blocks into separate sections.  The last round is for all the
234
     cold blocks (and ONLY the cold blocks).  */
235
 
236
  number_of_rounds = N_ROUNDS - 1;
237
 
238
  /* Insert entry points of function into heap.  */
239
  heap = fibheap_new ();
240
  max_entry_frequency = 0;
241
  max_entry_count = 0;
242
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
243
    {
244
      bbd[e->dest->index].heap = heap;
245
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
246
                                                    e->dest);
247
      if (e->dest->frequency > max_entry_frequency)
248
        max_entry_frequency = e->dest->frequency;
249
      if (e->dest->count > max_entry_count)
250
        max_entry_count = e->dest->count;
251
    }
252
 
253
  /* Find the traces.  */
254
  for (i = 0; i < number_of_rounds; i++)
255
    {
256
      gcov_type count_threshold;
257
 
258
      if (dump_file)
259
        fprintf (dump_file, "STC - round %d\n", i + 1);
260
 
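      /* MAX_ENTRY_COUNT is a gcov_type, so when it is large we divide
         before multiplying to keep the scaled threshold from overflowing,
         at the cost of a little precision.  */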
      if (max_entry_count < INT_MAX / 1000)
        count_threshold = max_entry_count * exec_threshold[i] / 1000;
      else
        count_threshold = max_entry_count / 1000 * exec_threshold[i];

      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
                           max_entry_frequency * exec_threshold[i] / 1000,
                           count_threshold, traces, n_traces, i, &heap,
                           number_of_rounds);
    }
  fibheap_delete (heap);

  if (dump_file)
    {
      for (i = 0; i < *n_traces; i++)
        {
          basic_block bb;
          fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
                   traces[i].round + 1);
          for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
            fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
          fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
        }
      fflush (dump_file);
    }
}

/* Rotate a loop whose back edge is BACK_EDGE in the tail of trace TRACE
   (with sequential number TRACE_N).  */

static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
{
  basic_block bb;

  /* Information about the best end (end after rotation) of the loop.  */
  basic_block best_bb = NULL;
  edge best_edge = NULL;
  int best_freq = -1;
  gcov_type best_count = -1;
  /* The best edge is preferred when its destination is not visited yet
     or is a start block of some trace.  */
  bool is_preferred = false;

  /* Find the most frequent edge that goes out from current trace.  */
  bb = back_edge->dest;
  do
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && e->dest->il.rtl->visited != trace_n
            && (e->flags & EDGE_CAN_FALLTHRU)
            && !(e->flags & EDGE_COMPLEX))
        {
          if (is_preferred)
            {
              /* The best edge is preferred.  */
              if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is also preferred.  */
                  int freq = EDGE_FREQUENCY (e);
                  if (freq > best_freq || e->count > best_count)
                    {
                      best_freq = freq;
                      best_count = e->count;
                      best_edge = e;
                      best_bb = bb;
                    }
                }
            }
          else
            {
              if (!e->dest->il.rtl->visited
                  || bbd[e->dest->index].start_of_trace >= 0)
                {
                  /* The current edge E is preferred.  */
                  is_preferred = true;
                  best_freq = EDGE_FREQUENCY (e);
                  best_count = e->count;
                  best_edge = e;
                  best_bb = bb;
                }
              else
                {
                  int freq = EDGE_FREQUENCY (e);
                  if (!best_edge || freq > best_freq || e->count > best_count)
                    {
                      best_freq = freq;
                      best_count = e->count;
                      best_edge = e;
                      best_bb = bb;
                    }
                }
            }
        }
      bb = bb->aux;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
         the trace.  */
      if (back_edge->dest == trace->first)
        {
          trace->first = best_bb->aux;
        }
      else
        {
          basic_block prev_bb;

          for (prev_bb = trace->first;
               prev_bb->aux != back_edge->dest;
               prev_bb = prev_bb->aux)
            ;
          prev_bb->aux = best_bb->aux;

          /* Try to get rid of uncond jump to cond jump.  */
          if (single_succ_p (prev_bb))
            {
              basic_block header = single_succ (prev_bb);

              /* Duplicate HEADER if it is a small block containing cond jump
                 in the end.  */
              if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
                  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
                                     NULL_RTX))
                copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
            }
        }
    }
  else
    {
      /* We have not found a suitable loop tail, so do no rotation.  */
      best_bb = back_edge->src;
    }
  best_bb->aux = NULL;
  return best_bb;
}

/* This function marks BB that it was visited in trace number TRACE.  */

static void
mark_bb_visited (basic_block bb, int trace)
{
  bb->il.rtl->visited = trace;
  if (bbd[bb->index].heap)
    {
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;
    }
}

/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include into traces basic blocks whose probability is lower than
   BRANCH_TH or whose frequency is lower than EXEC_TH (or whose count is
   lower than COUNT_TH).  It stores the new traces into TRACES and modifies
   the number of traces *N_TRACES.  Sets the round (which the trace belongs
   to) to ROUND.  It expects that starting basic blocks are in *HEAP and at
   the end it deletes *HEAP and stores starting points for the next round
   into new *HEAP.  */
426
 
427
static void
428
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
429
                     struct trace *traces, int *n_traces, int round,
430
                     fibheap_t *heap, int number_of_rounds)
431
{
432
  /* Heap for discarded basic blocks which are possible starting points for
433
     the next round.  */
434
  fibheap_t new_heap = fibheap_new ();
435
 
436
  while (!fibheap_empty (*heap))
437
    {
438
      basic_block bb;
439
      struct trace *trace;
440
      edge best_edge, e;
441
      fibheapkey_t key;
442
      edge_iterator ei;
443
 
444
      bb = fibheap_extract_min (*heap);
445
      bbd[bb->index].heap = NULL;
446
      bbd[bb->index].node = NULL;
447
 
448
      if (dump_file)
449
        fprintf (dump_file, "Getting bb %d\n", bb->index);
450
 
451
      /* If the BB's frequency is too low send BB to the next round.  When
452
         partitioning hot/cold blocks into separate sections, make sure all
453
         the cold blocks (and ONLY the cold blocks) go into the (extra) final
454
         round.  */
455
 
456
      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
457
                                count_th))
458
        {
459
          int key = bb_to_key (bb);
460
          bbd[bb->index].heap = new_heap;
461
          bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
462
 
463
          if (dump_file)
464
            fprintf (dump_file,
465
                     "  Possible start point of next round: %d (key: %d)\n",
466
                     bb->index, key);
467
          continue;
468
        }
469
 
470
      trace = traces + *n_traces;
471
      trace->first = bb;
472
      trace->round = round;
473
      trace->length = 0;
474
      bbd[bb->index].in_trace = *n_traces;
475
      (*n_traces)++;
476
 
477
      do
478
        {
479
          int prob, freq;
480
          bool ends_in_call;
481
 
482
          /* The probability and frequency of the best edge.  */
483
          int best_prob = INT_MIN / 2;
484
          int best_freq = INT_MIN / 2;
485
 
486
          best_edge = NULL;
487
          mark_bb_visited (bb, *n_traces);
488
          trace->length++;
489
 
490
          if (dump_file)
491
            fprintf (dump_file, "Basic block %d was visited in trace %d\n",
492
                     bb->index, *n_traces - 1);
493
 
494
          ends_in_call = block_ends_with_call_p (bb);
495
 
496
          /* Select the successor that will be placed after BB.  */
497
          FOR_EACH_EDGE (e, ei, bb->succs)
498
            {
499
              gcc_assert (!(e->flags & EDGE_FAKE));
500
 
501
              if (e->dest == EXIT_BLOCK_PTR)
502
                continue;
503
 
504
              if (e->dest->il.rtl->visited
505
                  && e->dest->il.rtl->visited != *n_traces)
506
                continue;
507
 
508
              if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
509
                continue;
510
 
511
              prob = e->probability;
512
              freq = e->dest->frequency;
513
 
514
              /* The only sensible preference for a call instruction is the
515
                 fallthru edge.  Don't bother selecting anything else.  */
516
              if (ends_in_call)
517
                {
518
                  if (e->flags & EDGE_CAN_FALLTHRU)
519
                    {
520
                      best_edge = e;
521
                      best_prob = prob;
522
                      best_freq = freq;
523
                    }
524
                  continue;
525
                }
526
 
527
              /* Edge that cannot be fallthru or improbable or infrequent
528
                 successor (i.e. it is unsuitable successor).  */
529
              if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
530
                  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
531
                  || e->count < count_th)
532
                continue;
533
 
534
              /* If partitioning hot/cold basic blocks, don't consider edges
535
                 that cross section boundaries.  */
536
 
537
              if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
538
                                 best_edge))
539
                {
540
                  best_edge = e;
541
                  best_prob = prob;
542
                  best_freq = freq;
543
                }
544
            }
545
 
546
          /* If the best destination has multiple predecessors, and can be
547
             duplicated cheaper than a jump, don't allow it to be added
548
             to a trace.  We'll duplicate it when connecting traces.  */
549
          if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
550
              && copy_bb_p (best_edge->dest, 0))
551
            best_edge = NULL;
552
 
553
          /* Add all non-selected successors to the heaps.  */
554
          FOR_EACH_EDGE (e, ei, bb->succs)
555
            {
556
              if (e == best_edge
557
                  || e->dest == EXIT_BLOCK_PTR
558
                  || e->dest->il.rtl->visited)
559
                continue;
560
 
561
              key = bb_to_key (e->dest);
562
 
563
              if (bbd[e->dest->index].heap)
564
                {
565
                  /* E->DEST is already in some heap.  */
566
                  if (key != bbd[e->dest->index].node->key)
567
                    {
568
                      if (dump_file)
569
                        {
570
                          fprintf (dump_file,
571
                                   "Changing key for bb %d from %ld to %ld.\n",
572
                                   e->dest->index,
573
                                   (long) bbd[e->dest->index].node->key,
574
                                   key);
575
                        }
576
                      fibheap_replace_key (bbd[e->dest->index].heap,
577
                                           bbd[e->dest->index].node, key);
578
                    }
579
                }
580
              else
581
                {
582
                  fibheap_t which_heap = *heap;
583
 
584
                  prob = e->probability;
585
                  freq = EDGE_FREQUENCY (e);
586
 
587
                  if (!(e->flags & EDGE_CAN_FALLTHRU)
588
                      || (e->flags & EDGE_COMPLEX)
589
                      || prob < branch_th || freq < exec_th
590
                      || e->count < count_th)
591
                    {
592
                      /* When partitioning hot/cold basic blocks, make sure
593
                         the cold blocks (and only the cold blocks) all get
594
                         pushed to the last round of trace collection.  */
595
 
596
                      if (push_to_next_round_p (e->dest, round,
597
                                                number_of_rounds,
598
                                                exec_th, count_th))
599
                        which_heap = new_heap;
600
                    }
601
 
602
                  bbd[e->dest->index].heap = which_heap;
603
                  bbd[e->dest->index].node = fibheap_insert (which_heap,
604
                                                                key, e->dest);
605
 
606
                  if (dump_file)
607
                    {
608
                      fprintf (dump_file,
609
                               "  Possible start of %s round: %d (key: %ld)\n",
610
                               (which_heap == new_heap) ? "next" : "this",
611
                               e->dest->index, (long) key);
612
                    }
613
 
614
                }
615
            }
616
 
617
          if (best_edge) /* Suitable successor was found.  */
618
            {
619
              if (best_edge->dest->il.rtl->visited == *n_traces)
620
                {
621
                  /* We do nothing with one basic block loops.  */
622
                  if (best_edge->dest != bb)
623
                    {
624
                      if (EDGE_FREQUENCY (best_edge)
625
                          > 4 * best_edge->dest->frequency / 5)
626
                        {
627
                          /* The loop has at least 4 iterations.  If the loop
628
                             header is not the first block of the function
629
                             we can rotate the loop.  */
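                          /* (The back edge carries more than 4/5 of the
                             header's frequency, so the header is expected
                             to run at least ~5 times per loop entry, i.e.
                             the back edge is taken at least ~4 times.)  */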

                          if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
                            {
                              if (dump_file)
                                {
                                  fprintf (dump_file,
                                           "Rotating loop %d - %d\n",
                                           best_edge->dest->index, bb->index);
                                }
                              bb->aux = best_edge->dest;
                              bbd[best_edge->dest->index].in_trace =
                                                             (*n_traces) - 1;
                              bb = rotate_loop (best_edge, trace, *n_traces);
                            }
                        }
                      else
                        {
                          /* The loop has fewer than 4 iterations.  */

                          if (single_succ_p (bb)
                              && copy_bb_p (best_edge->dest, !optimize_size))
                            {
                              bb = copy_bb (best_edge->dest, best_edge, bb,
                                            *n_traces);
                              trace->length++;
                            }
                        }
                    }

                  /* Terminate the trace.  */
                  break;
                }
              else
                {
                  /* Check for a situation

                    A
                   /|
                  B |
                   \|
                    C

                  where
                  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
                    >= EDGE_FREQUENCY (AC).
                  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
                  Best ordering is then A B C.

                  This situation is created for example by:

                  if (A) B;
                  C;

                  */

                  FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e != best_edge
                        && (e->flags & EDGE_CAN_FALLTHRU)
                        && !(e->flags & EDGE_COMPLEX)
                        && !e->dest->il.rtl->visited
                        && single_pred_p (e->dest)
                        && !(e->flags & EDGE_CROSSING)
                        && single_succ_p (e->dest)
                        && (single_succ_edge (e->dest)->flags
                            & EDGE_CAN_FALLTHRU)
                        && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
                        && single_succ (e->dest) == best_edge->dest
                        && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
                      {
                        best_edge = e;
                        if (dump_file)
                          fprintf (dump_file, "Selecting BB %d\n",
                                   best_edge->dest->index);
                        break;
                      }

                  bb->aux = best_edge->dest;
                  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
                  bb = best_edge->dest;
                }
            }
        }
      while (best_edge);
      trace->last = bb;
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
      bbd[trace->last->index].end_of_trace = *n_traces - 1;

      /* The trace is terminated so we have to recount the keys in heap
         (some block can have a lower key because now one of its predecessors
         is an end of the trace).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->dest == EXIT_BLOCK_PTR
              || e->dest->il.rtl->visited)
            continue;

          if (bbd[e->dest->index].heap)
            {
              key = bb_to_key (e->dest);
              if (key != bbd[e->dest->index].node->key)
                {
                  if (dump_file)
                    {
                      fprintf (dump_file,
                               "Changing key for bb %d from %ld to %ld.\n",
                               e->dest->index,
                               (long) bbd[e->dest->index].node->key, key);
                    }
                  fibheap_replace_key (bbd[e->dest->index].heap,
                                       bbd[e->dest->index].node,
                                       key);
                }
            }
        }
    }

  fibheap_delete (*heap);

  /* "Return" the new heap.  */
  *heap = new_heap;
}

/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark OLD_BB visited and update the pass' data
   structures (TRACE is the number of the trace which OLD_BB is duplicated
   to).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
{
  basic_block new_bb;

  new_bb = duplicate_block (old_bb, e, bb);
  BB_COPY_PARTITION (new_bb, old_bb);

  gcc_assert (e->dest == new_bb);
  gcc_assert (!e->dest->il.rtl->visited);

  if (dump_file)
    fprintf (dump_file,
             "Duplicated bb %d (created bb %d)\n",
             old_bb->index, new_bb->index);
  new_bb->il.rtl->visited = trace;
  new_bb->aux = bb->aux;
  bb->aux = new_bb;

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
      for (i = array_size; i < new_size; i++)
        {
          bbd[i].start_of_trace = -1;
          bbd[i].in_trace = -1;
          bbd[i].end_of_trace = -1;
          bbd[i].heap = NULL;
          bbd[i].node = NULL;
        }
      array_size = new_size;

      if (dump_file)
        {
          fprintf (dump_file,
                   "Growing the dynamic array to %d elements.\n",
                   array_size);
        }
    }

  bbd[new_bb->index].in_trace = trace;

  return new_bb;
}

/* Compute and return the key (for the heap) of the basic block BB.  */

static fibheapkey_t
bb_to_key (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int priority = 0;

  /* Do not start in probably never executed blocks.  */

  if (BB_PARTITION (bb) == BB_COLD_PARTITION
      || probably_never_executed_bb_p (bb))
    return BB_FREQ_MAX;

  /* Prefer blocks whose predecessor is an end of some trace
     or whose predecessor edge is EDGE_DFS_BACK.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
          || (e->flags & EDGE_DFS_BACK))
        {
          int edge_freq = EDGE_FREQUENCY (e);

          if (edge_freq > priority)
            priority = edge_freq;
        }
    }

  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
  return -bb->frequency;
}
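
/* For example (BB_FREQ_MAX is 10000 here): an ordinary block with frequency
   1000 gets key -1000, while a block whose best trace-ending or back-edge
   predecessor has edge frequency 400 gets key
   -(100 * 10000 + 100 * 400 + 1000) = -1041000, so the min-heap hands it
   out long before the ordinary block.  */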

/* Return true when the edge E from basic block BB is better than the
   temporary best edge (details are in function).  The probability of edge E
   is PROB.  The frequency of the successor is FREQ.  The current best
   probability is BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered to be equivalent when PROB does not differ much from
   BEST_PROB; similarly for frequency.  */

static bool
better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
               int best_freq, edge cur_best_edge)
{
  bool is_better_edge;

  /* The BEST_* values do not have to be best, but can be a bit smaller than
     maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
  else if (prob < best_prob - diff_prob)
    /* The edge has lower probability than the temporary best edge.  */
    is_better_edge = false;
  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
       probabilities.  The higher frequency of a successor now means
       that there is another edge going into that successor.
       This successor has lower frequency so it is better.  */
    is_better_edge = true;
  else if (freq > best_freq + diff_freq)
    /* This successor has higher frequency so it is worse.  */
    is_better_edge = false;
  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Select the previous successor.  */
    is_better_edge = true;
  else
    is_better_edge = false;

  /* If we are doing hot/cold partitioning, make sure that we always favor
     non-crossing edges over crossing edges.  */

  if (!is_better_edge
      && flag_reorder_blocks_and_partition
      && cur_best_edge
      && (cur_best_edge->flags & EDGE_CROSSING)
      && !(e->flags & EDGE_CROSSING))
    is_better_edge = true;

  return is_better_edge;
}
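
/* A worked example: with BEST_PROB == 8000 the slack is DIFF_PROB == 800,
   so any PROB between 7200 and 8800 counts as equivalent and the decision
   falls through to the frequency comparison; only PROB > 8800 wins
   outright, and only PROB < 7200 loses outright.  */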

/* Connect traces in array TRACES, N_TRACES is the count of traces.  */

static void
connect_traces (int n_traces, struct trace *traces)
{
  int i;
  bool *connected;
  bool two_passes;
  int last_trace;
  int current_pass;
  int current_partition;
  int freq_threshold;
  gcov_type count_threshold;

  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
  if (max_entry_count < INT_MAX / 1000)
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
  else
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;

  connected = xcalloc (n_traces, sizeof (bool));
  last_trace = -1;
  current_pass = 1;
  current_partition = BB_PARTITION (traces[0].first);
  two_passes = false;

  if (flag_reorder_blocks_and_partition)
    for (i = 0; i < n_traces && !two_passes; i++)
      if (BB_PARTITION (traces[0].first)
          != BB_PARTITION (traces[i].first))
        two_passes = true;

  for (i = 0; i < n_traces || (two_passes && current_pass == 1); i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (i >= n_traces)
        {
          gcc_assert (two_passes && current_pass == 1);
          i = 0;
          t = i;
          current_pass = 2;
          if (current_partition == BB_HOT_PARTITION)
            current_partition = BB_COLD_PARTITION;
          else
            current_partition = BB_HOT_PARTITION;
        }

      if (connected[t])
        continue;

      if (two_passes
          && BB_PARTITION (traces[t].first) != current_partition)
        continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
        {
          edge_iterator ei;
          best = NULL;
          best_len = 0;
          FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
            {
              int si = e->src->index;

              if (e->src != ENTRY_BLOCK_PTR
                  && (e->flags & EDGE_CAN_FALLTHRU)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[si].end_of_trace >= 0
                  && !connected[bbd[si].end_of_trace]
                  && (BB_PARTITION (e->src) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
                          && traces[bbd[si].end_of_trace].length > best_len)))
                {
                  best = e;
                  best_len = traces[bbd[si].end_of_trace].length;
                }
            }
          if (best)
            {
              best->src->aux = best->dest;
              t2 = bbd[best->src->index].end_of_trace;
              connected[t2] = true;

              if (dump_file)
                {
                  fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
            }
          else
            break;
        }

      if (last_trace >= 0)
        traces[last_trace].last->aux = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
        {
          /* Find the continuation of the chain.  */
          edge_iterator ei;
          best = NULL;
          best_len = 0;
          FOR_EACH_EDGE (e, ei, traces[t].last->succs)
            {
              int di = e->dest->index;

              if (e->dest != EXIT_BLOCK_PTR
                  && (e->flags & EDGE_CAN_FALLTHRU)
                  && !(e->flags & EDGE_COMPLEX)
                  && bbd[di].start_of_trace >= 0
                  && !connected[bbd[di].start_of_trace]
                  && (BB_PARTITION (e->dest) == current_partition)
                  && (!best
                      || e->probability > best->probability
                      || (e->probability == best->probability
                          && traces[bbd[di].start_of_trace].length > best_len)))
                {
                  best = e;
                  best_len = traces[bbd[di].start_of_trace].length;
                }
            }

          if (best)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Connection: %d %d\n",
                           best->src->index, best->dest->index);
                }
              t = bbd[best->dest->index].start_of_trace;
              traces[last_trace].last->aux = traces[t].first;
              connected[t] = true;
              last_trace = t;
            }
          else
            {
              /* Try to connect the traces by duplication of 1 block.  */
              edge e2;
              basic_block next_bb = NULL;
              bool try_copy = false;

              FOR_EACH_EDGE (e, ei, traces[t].last->succs)
                if (e->dest != EXIT_BLOCK_PTR
                    && (e->flags & EDGE_CAN_FALLTHRU)
                    && !(e->flags & EDGE_COMPLEX)
                    && (!best || e->probability > best->probability))
                  {
                    edge_iterator ei;
                    edge best2 = NULL;
                    int best2_len = 0;

                    /* If the destination is a start of a trace which is only
                       one block long, then no need to search the successor
                       blocks of the trace.  Accept it.  */
                    if (bbd[e->dest->index].start_of_trace >= 0
                        && traces[bbd[e->dest->index].start_of_trace].length
                           == 1)
                      {
                        best = e;
                        try_copy = true;
                        continue;
                      }

                    FOR_EACH_EDGE (e2, ei, e->dest->succs)
                      {
                        int di = e2->dest->index;

                        if (e2->dest == EXIT_BLOCK_PTR
                            || ((e2->flags & EDGE_CAN_FALLTHRU)
                                && !(e2->flags & EDGE_COMPLEX)
                                && bbd[di].start_of_trace >= 0
                                && !connected[bbd[di].start_of_trace]
                                && (BB_PARTITION (e2->dest) == current_partition)
                                && (EDGE_FREQUENCY (e2) >= freq_threshold)
                                && (e2->count >= count_threshold)
                                && (!best2
                                    || e2->probability > best2->probability
                                    || (e2->probability == best2->probability
                                        && traces[bbd[di].start_of_trace].length
                                           > best2_len))))
                          {
                            best = e;
                            best2 = e2;
                            if (e2->dest != EXIT_BLOCK_PTR)
                              best2_len = traces[bbd[di].start_of_trace].length;
                            else
                              best2_len = INT_MAX;
                            next_bb = e2->dest;
                            try_copy = true;
                          }
                      }
                  }

              if (flag_reorder_blocks_and_partition)
                try_copy = false;

              /* Copy tiny blocks always; copy larger blocks only when the
                 edge is traversed frequently enough.  */
              if (try_copy
                  && copy_bb_p (best->dest,
                                !optimize_size
                                && EDGE_FREQUENCY (best) >= freq_threshold
                                && best->count >= count_threshold))
                {
                  basic_block new_bb;

                  if (dump_file)
                    {
                      fprintf (dump_file, "Connection: %d %d ",
                               traces[t].last->index, best->dest->index);
                      if (!next_bb)
                        fputc ('\n', dump_file);
                      else if (next_bb == EXIT_BLOCK_PTR)
                        fprintf (dump_file, "exit\n");
                      else
                        fprintf (dump_file, "%d\n", next_bb->index);
                    }

                  new_bb = copy_bb (best->dest, best, traces[t].last, t);
                  traces[t].last = new_bb;
                  if (next_bb && next_bb != EXIT_BLOCK_PTR)
                    {
                      t = bbd[next_bb->index].start_of_trace;
                      traces[last_trace].last->aux = traces[t].first;
                      connected[t] = true;
                      last_trace = t;
                    }
                  else
                    break;      /* Stop finding the successor traces.  */
                }
              else
                break;  /* Stop finding the successor traces.  */
            }
        }
    }

  if (dump_file)
    {
      basic_block bb;

      fprintf (dump_file, "Final order:\n");
      for (bb = traces[0].first; bb; bb = bb->aux)
        fprintf (dump_file, "%d ", bb->index);
      fprintf (dump_file, "\n");
      fflush (dump_file);
    }

  FREE (connected);
}

/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */

static bool
copy_bb_p (basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx insn;

  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  if (code_may_grow && maybe_hot_bb_p (bb))
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
        size += get_attr_min_length (insn);
    }

  if (size <= max_size)
    return true;

  if (dump_file)
    {
      fprintf (dump_file,
               "Block %d can't be copied because its size = %d.\n",
               bb->index, size);
    }

  return false;
}

/* Return the length of an unconditional jump instruction.  */

static int
get_uncond_jump_length (void)
{
  rtx label, jump;
  int length;

  label = emit_label_before (gen_label_rtx (), get_insns ());
  jump = emit_jump_insn (gen_jump (label));

  length = get_attr_min_length (jump);

  delete_insn (jump);
  delete_insn (label);
  return length;
}

/* Find the basic blocks that are rarely executed and need to be moved to
   a separate section of the .o file (to cut down on paging and improve
   cache locality).  */

static void
find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
                                                      int *n_crossing_edges,
                                                      int *max_idx)
{
  basic_block bb;
  bool has_hot_blocks = false;
  edge e;
  int i;
  edge_iterator ei;

  /* Mark which partition (hot/cold) each basic block belongs in.  */

  FOR_EACH_BB (bb)
    {
      if (probably_never_executed_bb_p (bb))
        BB_SET_PARTITION (bb, BB_COLD_PARTITION);
      else
        {
          BB_SET_PARTITION (bb, BB_HOT_PARTITION);
          has_hot_blocks = true;
        }
    }

  /* Mark every edge that crosses between sections.  */

  i = 0;
  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (e->src != ENTRY_BLOCK_PTR
          && e->dest != EXIT_BLOCK_PTR
          && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
        {
          e->flags |= EDGE_CROSSING;
          if (i == *max_idx)
            {
              *max_idx *= 2;
              crossing_edges = xrealloc (crossing_edges,
                                         (*max_idx) * sizeof (edge));
            }
          crossing_edges[i++] = e;
        }
      else
        e->flags &= ~EDGE_CROSSING;
    }
  *n_crossing_edges = i;
}

/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
   a jump) to unconditional jumps.  */

static void
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
{
  int i;
  basic_block src;
  basic_block dest;
  rtx label;
  rtx barrier;
  rtx new_jump;

  for (i = 0; i < n_crossing_edges; i++)
    {
      if (crossing_edges[i])
        {
          src = crossing_edges[i]->src;
          dest = crossing_edges[i]->dest;

          /* Make sure dest has a label.  */

          if (dest && (dest != EXIT_BLOCK_PTR))
            {
              label = block_label (dest);

              /* Make sure source block ends with a jump.  */

              if (src && (src != ENTRY_BLOCK_PTR))
                {
                  if (!JUMP_P (BB_END (src)))
                    /* bb just falls through.  */
                    {
                      /* Make sure there's only one successor.  */
                      gcc_assert (single_succ_p (src));

                      /* Find label in dest block.  */
                      label = block_label (dest);

                      new_jump = emit_jump_insn_after (gen_jump (label),
                                                       BB_END (src));
                      barrier = emit_barrier_after (new_jump);
                      JUMP_LABEL (new_jump) = label;
                      LABEL_NUSES (label) += 1;
                      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
                      /* Mark edge as non-fallthru.  */
                      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
                    } /* end: 'if (!JUMP_P ...'  */
                } /* end: 'if (src && src->index...'  */
            } /* end: 'if (dest && dest->index...'  */
        } /* end: 'if (crossing_edges[i]...'  */
    } /* end for loop  */
}
1322
 
1323
/* Find any bb's where the fall-through edge is a crossing edge (note that
1324
   these bb's must also contain a conditional jump; we've already
1325
   dealt with fall-through edges for blocks that didn't have a
1326
   conditional jump in the call to add_labels_and_missing_jumps).
1327
   Convert the fall-through edge to non-crossing edge by inserting a
1328
   new bb to fall-through into.  The new bb will contain an
1329
   unconditional jump (crossing edge) to the original fall through
1330
   destination.  */
1331
 
1332
static void
1333
fix_up_fall_thru_edges (void)
1334
{
1335
  basic_block cur_bb;
1336
  basic_block new_bb;
1337
  edge succ1;
1338
  edge succ2;
1339
  edge fall_thru;
1340
  edge cond_jump = NULL;
1341
  edge e;
1342
  bool cond_jump_crosses;
1343
  int invert_worked;
1344
  rtx old_jump;
1345
  rtx fall_thru_label;
1346
  rtx barrier;
1347
 
1348
  FOR_EACH_BB (cur_bb)
1349
    {
1350
      fall_thru = NULL;
1351
      if (EDGE_COUNT (cur_bb->succs) > 0)
1352
        succ1 = EDGE_SUCC (cur_bb, 0);
1353
      else
1354
        succ1 = NULL;
1355
 
1356
      if (EDGE_COUNT (cur_bb->succs) > 1)
1357
        succ2 = EDGE_SUCC (cur_bb, 1);
1358
      else
1359
        succ2 = NULL;
1360
 
1361
      /* Find the fall-through edge.  */
1362
 
1363
      if (succ1
1364
          && (succ1->flags & EDGE_FALLTHRU))
1365
        {
1366
          fall_thru = succ1;
1367
          cond_jump = succ2;
1368
        }
1369
      else if (succ2
1370
               && (succ2->flags & EDGE_FALLTHRU))
1371
        {
1372
          fall_thru = succ2;
1373
          cond_jump = succ1;
1374
        }
1375
 
1376
      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1377
        {
1378
          /* Check to see if the fall-thru edge is a crossing edge.  */

          if (fall_thru->flags & EDGE_CROSSING)
            {
              /* The fall_thru edge crosses; now check the cond jump edge, if
                 it exists.  */

              cond_jump_crosses = true;
              invert_worked = 0;
              old_jump = BB_END (cur_bb);

              /* Find the jump instruction, if there is one.  */

              if (cond_jump)
                {
                  if (!(cond_jump->flags & EDGE_CROSSING))
                    cond_jump_crosses = false;

                  /* We know the fall-thru edge crosses; if the cond
                     jump edge does NOT cross, and its destination is the
                     next block in the bb order, invert the jump
                     (i.e. fix it so the fall thru does not cross and
                     the cond jump does).  */

                  if (!cond_jump_crosses
                      && cur_bb->aux == cond_jump->dest)
                    {
                      /* Find label in fall_thru block.  We've already added
                         any missing labels, so there must be one.  */

                      fall_thru_label = block_label (fall_thru->dest);

                      if (old_jump && fall_thru_label)
                        invert_worked = invert_jump (old_jump,
                                                     fall_thru_label, 0);
                      if (invert_worked)
                        {
                          fall_thru->flags &= ~EDGE_FALLTHRU;
                          cond_jump->flags |= EDGE_FALLTHRU;
                          update_br_prob_note (cur_bb);
                          e = fall_thru;
                          fall_thru = cond_jump;
                          cond_jump = e;
                          cond_jump->flags |= EDGE_CROSSING;
                          fall_thru->flags &= ~EDGE_CROSSING;
                        }
                    }
                }

              if (cond_jump_crosses || !invert_worked)
                {
                  /* This is the case where both edges out of the basic
                     block are crossing edges.  Here we will fix up the
                     fall through edge.  The jump edge will be taken care
                     of later.  */

                  new_bb = force_nonfallthru (fall_thru);

                  if (new_bb)
                    {
                      new_bb->aux = cur_bb->aux;
                      cur_bb->aux = new_bb;

                      /* Make sure new fall-through bb is in same
                         partition as bb it's falling through from.  */

                      BB_COPY_PARTITION (new_bb, cur_bb);
                      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
                    }

                  /* Add barrier after new jump.  */

                  if (new_bb)
                    {
                      barrier = emit_barrier_after (BB_END (new_bb));
                      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                                  barrier);
                    }
                  else
                    {
                      barrier = emit_barrier_after (BB_END (cur_bb));
                      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                                  barrier);
                    }
                }
            }
        }
    }
}
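
/* Illustrative sketch (ours, kept under "#if 0" so it does not affect
   compilation): the fixup above relies on the two outgoing edges of a
   conditional block being distinguishable via EDGE_FALLTHRU.  A
   hypothetical helper making that separation explicit:  */
#if 0
static void
split_cond_edges_sketch (basic_block bb, edge *fall_thru, edge *cond_jump)
{
  edge e;
  edge_iterator ei;

  *fall_thru = NULL;
  *cond_jump = NULL;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_FALLTHRU)
      *fall_thru = e;   /* Taken when the condition fails.  */
    else
      *cond_jump = e;   /* Taken when the branch fires.  */
}
#endif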

/* This function checks the destination block of a "crossing jump" to
   see if it has any crossing predecessors that begin with a code label
   and end with an unconditional jump.  If so, it returns that predecessor
   block.  (This is to avoid creating lots of new basic blocks that all
   contain unconditional jumps to the same destination).  */

static basic_block
find_jump_block (basic_block jump_dest)
{
  basic_block source_bb = NULL;
  edge e;
  rtx insn;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, jump_dest->preds)
    if (e->flags & EDGE_CROSSING)
      {
        basic_block src = e->src;

        /* Check each predecessor to see if it has a label, and contains
           only one executable instruction, which is an unconditional jump.
           If so, we can use it.  */

        if (LABEL_P (BB_HEAD (src)))
          {
            /* Skip the label and any notes to reach the first real
               instruction of the block.  */
            insn = BB_HEAD (src);
            while (!INSN_P (insn) && insn != BB_END (src))
              insn = NEXT_INSN (insn);

            /* The block qualifies if that instruction is also its
               last one and is an unconditional jump.  */
            if (INSN_P (insn)
                && insn == BB_END (src)
                && JUMP_P (insn)
                && !any_condjump_p (insn))
              source_bb = src;
          }

        if (source_bb)
          break;
      }

  return source_bb;
}
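
/* Illustrative use (hedged; this mirrors the caller below): reuse the
   label of an existing jump-only predecessor when one is found, and
   only create a fresh block otherwise:

     new_bb = find_jump_block (crossing_edge->dest);
     if (new_bb)
       new_label = block_label (new_bb);
     else
       ... create a new block, label and jump (see below) ...  */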

/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block last_bb;
  basic_block dest;
  basic_block prev_bb;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx old_jump;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx new_label;
  rtx new_jump;
  rtx barrier;

  last_bb = EXIT_BLOCK_PTR->prev_bb;

  FOR_EACH_BB (cur_bb)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
        succ1 = EDGE_SUCC (cur_bb, 0);
      else
        succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
        succ2 = EDGE_SUCC (cur_bb, 1);
      else
        succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
         can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
        crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
        crossing_edge = succ2;

      if (crossing_edge)
        {
          old_jump = BB_END (cur_bb);

          /* Check to make sure the jump instruction is a
             conditional jump.  */

          set_src = NULL_RTX;

          if (any_condjump_p (old_jump))
            {
              if (GET_CODE (PATTERN (old_jump)) == SET)
                set_src = SET_SRC (PATTERN (old_jump));
              else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
                {
                  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
                  if (GET_CODE (set_src) == SET)
                    set_src = SET_SRC (set_src);
                  else
                    set_src = NULL_RTX;
                }
            }

          if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
            {
              if (GET_CODE (XEXP (set_src, 1)) == PC)
                old_label = XEXP (set_src, 2);
              else if (GET_CODE (XEXP (set_src, 2)) == PC)
                old_label = XEXP (set_src, 1);

              /* Check to see if new bb for jumping to that dest has
                 already been created; if so, use it; if not, create
                 a new one.  */

              new_bb = find_jump_block (crossing_edge->dest);

              if (new_bb)
                new_label = block_label (new_bb);
              else
                {
                  /* Create new basic block to be dest for
                     conditional jump.  */

                  new_bb = create_basic_block (NULL, NULL, last_bb);
                  new_bb->aux = last_bb->aux;
                  last_bb->aux = new_bb;
                  prev_bb = last_bb;
                  last_bb = new_bb;

                  /* Update register liveness information.  */

                  new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (&reg_obstack);
                  new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (&reg_obstack);
                  COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
                                prev_bb->il.rtl->global_live_at_end);
                  COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
                                prev_bb->il.rtl->global_live_at_end);

                  /* Put appropriate instructions in new bb.  */

                  new_label = gen_label_rtx ();
                  emit_label_before (new_label, BB_HEAD (new_bb));
                  BB_HEAD (new_bb) = new_label;

                  if (GET_CODE (old_label) == LABEL_REF)
                    {
                      old_label = JUMP_LABEL (old_jump);
                      new_jump = emit_jump_insn_after (gen_jump
                                                       (old_label),
                                                       BB_END (new_bb));
                    }
                  else
                    {
                      gcc_assert (HAVE_return
                                  && GET_CODE (old_label) == RETURN);
                      new_jump = emit_jump_insn_after (gen_return (),
                                                       BB_END (new_bb));
                    }

                  barrier = emit_barrier_after (new_jump);
                  JUMP_LABEL (new_jump) = old_label;
                  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
                                                              barrier);

                  /* Make sure new bb is in same partition as source
                     of conditional branch.  */
                  BB_COPY_PARTITION (new_bb, cur_bb);
                }

              /* Make old jump branch to new bb.  */

              redirect_jump (old_jump, new_label, 0);

              /* Remove crossing_edge as predecessor of 'dest'.  */

              dest = crossing_edge->dest;

              redirect_edge_succ (crossing_edge, new_bb);

              /* Make a new edge from new_bb to old dest; new edge
                 will be a successor for new_bb and a predecessor
                 for 'dest'.  */

              if (EDGE_COUNT (new_bb->succs) == 0)
                new_edge = make_edge (new_bb, dest, 0);
              else
                new_edge = EDGE_SUCC (new_bb, 0);

              crossing_edge->flags &= ~EDGE_CROSSING;
              new_edge->flags |= EDGE_CROSSING;
            }
        }
    }
}
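
/* Illustrative sketch (ours; kept under "#if 0" so it does not affect
   compilation): how the branch target is dug out of a conditional-jump
   pattern of the form (set (pc) (if_then_else COND TARGET (pc))) or its
   mirror image, exactly as the XEXP-based dissection above does.  */
#if 0
static rtx
condjump_target_sketch (rtx jump)
{
  rtx set_src = NULL_RTX;

  if (GET_CODE (PATTERN (jump)) == SET)
    set_src = SET_SRC (PATTERN (jump));

  if (set_src && GET_CODE (set_src) == IF_THEN_ELSE)
    {
      /* The arm that is not (pc) is the branch target.  */
      if (GET_CODE (XEXP (set_src, 1)) == PC)
        return XEXP (set_src, 2);
      else if (GET_CODE (XEXP (set_src, 2)) == PC)
        return XEXP (set_src, 1);
    }
  return NULL_RTX;
}
#endif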

/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx last_insn;
  rtx label;
  rtx label_addr;
  rtx indirect_jump_sequence;
  rtx jump_insn = NULL_RTX;
  rtx new_reg;
  rtx cur_insn;
  edge succ;

  FOR_EACH_BB (cur_bb)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
        continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
         this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
          && (succ->flags & EDGE_CROSSING))
        {
          rtx label2, table;

          gcc_assert (!any_condjump_p (last_insn));

          /* Make sure the jump is not already an indirect or table jump.  */

          if (!computed_jump_p (last_insn)
              && !tablejump_p (last_insn, &label2, &table))
            {
              /* We have found a "crossing" unconditional branch.  Now
                 we must convert it to an indirect jump.  First create a
                 reference to the label, as the target for the jump.  */

              label = JUMP_LABEL (last_insn);
              label_addr = gen_rtx_LABEL_REF (Pmode, label);
              LABEL_NUSES (label) += 1;

              /* Get a register to use for the indirect jump.  */

              new_reg = gen_reg_rtx (Pmode);

              /* Generate the indirect jump sequence.  */

              start_sequence ();
              emit_move_insn (new_reg, label_addr);
              emit_indirect_jump (new_reg);
              indirect_jump_sequence = get_insns ();
              end_sequence ();

              /* Make sure every instruction in the new jump sequence has
                 its basic block set to be cur_bb.  */

              for (cur_insn = indirect_jump_sequence; cur_insn;
                   cur_insn = NEXT_INSN (cur_insn))
                {
                  if (!BARRIER_P (cur_insn))
                    BLOCK_FOR_INSN (cur_insn) = cur_bb;
                  if (JUMP_P (cur_insn))
                    jump_insn = cur_insn;
                }

              /* Insert the new (indirect) jump sequence immediately before
                 the unconditional jump, then delete the unconditional jump.  */

              emit_insn_before (indirect_jump_sequence, last_insn);
              delete_insn (last_insn);

              /* Make BB_END for cur_bb be the jump instruction (NOT the
                 barrier instruction at the end of the sequence...).  */

              BB_END (cur_bb) = jump_insn;
            }
        }
    }
}
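
/* Illustratively, the replacement sequence emitted above amounts to
   (register number hypothetical):

     (set (reg:Pmode 100) (label_ref L))    -- load the target address
     (set (pc) (reg:Pmode 100))             -- jump through the register

   which no longer requires a direct branch able to span the distance
   between the hot and cold sections.  */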

/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
          && JUMP_P (BB_END (e->src)))
        REG_NOTES (BB_END (e->src))
          = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP, NULL_RTX,
                               REG_NOTES (BB_END (e->src)));
}
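
/* Illustrative consumer of these notes: a later pass can recognize a
   section-crossing jump cheaply via

     find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)

   exactly as duplicate_computed_gotos below does when rejecting
   duplication candidates.  */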

/* Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However, this
   is not apparent from the current bb structure.  Therefore we must
   take care to ensure that: (1) there are no fall_thru edges that
   cross between sections; (2) for those architectures which have
   "short" conditional branches, all conditional branches that attempt
   to cross between sections are converted to unconditional branches;
   and (3) for those architectures which have "short" unconditional
   branches, all unconditional branches that attempt to cross between
   sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */

static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
                                    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Turn each crossing fall_thru edge into a non-crossing fall-thru to
     a new block ending in an unconditional jump to the original
     fall-thru destination.  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    {
      fix_crossing_unconditional_branches ();
      reg_scan (get_insns (), max_reg_num ());
    }

  add_reg_crossing_jump_notes ();
}
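
/* Recap of the fixups driven above (illustrative summary):

     crossing fall-thru edge      -> new block + unconditional jump
                                     (always)
     crossing conditional branch  -> retargeted through a same-section
                                     block (only if !HAS_LONG_COND_BRANCH)
     crossing unconditional jump  -> indirect jump through a register
                                     (only if !HAS_LONG_UNCOND_BRANCH)  */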

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
        current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
        {
          if (switched_sections)
            {
              error ("multiple hot/cold transitions found (bb %i)",
                     bb->index);
              err = 1;
            }
          else
            {
              switched_sections = true;
              current_partition = BB_PARTITION (bb);
            }
        }
    }

  gcc_assert (!err);
}
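
/* Example (illustrative): a final layout of hot,hot,cold,cold passes
   this check, whereas hot,cold,hot makes the partition change twice, so
   the second transition triggers the error above.  */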

/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
   the set of flags to pass to cfg_layout_initialize().  */

void
reorder_basic_blocks (unsigned int flags)
{
  int n_traces;
  int i;
  struct trace *traces;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  cfg_layout_initialize (flags);

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We are estimating the length of uncond jump insn only once since the code
     for getting the insn length always returns the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = xmalloc (n_basic_blocks * sizeof (struct trace));
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  if (dump_file)
    dump_flow_info (dump_file);

  cfg_layout_finalize ();
  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}
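
/* Note (illustrative; the ENTRY_BLOCK_PTR idiom is our assumption about
   the surrounding CFG API): the order chosen by connect_traces lives in
   the bb->aux chain until cfg_layout_finalize makes it real, so walking
   the chosen layout looks like

     for (bb = ENTRY_BLOCK_PTR->next_bb; bb; bb = (basic_block) bb->aux)
       ... visit blocks in their new order ...  */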
1939
 
1940
/* Determine which partition the first basic block in the function
1941
   belongs to, then find the first basic block in the current function
1942
   that belongs to a different section, and insert a
1943
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
1944
   instruction stream.  When writing out the assembly code,
1945
   encountering this note will make the compiler switch between the
1946
   hot and cold text sections.  */
1947
 
1948
static void
1949
insert_section_boundary_note (void)
1950
{
1951
  basic_block bb;
1952
  rtx new_note;
1953
  int first_partition = 0;
1954
 
1955
  if (flag_reorder_blocks_and_partition)
1956
    FOR_EACH_BB (bb)
1957
    {
1958
      if (!first_partition)
1959
        first_partition = BB_PARTITION (bb);
1960
      if (BB_PARTITION (bb) != first_partition)
1961
        {
1962
          new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
1963
                                       BB_HEAD (bb));
1964
          break;
1965
        }
1966
    }
1967
}
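
/* Example (illustrative): for a final layout hot1,hot2,cold1,cold2 the
   note is emitted immediately before BB_HEAD (cold1), the first block
   whose partition differs from hot1's; the loop can stop there because
   at most one transition exists after reorder_basic_blocks.  */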
1968
 
1969
/* Duplicate the blocks containing computed gotos.  This basically unfactors
1970
   computed gotos that were factored early on in the compilation process to
1971
   speed up edge based data flow.  We used to not unfactoring them again,
1972
   which can seriously pessimize code with many computed jumps in the source
1973
   code, such as interpreters.  See e.g. PR15242.  */
1974
 
1975
static bool
1976
gate_duplicate_computed_gotos (void)
1977
{
1978
  return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
1979
}
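
/* Illustratively, this gate holds when compiling at -O2 or above (or at
   -O1 with -fexpensive-optimizations given explicitly) and without -Os.  */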

static void
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  cfg_layout_initialize (0);

  /* We are estimating the length of uncond jump insn only once
     since the code for getting the insn length always returns
     the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
     mark it in the candidates.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
        bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
        continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
          || !can_duplicate_block_p (bb))
        continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
        if (INSN_P (insn))
          {
            size += get_attr_min_length (insn);
            if (size > max_size)
              break;
          }
      if (size > max_size)
        continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
        all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
        continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
        continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
         the exit block or the next block.
         The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
          || single_succ (bb) == EXIT_BLOCK_PTR
          || single_succ (bb) == bb->next_bb
          || single_pred_p (single_succ (bb)))
        continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
        continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
}
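
/* Illustrative effect of the unfactoring above (sketch): a factored
   dispatch of the shape

     B1 -> D,  B2 -> D,  D: goto *addr;

   becomes

     B1: goto *addr;   B2: goto *addr;

   i.e. the shared computed-goto block D is duplicated into each
   predecessor, so an interpreter-style dispatch loop gets one indirect
   jump per path, which tends to predict better.  */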

struct tree_opt_pass pass_duplicate_computed_gotos =
{
  "compgotos",                          /* name */
  gate_duplicate_computed_gotos,        /* gate */
  duplicate_computed_gotos,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};

/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would run after all optimizations that rearrange the CFG have
   finished.  However, part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information
   is later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below).

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above); those fixes ensure
   that jumps attempting to cross section boundaries can really cover
   whatever distance the jump requires (on many architectures
   conditional or unconditional jumps cannot reach all of memory).
   Therefore tests have to be inserted into each such optimization to
   make sure that it does not undo stuff necessary to cross partition
   boundaries.  This would be much less of a problem if we could
   perform this optimization later in the compilation, but
   unfortunately the fact that we may need to create indirect jumps
   (through registers) requires that this optimization be performed
   before register allocation.  */

void
partition_hot_cold_basic_blocks (void)
{
  basic_block cur_bb;
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= 1)
    return;

  crossing_edges = xcalloc (max_edges, sizeof (edge));

  cfg_layout_initialize (0);

  FOR_EACH_BB (cur_bb)
    if (cur_bb->index >= 0
        && cur_bb->next_bb->index >= 0)
      cur_bb->aux = cur_bb->next_bb;

  find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
                                                        &n_crossing_edges,
                                                        &max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);

  cfg_layout_finalize ();
}

static bool
gate_handle_reorder_blocks (void)
{
  return (optimize > 0);
}

/* Reorder basic blocks.  */
static void
rest_of_handle_reorder_blocks (void)
{
  bool changed;
  unsigned int liveness_flags;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  liveness_flags = (!HAVE_conditional_execution ? CLEANUP_UPDATE_LIFE : 0);
  changed = cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
    {
      timevar_push (TV_TRACER);
      tracer (liveness_flags);
      timevar_pop (TV_TRACER);
    }

  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
    reorder_basic_blocks (liveness_flags);
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
      || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
    changed |= cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  /* On conditional execution targets we cannot update the life info
     cheaply, so we defer the update until after both cleanups.  This
     may lose some cases but should not be terribly bad.  */
  if (changed && HAVE_conditional_execution)
    update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
                      PROP_DEATH_NOTES);

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
}

struct tree_opt_pass pass_reorder_blocks =
{
  "bbro",                               /* name */
  gate_handle_reorder_blocks,           /* gate */
  rest_of_handle_reorder_blocks,        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  'B'                                   /* letter */
};

static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */

  return (flag_reorder_blocks_and_partition
          && !DECL_ONE_ONLY (current_function_decl)
          && !user_defined_section_attribute);
}

/* Partition hot and cold basic blocks.  */
static void
rest_of_handle_partition_blocks (void)
{
  no_new_pseudos = 0;
  partition_hot_cold_basic_blocks ();
  allocate_reg_life_data ();
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
                    PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES);
  no_new_pseudos = 1;
}

struct tree_opt_pass pass_partition_blocks =
{
  "bbpart",                             /* name */
  gate_handle_partition_blocks,         /* gate */
  rest_of_handle_partition_blocks,      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};
