/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting,
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "cfglayout.h"
#include "params.h"
#include "sched-int.h"
#include "sel-sched.h"
#include "target.h"
#include "timevar.h"
#include "tree-pass.h"
#include "dbgcnt.h"

#ifdef INSN_SCHEDULING

/* Some accessor macros for h_i_d members only used within this file.  */
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

static int is_cfg_nonregular (void);

/* Number of regions in the procedure.  */
int nr_regions = 0;

/* Table of region descriptions.  */
region *rgn_table = NULL;

/* Array of lists of regions' blocks.  */
int *rgn_bb_table = NULL;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
int *block_to_bb = NULL;

/* The number of the region containing a block.  */
int *containing_rgn = NULL;

/* ebb_head [i] - is index in rgn_bb_table of the head basic block of i'th ebb.
   Currently we can get an ebb only through splitting of the currently
   scheduled block; therefore, we don't need an ebb_head array for every
   region, hence it's sufficient to hold it for the current one only.  */
int *ebb_head = NULL;

/* The minimum probability of reaching a source block so that it will be
   considered for speculative scheduling.  */
static int min_spec_prob;

static void find_single_block_region (bool);
static void find_rgns (void);
static bool too_large (int, int *, int *);

/* Blocks of the current region being scheduled.  */
int current_nr_blocks;
int current_blocks;

/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static basic_block *bblst_table;
static int bblst_size, bblst_last;

/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef struct
{
  basic_block *first_member;
  int nr_members;
}
bblst;

typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
#define IS_VALID(src) (candidate_table[src].is_valid)
#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
#define IS_SPECULATIVE_INSN(INSN)                       \
  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
int target_bb;

/* List of edges.  */
typedef struct
{
  edge *first_member;
  int nr_members;
}
edgelst;

static edge *edgelst_table;
static int edgelst_last;

static void extract_edgelst (sbitmap, edgelst *);

/* Target info functions.  */
static void split_edges (int, int, edgelst *);
static void compute_trg_info (int);
void debug_candidate (int);
void debug_candidates (int);

/* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg.  */
#define IS_DOMINATED(bb_src, bb_trg)                                 \
( TEST_BIT (dom[bb_src], bb_trg) )

/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
   the probability of bb i relative to the region entry.  */
static int *prob;

/* Bit-set of edges, where bit i stands for edge i.  */
typedef sbitmap edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static edge *rgn_edges;

/* Mapping from each edge in the graph to its number in the rgn.  */
#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))

/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))

/* Speculative scheduling functions.  */
static int check_live_1 (int, rtx);
static void update_live_1 (int, rtx);
static int is_pfree (rtx, int, int);
static int find_conditional_protection (rtx, int);
static int is_conditionally_protected (rtx, int, int);
static int is_prisky (rtx, int, int);
static int is_exception_free (rtx, int, int);

static bool sets_likely_spilled (rtx);
static void sets_likely_spilled_1 (rtx, const_rtx, void *);
static void add_branch_dependences (rtx, rtx);
static void compute_block_dependences (int);

static void schedule_region (int);
static rtx concat_INSN_LIST (rtx, rtx);
static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
static void propagate_deps (int, struct deps_desc *);
static void free_pending_lists (void);

/* Functions for construction of the control flow graph.  */

/* Return 1 if control flow graph should not be constructed, 0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, if we
   have nonlocal gotos, or if we have an unreachable loop.  */

static int
is_cfg_nonregular (void)
{
  basic_block b;
  rtx insn;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that we
     compute an accurate cfg for EH.  */
  if (current_function_has_exception_handlers ())
    return 1;

  /* If we have insns which refer to labels as non-jumped-to operands,
     then we consider the cfg not well structured.  */
  FOR_EACH_BB (b)
    FOR_BB_INSNS (b, insn)
      {
        rtx note, next, set, dest;

        /* If this function has a computed jump, then we consider the cfg
           not well structured.  */
        if (JUMP_P (insn) && computed_jump_p (insn))
          return 1;

        if (!INSN_P (insn))
          continue;

        note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
        if (note == NULL_RTX)
          continue;

        /* For that label not to be seen as a referred-to label, this
           must be a single-set which is feeding a jump *only*.  This
           could be a conditional jump with the label split off for
           machine-specific reasons or a casesi/tablejump.  */
        next = next_nonnote_insn (insn);
        if (next == NULL_RTX
            || !JUMP_P (next)
            || (JUMP_LABEL (next) != XEXP (note, 0)
                && find_reg_note (next, REG_LABEL_TARGET,
                                  XEXP (note, 0)) == NULL_RTX)
            || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
          return 1;

        set = single_set (insn);
        if (set == NULL_RTX)
          return 1;

        dest = SET_DEST (set);
        if (!REG_P (dest) || !dead_or_set_p (next, dest))
          return 1;
      }

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  FOR_EACH_BB (b)
    {
      if (EDGE_COUNT (b->preds) == 0
          || (single_pred_p (b)
              && single_pred (b) == b))
        return 1;
    }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}

/* Extract list of edges from a bitmap containing EDGE_TO_BIT bits.  */

static void
extract_edgelst (sbitmap set, edgelst *el)
{
  unsigned int i = 0;
  sbitmap_iterator sbi;

  /* edgelst table space is reused in each call to extract_edgelst.  */
  edgelst_last = 0;

  el->first_member = &edgelst_table[edgelst_last];
  el->nr_members = 0;

  /* Iterate over each word in the bitset.  */
  EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
    {
      edgelst_table[edgelst_last++] = rgn_edges[i];
      el->nr_members++;
    }
}

/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions (void)
{
  int rgn, bb;

  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      /* We don't have ebb_head initialized yet, so we can't use
         BB_TO_BLOCK ().  */
      current_blocks = RGN_BLOCKS (rgn);

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

      fprintf (sched_dump, "\n\n");
    }
}

/* Print the region's basic blocks.  */

void
debug_region (int rgn)
{
  int bb;

  fprintf (stderr, "\n;;   ------------ REGION %d ----------\n\n", rgn);
  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
           rgn_table[rgn].rgn_nr_blocks);
  fprintf (stderr, ";;\tbb/block: ");

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

  fprintf (stderr, "\n\n");

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    {
      debug_bb_n_slim (rgn_bb_table[current_blocks + bb]);
      fprintf (stderr, "\n");
    }

  fprintf (stderr, "\n");

}

/* True when a bb with index BB_INDEX is contained in region RGN.  */
static bool
bb_in_region_p (int bb_index, int rgn)
{
  int i;

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    if (rgn_bb_table[current_blocks + i] == bb_index)
      return true;

  return false;
}

/* Dump region RGN to file F using dot syntax.  */
void
dump_region_dot (FILE *f, int rgn)
{
  int i;

  fprintf (f, "digraph Region_%d {\n", rgn);

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    {
      edge e;
      edge_iterator ei;
      int src_bb_num = rgn_bb_table[current_blocks + i];
      struct basic_block_def *bb = BASIC_BLOCK (src_bb_num);

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bb_in_region_p (e->dest->index, rgn))
          fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
    }
  fprintf (f, "}\n");
}

/* The same, but first open a file specified by FNAME.  */
void
dump_region_dot_file (const char *fname, int rgn)
{
  FILE *f = fopen (fname, "wt");
  dump_region_dot (f, rgn);
  fclose (f);
}

/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region (bool ebbs_p)
{
  basic_block bb, ebb_start;
  int i = 0;

  nr_regions = 0;

  if (ebbs_p) {
    int probability_cutoff;
    if (profile_info && flag_branch_probabilities)
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
    else
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

    FOR_EACH_BB (ebb_start)
      {
        RGN_NR_BLOCKS (nr_regions) = 0;
        RGN_BLOCKS (nr_regions) = i;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        for (bb = ebb_start; ; bb = bb->next_bb)
          {
            edge e;
            edge_iterator ei;

            rgn_bb_table[i] = bb->index;
            RGN_NR_BLOCKS (nr_regions)++;
            CONTAINING_RGN (bb->index) = nr_regions;
            BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
            i++;

            if (bb->next_bb == EXIT_BLOCK_PTR
                || LABEL_P (BB_HEAD (bb->next_bb)))
              break;

            FOR_EACH_EDGE (e, ei, bb->succs)
             if ((e->flags & EDGE_FALLTHRU) != 0)
               break;
            if (! e)
              break;
            if (e->probability <= probability_cutoff)
              break;
          }

        ebb_start = bb;
        nr_regions++;
      }
  }
  else
    FOR_EACH_BB (bb)
      {
        rgn_bb_table[nr_regions] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = nr_regions;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        CONTAINING_RGN (bb->index) = nr_regions;
        BLOCK_TO_BB (bb->index) = 0;
        nr_regions++;
      }
}

/* Estimate number of the insns in the BB.  */
static int
rgn_estimate_number_of_insns (basic_block bb)
{
  int count;

  count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));

  if (MAY_HAVE_DEBUG_INSNS)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
        if (DEBUG_INSN_P (insn))
          count--;
    }

  return count;
}

/* Update number of blocks and the estimate for number of insns
   in the region.  Return true if the region is "too large" for interblock
   scheduling (compile time considerations).  */

static bool
too_large (int block, int *num_bbs, int *num_insns)
{
  (*num_bbs)++;
  (*num_insns) += (common_sched_info->estimate_number_of_insns
                   (BASIC_BLOCK (block)));

  return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
          || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
}

/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)         \
{                                               \
  if (max_hdr[blk] == -1)                       \
    max_hdr[blk] = hdr;                         \
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])  \
    RESET_BIT (inner, hdr);                     \
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])  \
    {                                           \
      RESET_BIT (inner,max_hdr[blk]);           \
      max_hdr[blk] = hdr;                       \
    }                                           \
}

/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick) variables

     * nr_regions
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn

   We use dominator relationships to avoid making regions out of non-reducible
   loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */

static void
haifa_find_rgns (void)
{
  int *max_hdr, *dfs_nr, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0;
  edge_iterator current_edge;
  edge_iterator *stack;
  int num_bbs, num_insns, unreachable;
  int too_large_failure;
  basic_block bb;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is on the DFS stack.  */
  sbitmap in_stack;

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = XNEWVEC (int, last_basic_block);
  dfs_nr = XCNEWVEC (int, last_basic_block);
  stack = XNEWVEC (edge_iterator, n_edges);

  inner = sbitmap_alloc (last_basic_block);
  sbitmap_ones (inner);

  header = sbitmap_alloc (last_basic_block);
  sbitmap_zero (header);

  in_queue = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_stack);

  for (i = 0; i < last_basic_block; i++)
    max_hdr[i] = -1;

  #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
  #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))

  /* DFS traversal to find inner loops in the cfg.  */

  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
  sp = -1;

  while (1)
    {
      if (EDGE_PASSED (current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0 && EDGE_PASSED (current_edge))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = ei_edge (current_edge)->src->index;
              gcc_assert (node != ENTRY_BLOCK);
              child = ei_edge (current_edge)->dest->index;
              gcc_assert (child != EXIT_BLOCK);
              RESET_BIT (in_stack, child);
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              ei_next (&current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && EDGE_PASSED (current_edge))
            break;

          /* Nope, continue the traversal with the popped node.  */
          continue;
        }

      /* Process a node.  */
      node = ei_edge (current_edge)->src->index;
      gcc_assert (node != ENTRY_BLOCK);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* We don't traverse to the exit block.  */
      child = ei_edge (current_edge)->dest->index;
      if (child == EXIT_BLOCK)
        {
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop, if it is not a natural loop, then it will
         be rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
        {
          no_loops = 0;
          SET_BIT (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_EDGE_PASSED (current_edge);
      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
    }

  /* Reset ->aux field used by EDGE_PASSED.  */
  FOR_ALL_BB (bb)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->succs)
        e->aux = NULL;
    }


  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  FOR_EACH_BB (bb)
    if (dfs_nr[bb->index] == 0)
      {
        unreachable = 1;
        break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  FOR_EACH_BB (bb)
    degree[bb->index] = EDGE_COUNT (bb->preds);

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue, *degree1 = NULL;
      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
         there basic blocks, which are forced to be region heads.
         This is done to try to assemble few smaller regions
         from a too_large region.  */
      sbitmap extended_rgn_header = NULL;
      bool extend_regions_p;

      if (no_loops)
        SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically sort
         the blocks of each region.  */

      queue = XNEWVEC (int, n_basic_blocks);

      extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
      if (extend_regions_p)
        {
          degree1 = XNEWVEC (int, last_basic_block);
          extended_rgn_header = sbitmap_alloc (last_basic_block);
          sbitmap_zero (extended_rgn_header);
        }

      /* Find blocks which are inner loop headers.  We still have non-reducible
         loops to consider at this point.  */
      FOR_EACH_BB (bb)
        {
          if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
            {
              edge e;
              edge_iterator ei;
              basic_block jbb;

              /* Now check that the loop is reducible.  We do this separate
                 from finding inner loops so that we do not find a reducible
                 loop which contains an inner non-reducible loop.

                 A simple way to find reducible/natural loops is to verify
                 that each block in the loop is dominated by the loop
                 header.

                 If there exists a block that is not dominated by the loop
                 header, then the block is reachable from outside the loop
                 and thus the loop is not a natural loop.  */
              FOR_EACH_BB (jbb)
                {
                  /* First identify blocks in the loop, except for the loop
                     entry block.  */
                  if (bb->index == max_hdr[jbb->index] && bb != jbb)
                    {
                      /* Now verify that the block is dominated by the loop
                         header.  */
                      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
                        break;
                    }
                }

              /* If we exited the loop early, then I is the header of
                 a non-reducible loop and we should quit processing it
                 now.  */
              if (jbb != EXIT_BLOCK_PTR)
                continue;

              /* I is a header of an inner loop, or block 0 in a subroutine
                 with no loops at all.  */
              head = tail = -1;
              too_large_failure = 0;
              loop_head = max_hdr[bb->index];

              if (extend_regions_p)
                /* We save degree in case when we meet a too_large region
                   and cancel it.  We need a correct degree later when
                   calling extend_rgns.  */
                memcpy (degree1, degree, last_basic_block * sizeof (int));

              /* Decrease degree of all I's successors for topological
                 ordering.  */
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest != EXIT_BLOCK_PTR)
                  --degree[e->dest->index];

              /* Estimate # insns, and count # blocks in the region.  */
              num_bbs = 1;
              num_insns = common_sched_info->estimate_number_of_insns (bb);

              /* Find all loop latches (blocks with back edges to the loop
                 header), or all the leaf blocks if the cfg has no loops.

                 Place those blocks into the queue.  */
              if (no_loops)
                {
                  FOR_EACH_BB (jbb)
                    /* Leaf nodes have only a single successor which must
                       be EXIT_BLOCK.  */
                    if (single_succ_p (jbb)
                        && single_succ (jbb) == EXIT_BLOCK_PTR)
                      {
                        queue[++tail] = jbb->index;
                        SET_BIT (in_queue, jbb->index);

                        if (too_large (jbb->index, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                }
              else
                {
                  edge e;

                  FOR_EACH_EDGE (e, ei, bb->preds)
                    {
                      if (e->src == ENTRY_BLOCK_PTR)
                        continue;

                      node = e->src->index;

                      if (max_hdr[node] == loop_head && node != bb->index)
                        {
                          /* This is a loop latch.  */
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              /* Now add all the blocks in the loop to the queue.

             We know the loop is a natural loop; however the algorithm
             above will not always mark certain blocks as being in the
             loop.  Consider:
                node   children
                 a        b,c
                 b        c
                 c        a,d
                 d        b

             The algorithm in the DFS traversal may not mark B & D as part
             of the loop (i.e. they will not have max_hdr set to A).

             We know they can not be loop latches (else they would have
             had max_hdr set since they'd have a backedge to a dominator
             block).  So we don't need them on the initial queue.

             We know they are part of the loop because they are dominated
             by the loop header and can be reached by a backwards walk of
             the edges starting with nodes on the initial queue.

             It is safe and desirable to include those nodes in the
             loop/scheduling region.  To do so we would need to decrease
             the degree of a node if it is the target of a backedge
             within the loop itself as the node is placed in the queue.

             We do not do this because I'm not sure that the actual
             scheduling code will properly handle this case. ?!? */

              while (head < tail && !too_large_failure)
                {
                  edge e;
                  child = queue[++head];

                  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
                    {
                      node = e->src->index;

                      /* See discussion above about nodes not marked as in
                         this loop during the initial DFS traversal.  */
                      if (e->src == ENTRY_BLOCK_PTR
                          || max_hdr[node] != loop_head)
                        {
                          tail = -1;
                          break;
                        }
                      else if (!TEST_BIT (in_queue, node) && node != bb->index)
                        {
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              if (tail >= 0 && !too_large_failure)
                {
                  /* Place the loop header into list of region blocks.  */
                  degree[bb->index] = -1;
                  rgn_bb_table[idx] = bb->index;
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  RGN_BLOCKS (nr_regions) = idx++;
                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
                  RGN_HAS_REAL_EBB (nr_regions) = 0;
                  CONTAINING_RGN (bb->index) = nr_regions;
                  BLOCK_TO_BB (bb->index) = count = 0;

                  /* Remove blocks from queue[] when their in degree
                     becomes zero.  Repeat until no blocks are left on the
                     list.  This produces a topological list of blocks in
                     the region.  */
                  while (tail >= 0)
                    {
                      if (head < 0)
                        head = tail;
                      child = queue[head];
                      if (degree[child] == 0)
                        {
                          edge e;

                          degree[child] = -1;
                          rgn_bb_table[idx++] = child;
                          BLOCK_TO_BB (child) = ++count;
                          CONTAINING_RGN (child) = nr_regions;
                          queue[head] = queue[tail--];

                          FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
                            if (e->dest != EXIT_BLOCK_PTR)
                              --degree[e->dest->index];
                        }
                      else
                        --head;
                    }
                  ++nr_regions;
                }
              else if (extend_regions_p)
                {
                  /* Restore DEGREE.  */
                  int *t = degree;

                  degree = degree1;
                  degree1 = t;

                  /* And force successors of BB to be region heads.
                     This may provide several smaller regions instead
                     of one too_large region.  */
                  FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e->dest != EXIT_BLOCK_PTR)
                      SET_BIT (extended_rgn_header, e->dest->index);
                }
            }
        }
      free (queue);

      if (extend_regions_p)
        {
          free (degree1);

          sbitmap_a_or_b (header, header, extended_rgn_header);
          sbitmap_free (extended_rgn_header);

          extend_rgns (degree, &idx, header, max_hdr);
        }
    }

  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  FOR_EACH_BB (bb)
    if (degree[bb->index] >= 0)
      {
        rgn_bb_table[idx] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = idx++;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;
        CONTAINING_RGN (bb->index) = nr_regions++;
        BLOCK_TO_BB (bb->index) = 0;
      }

  free (max_hdr);
  free (degree);
  free (stack);
  sbitmap_free (header);
  sbitmap_free (inner);
  sbitmap_free (in_queue);
  sbitmap_free (in_stack);
}


/* Wrapper function.
   If FLAG_SEL_SCHED_PIPELINING is set, then use custom function to form
   regions.  Otherwise just call haifa_find_rgns.  */
static void
find_rgns (void)
{
  if (sel_sched_p () && flag_sel_sched_pipelining)
    sel_find_rgns ();
  else
    haifa_find_rgns ();
}

static int gather_region_statistics (int **);
static void print_region_statistics (int *, int, int *, int);

/* Calculate the histogram that shows the number of regions having the
   given number of basic blocks, and store it in the RSP array.  Return
   the size of this array.  */
static int
gather_region_statistics (int **rsp)
{
  int i, *a = 0, a_sz = 0;

  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
  for (i = 0; i < nr_regions; i++)
    {
      int nr_blocks = RGN_NR_BLOCKS (i);

      gcc_assert (nr_blocks >= 1);

      if (nr_blocks > a_sz)
        {
          a = XRESIZEVEC (int, a, nr_blocks);
          do
            a[a_sz++] = 0;
          while (a_sz != nr_blocks);
        }

      a[nr_blocks - 1]++;
    }

  *rsp = a;
  return a_sz;
}

/* Print regions statistics.  S1 and S2 denote the data before and after
   calling extend_rgns, respectively.  */
static void
print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
{
  int i;

  /* We iterate until s2_sz because extend_rgns does not decrease
     the maximal region size.  */
  for (i = 1; i < s2_sz; i++)
    {
      int n1, n2;

      n2 = s2[i];

      if (n2 == 0)
        continue;

      if (i >= s1_sz)
        n1 = 0;
      else
        n1 = s1[i];

      fprintf (sched_dump, ";; Region extension statistics: size %d: " \
               "was %d + %d more\n", i + 1, n1, n2 - n1);
    }
}

/* Extend regions.
   DEGREE - Array of incoming edge count, considering only
   the edges, that don't have their sources in formed regions yet.
   IDXP - pointer to the next available index in rgn_bb_table.
   HEADER - set of all region heads.
   LOOP_HDR - mapping from block to the containing loop
   (two blocks can reside within one region if they have
   the same loop header).  */
void
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
{
  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
  int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;

  max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);

  max_hdr = XNEWVEC (int, last_basic_block);

  order = XNEWVEC (int, last_basic_block);
  post_order_compute (order, false, false);

  for (i = nblocks - 1; i >= 0; i--)
    {
      int bbn = order[i];
      if (degree[bbn] >= 0)
        {
          max_hdr[bbn] = bbn;
          rescan = 1;
        }
      else
        /* This block already was processed in find_rgns.  */
        max_hdr[bbn] = -1;
    }

  /* The idea is to topologically walk through CFG in top-down order.
     During the traversal, if all the predecessors of a node are
     marked to be in the same region (they all have the same max_hdr),
     then current node is also marked to be a part of that region.
     Otherwise the node starts its own region.
     CFG should be traversed until no further changes are made.  On each
     iteration the set of the region heads is extended (the set of those
     blocks that have max_hdr[bbi] == bbi).  This set is upper bounded by the
     set of all basic blocks, thus the algorithm is guaranteed to
     terminate.  */

  while (rescan && iter < max_iter)
    {
      rescan = 0;

      for (i = nblocks - 1; i >= 0; i--)
        {
          edge e;
          edge_iterator ei;
          int bbn = order[i];

          if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
            {
              int hdr = -1;

              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
                {
                  int predn = e->src->index;

                  if (predn != ENTRY_BLOCK
                      /* If pred wasn't processed in find_rgns.  */
                      && max_hdr[predn] != -1
                      /* And pred and bb reside in the same loop.
                         (Or out of any loop).  */
                      && loop_hdr[bbn] == loop_hdr[predn])
                    {
                      if (hdr == -1)
                        /* Then bb extends the containing region of pred.  */
                        hdr = max_hdr[predn];
                      else if (hdr != max_hdr[predn])
                        /* Too bad, there are at least two predecessors
                           that reside in different regions.  Thus, BB should
                           begin its own region.  */
                        {
                          hdr = bbn;
                          break;
                        }
                    }
                  else
                    /* BB starts its own region.  */
                    {
                      hdr = bbn;
                      break;
                    }
                }

              if (hdr == bbn)
                {
                  /* If BB starts its own region,
                     update the set of headers with BB.  */
                  SET_BIT (header, bbn);
                  rescan = 1;
                }
              else
                gcc_assert (hdr != -1);

              max_hdr[bbn] = hdr;
            }
        }

      iter++;
    }

  /* Statistics were gathered on the SPEC2000 package of tests with
     mainline weekly snapshot gcc-4.1-20051015 on ia64.

     Statistics for SPECint:
     1 iteration : 1751 cases (38.7%)
     2 iterations: 2770 cases (61.3%)
     Blocks wrapped in regions by find_rgns without extension: 18295 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
     (We don't count single block regions here).

     Statistics for SPECfp:
     1 iteration : 621 cases (35.9%)
     2 iterations: 1110 cases (64.1%)
     Blocks wrapped in regions by find_rgns without extension: 6476 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
     (We don't count single block regions here).

     By default we do at most 2 iterations.
     This can be overridden with max-sched-extend-regions-iters parameter:

     N > 0 - do at most N iterations.  */

  if (sched_verbose && iter != 0)
    fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
             rescan ? "... failed" : "");

  if (!rescan && iter != 0)
    {
      int *s1 = NULL, s1_sz = 0;

      /* Save the old statistics for later printout.  */
      if (sched_verbose >= 6)
        s1_sz = gather_region_statistics (&s1);

      /* We have succeeded.  Now assemble the regions.  */
      for (i = nblocks - 1; i >= 0; i--)
        {
          int bbn = order[i];

          if (max_hdr[bbn] == bbn)
            /* BBN is a region head.  */
            {
              edge e;
              edge_iterator ei;
              int num_bbs = 0, j, num_insns = 0, large;

              large = too_large (bbn, &num_bbs, &num_insns);

              degree[bbn] = -1;
              rgn_bb_table[idx] = bbn;
              RGN_BLOCKS (nr_regions) = idx++;
              RGN_DONT_CALC_DEPS (nr_regions) = 0;
              RGN_HAS_REAL_EBB (nr_regions) = 0;
              CONTAINING_RGN (bbn) = nr_regions;
              BLOCK_TO_BB (bbn) = 0;

              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
                if (e->dest != EXIT_BLOCK_PTR)
                  degree[e->dest->index]--;

              if (!large)
                /* Here we check whether the region is too_large.  */
                for (j = i - 1; j >= 0; j--)
                  {
                    int succn = order[j];
                    if (max_hdr[succn] == bbn)
                      {
                        if ((large = too_large (succn, &num_bbs, &num_insns)))
                          break;
                      }
                  }

              if (large)
                /* If the region is too_large, then wrap every block of
                   the region into a single block region.
                   Here we wrap the region head only.  Other blocks are
                   processed in the loop below.  */
                {
                  RGN_NR_BLOCKS (nr_regions) = 1;
                  nr_regions++;
                }

              num_bbs = 1;

              for (j = i - 1; j >= 0; j--)
                {
                  int succn = order[j];

                  if (max_hdr[succn] == bbn)
                    /* This loop iterates over all basic blocks that
                       are supposed to be in the region with head BBN,
                       and wraps them into that region (or into a single
                       block region).  */
                    {
1346
                      gcc_assert (degree[succn] == 0);
1347
 
1348
                      degree[succn] = -1;
1349
                      rgn_bb_table[idx] = succn;
1350
                      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1351
                      CONTAINING_RGN (succn) = nr_regions;
1352
 
1353
                      if (large)
1354
                        /* Wrap SUCCN into single block region.  */
1355
                        {
1356
                          RGN_BLOCKS (nr_regions) = idx;
1357
                          RGN_NR_BLOCKS (nr_regions) = 1;
1358
                          RGN_DONT_CALC_DEPS (nr_regions) = 0;
1359
                          RGN_HAS_REAL_EBB (nr_regions) = 0;
1360
                          nr_regions++;
1361
                        }
1362
 
1363
                      idx++;
1364
 
1365
                      FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
1366
                        if (e->dest != EXIT_BLOCK_PTR)
1367
                          degree[e->dest->index]--;
1368
                    }
1369
                }
1370
 
1371
              if (!large)
1372
                {
1373
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
1374
                  nr_regions++;
1375
                }
1376
            }
1377
        }
1378
 
1379
      if (sched_verbose >= 6)
1380
        {
1381
          int *s2, s2_sz;
1382
 
1383
          /* Get the new statistics and print the comparison with the
1384
             one before calling this function.  */
1385
          s2_sz = gather_region_statistics (&s2);
1386
          print_region_statistics (s1, s1_sz, s2, s2_sz);
1387
          free (s1);
1388
          free (s2);
1389
        }
1390
    }
1391
 
1392
  free (order);
1393
  free (max_hdr);
1394
 
1395
  *idxp = idx;
1396
}
1397
 
1398
/* Functions for regions scheduling information.  */
1399
 
1400
/* Compute dominators, probability, and potential-split-edges of bb.
1401
   Assume that these values were already computed for bb's predecessors.  */
1402
 
1403
static void
1404
compute_dom_prob_ps (int bb)
1405
{
1406
  edge_iterator in_ei;
1407
  edge in_edge;
1408
 
1409
  /* We shouldn't have any real ebbs yet.  */
1410
  gcc_assert (ebb_head [bb] == bb + current_blocks);
1411
 
1412
  if (IS_RGN_ENTRY (bb))
1413
    {
1414
      SET_BIT (dom[bb], 0);
1415
      prob[bb] = REG_BR_PROB_BASE;
1416
      return;
1417
    }
1418
 
1419
  prob[bb] = 0;
1420
 
1421
  /* Initialize dom[bb] to '111..1'.  */
1422
  sbitmap_ones (dom[bb]);
1423
 
1424
  FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
1425
    {
1426
      int pred_bb;
1427
      edge out_edge;
1428
      edge_iterator out_ei;
1429
 
1430
      if (in_edge->src == ENTRY_BLOCK_PTR)
1431
        continue;
1432
 
1433
      pred_bb = BLOCK_TO_BB (in_edge->src->index);
1434
      sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
1435
      sbitmap_a_or_b (ancestor_edges[bb],
1436
                      ancestor_edges[bb], ancestor_edges[pred_bb]);
1437
 
1438
      SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1439
 
1440
      sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1441
 
1442
      FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1443
        SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
1444
 
1445
      prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
1446
    }
1447
 
1448
  SET_BIT (dom[bb], bb);
1449
  sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1450
 
1451
  if (sched_verbose >= 2)
1452
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1453
             (100 * prob[bb]) / REG_BR_PROB_BASE);
1454
}
1455
 
1456
/* Functions for target info.  */
1457
 
1458
/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1459
   Note that bb_trg dominates bb_src.  */
1460
 
1461
static void
1462
split_edges (int bb_src, int bb_trg, edgelst *bl)
1463
{
1464
  sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1465
  sbitmap_copy (src, pot_split[bb_src]);
1466
 
1467
  sbitmap_difference (src, src, pot_split[bb_trg]);
1468
  extract_edgelst (src, bl);
1469
  sbitmap_free (src);
1470
}
1471
 
1472
/* Find the valid candidate-source-blocks for the target block TRG, compute
1473
   their probability, and check if they are speculative or not.
1474
   For speculative sources, compute their update-blocks and split-blocks.  */
1475
 
1476
static void
1477
compute_trg_info (int trg)
1478
{
1479
  candidate *sp;
1480
  edgelst el = { NULL, 0 };
1481
  int i, j, k, update_idx;
1482
  basic_block block;
1483
  sbitmap visited;
1484
  edge_iterator ei;
1485
  edge e;
1486
 
1487
  candidate_table = XNEWVEC (candidate, current_nr_blocks);
1488
 
1489
  bblst_last = 0;
1490
  /* bblst_table holds split blocks and update blocks for each block after
1491
     the current one in the region.  split blocks and update blocks are
1492
     the TO blocks of region edges, so there can be at most rgn_nr_edges
1493
     of them.  */
1494
  bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1495
  bblst_table = XNEWVEC (basic_block, bblst_size);
1496
 
1497
  edgelst_last = 0;
1498
  edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1499
 
1500
  /* Define some of the fields for the target bb as well.  */
1501
  sp = candidate_table + trg;
1502
  sp->is_valid = 1;
1503
  sp->is_speculative = 0;
1504
  sp->src_prob = REG_BR_PROB_BASE;
1505
 
1506
  visited = sbitmap_alloc (last_basic_block);
1507
 
1508
  for (i = trg + 1; i < current_nr_blocks; i++)
1509
    {
1510
      sp = candidate_table + i;
1511
 
1512
      sp->is_valid = IS_DOMINATED (i, trg);
1513
      if (sp->is_valid)
1514
        {
1515
          int tf = prob[trg], cf = prob[i];
1516
 
1517
          /* In CFGs with low probability edges TF can possibly be zero.  */
1518
          sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
1519
          sp->is_valid = (sp->src_prob >= min_spec_prob);
1520
        }
1521
 
1522
      if (sp->is_valid)
1523
        {
1524
          split_edges (i, trg, &el);
1525
          sp->is_speculative = (el.nr_members) ? 1 : 0;
1526
          if (sp->is_speculative && !flag_schedule_speculative)
1527
            sp->is_valid = 0;
1528
        }
1529
 
1530
      if (sp->is_valid)
1531
        {
1532
          /* Compute split blocks and store them in bblst_table.
1533
             The TO block of every split edge is a split block.  */
1534
          sp->split_bbs.first_member = &bblst_table[bblst_last];
1535
          sp->split_bbs.nr_members = el.nr_members;
1536
          for (j = 0; j < el.nr_members; bblst_last++, j++)
1537
            bblst_table[bblst_last] = el.first_member[j]->dest;
1538
          sp->update_bbs.first_member = &bblst_table[bblst_last];
1539
 
1540
          /* Compute update blocks and store them in bblst_table.
1541
             For every split edge, look at the FROM block, and check
1542
             all out edges.  For each out edge that is not a split edge,
1543
             add the TO block to the update block list.  This list can end
1544
             up with a lot of duplicates.  We need to weed them out to avoid
1545
             overrunning the end of the bblst_table.  */
1546
 
1547
          update_idx = 0;
1548
          sbitmap_zero (visited);
1549
          for (j = 0; j < el.nr_members; j++)
1550
            {
1551
              block = el.first_member[j]->src;
1552
              FOR_EACH_EDGE (e, ei, block->succs)
1553
                {
1554
                  if (!TEST_BIT (visited, e->dest->index))
1555
                    {
1556
                      for (k = 0; k < el.nr_members; k++)
1557
                        if (e == el.first_member[k])
1558
                          break;
1559
 
1560
                      if (k >= el.nr_members)
1561
                        {
1562
                          bblst_table[bblst_last++] = e->dest;
1563
                          SET_BIT (visited, e->dest->index);
1564
                          update_idx++;
1565
                        }
1566
                    }
1567
                }
1568
            }
1569
          sp->update_bbs.nr_members = update_idx;
1570
 
1571
          /* Make sure we didn't overrun the end of bblst_table.  */
1572
          gcc_assert (bblst_last <= bblst_size);
1573
        }
1574
      else
1575
        {
1576
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1577
 
1578
          sp->is_speculative = 0;
1579
          sp->src_prob = 0;
1580
        }
1581
    }
1582
 
1583
  sbitmap_free (visited);
1584
}
1585
 
1586
/* Free the computed target info.  */
1587
static void
1588
free_trg_info (void)
1589
{
1590
  free (candidate_table);
1591
  free (bblst_table);
1592
  free (edgelst_table);
1593
}
1594
 
1595
/* Print candidates info, for debugging purposes.  Callable from debugger.  */
1596
 
1597
void
1598
debug_candidate (int i)
1599
{
1600
  if (!candidate_table[i].is_valid)
1601
    return;
1602
 
1603
  if (candidate_table[i].is_speculative)
1604
    {
1605
      int j;
1606
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1607
 
1608
      fprintf (sched_dump, "split path: ");
1609
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1610
        {
1611
          int b = candidate_table[i].split_bbs.first_member[j]->index;
1612
 
1613
          fprintf (sched_dump, " %d ", b);
1614
        }
1615
      fprintf (sched_dump, "\n");
1616
 
1617
      fprintf (sched_dump, "update path: ");
1618
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1619
        {
1620
          int b = candidate_table[i].update_bbs.first_member[j]->index;
1621
 
1622
          fprintf (sched_dump, " %d ", b);
1623
        }
1624
      fprintf (sched_dump, "\n");
1625
    }
1626
  else
1627
    {
1628
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1629
    }
1630
}
1631
 
1632
/* Print info for all candidates of the target block, for debugging
   purposes.  Callable from debugger.  */
1633
 
1634
void
1635
debug_candidates (int trg)
1636
{
1637
  int i;
1638
 
1639
  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1640
           BB_TO_BLOCK (trg), trg);
1641
  for (i = trg + 1; i < current_nr_blocks; i++)
1642
    debug_candidate (i);
1643
}
1644
 
1645
/* Functions for speculative scheduling.  */
1646
 
1647
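/* Set of basic blocks for which DF liveness information has not been
   computed; check_live_1 conservatively treats every register as live
   at the start of such blocks.  */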
static bitmap_head not_in_df;
1648
 
1649
/* Return 0 if x is a set of a register alive at the beginning of one
1650
   of the split-blocks of src, otherwise return 1.  */
1651
 
1652
static int
1653
check_live_1 (int src, rtx x)
1654
{
1655
  int i;
1656
  int regno;
1657
  rtx reg = SET_DEST (x);
1658
 
1659
  if (reg == 0)
1660
    return 1;
1661
 
1662
  while (GET_CODE (reg) == SUBREG
1663
         || GET_CODE (reg) == ZERO_EXTRACT
1664
         || GET_CODE (reg) == STRICT_LOW_PART)
1665
    reg = XEXP (reg, 0);
1666
 
1667
  if (GET_CODE (reg) == PARALLEL)
1668
    {
1669
      int i;
1670
 
1671
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1672
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1673
          if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1674
            return 1;
1675
 
1676
      return 0;
1677
    }
1678
 
1679
  if (!REG_P (reg))
1680
    return 1;
1681
 
1682
  regno = REGNO (reg);
1683
 
1684
  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1685
    {
1686
      /* Global registers are assumed live.  */
1687
      return 0;
1688
    }
1689
  else
1690
    {
1691
      if (regno < FIRST_PSEUDO_REGISTER)
1692
        {
1693
          /* Check for hard registers.  */
1694
          int j = hard_regno_nregs[regno][GET_MODE (reg)];
1695
          while (--j >= 0)
1696
            {
1697
              for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1698
                {
1699
                  basic_block b = candidate_table[src].split_bbs.first_member[i];
1700
                  int t = bitmap_bit_p (&not_in_df, b->index);
1701
 
1702
                  /* We can have split blocks that were recently generated.
1703
                     Such blocks are always outside the current region.  */
1704
                  gcc_assert (!t || (CONTAINING_RGN (b->index)
1705
                                     != CONTAINING_RGN (BB_TO_BLOCK (src))));
1706
 
1707
                  if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1708
                    return 0;
1709
                }
1710
            }
1711
        }
1712
      else
1713
        {
1714
          /* Check for pseudo registers.  */
1715
          for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1716
            {
1717
              basic_block b = candidate_table[src].split_bbs.first_member[i];
1718
              int t = bitmap_bit_p (&not_in_df, b->index);
1719
 
1720
              gcc_assert (!t || (CONTAINING_RGN (b->index)
1721
                                 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1722
 
1723
              if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1724
                return 0;
1725
            }
1726
        }
1727
    }
1728
 
1729
  return 1;
1730
}
1731
 
1732
/* If x is a set of a register R, mark that R is alive at the beginning
1733
   of every update-block of src.  */
1734
 
1735
static void
1736
update_live_1 (int src, rtx x)
1737
{
1738
  int i;
1739
  int regno;
1740
  rtx reg = SET_DEST (x);
1741
 
1742
  if (reg == 0)
1743
    return;
1744
 
1745
  while (GET_CODE (reg) == SUBREG
1746
         || GET_CODE (reg) == ZERO_EXTRACT
1747
         || GET_CODE (reg) == STRICT_LOW_PART)
1748
    reg = XEXP (reg, 0);
1749
 
1750
  if (GET_CODE (reg) == PARALLEL)
1751
    {
1752
      int i;
1753
 
1754
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1755
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1756
          update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1757
 
1758
      return;
1759
    }
1760
 
1761
  if (!REG_P (reg))
1762
    return;
1763
 
1764
  /* Global registers are always live, so the code below does not apply
1765
     to them.  */
1766
 
1767
  regno = REGNO (reg);
1768
 
1769
  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1770
    {
1771
      if (regno < FIRST_PSEUDO_REGISTER)
1772
        {
1773
          int j = hard_regno_nregs[regno][GET_MODE (reg)];
1774
          while (--j >= 0)
1775
            {
1776
              for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1777
                {
1778
                  basic_block b = candidate_table[src].update_bbs.first_member[i];
1779
 
1780
                  SET_REGNO_REG_SET (df_get_live_in (b), regno + j);
1781
                }
1782
            }
1783
        }
1784
      else
1785
        {
1786
          for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1787
            {
1788
              basic_block b = candidate_table[src].update_bbs.first_member[i];
1789
 
1790
              SET_REGNO_REG_SET (df_get_live_in (b), regno);
1791
            }
1792
        }
1793
    }
1794
}
1795
 
1796
/* Return 1 if insn can be speculatively moved from block src to trg,
1797
   otherwise return 0.  Called before the insn is first inserted into the
1798
   ready list, or before scheduling begins.  */
1799
 
1800
static int
1801
check_live (rtx insn, int src)
1802
{
1803
  /* Find the registers set by instruction.  */
1804
  if (GET_CODE (PATTERN (insn)) == SET
1805
      || GET_CODE (PATTERN (insn)) == CLOBBER)
1806
    return check_live_1 (src, PATTERN (insn));
1807
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1808
    {
1809
      int j;
1810
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1811
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1812
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1813
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1814
          return 0;
1815
 
1816
      return 1;
1817
    }
1818
 
1819
  return 1;
1820
}
1821
 
1822
/* Update the live registers info after insn was moved speculatively from
1823
   block src to trg.  */
1824
 
1825
static void
1826
update_live (rtx insn, int src)
1827
{
1828
  /* Find the registers set by instruction.  */
1829
  if (GET_CODE (PATTERN (insn)) == SET
1830
      || GET_CODE (PATTERN (insn)) == CLOBBER)
1831
    update_live_1 (src, PATTERN (insn));
1832
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1833
    {
1834
      int j;
1835
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1836
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1837
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1838
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1839
    }
1840
}
1841
 
1842
/* Nonzero if block bb_to is equal to, or reachable from, block bb_from.  */
1843
#define IS_REACHABLE(bb_from, bb_to)                                    \
1844
  (bb_from == bb_to                                                     \
1845
   || IS_RGN_ENTRY (bb_from)                                            \
1846
   || (TEST_BIT (ancestor_edges[bb_to],                                 \
1847
         EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
1848
 
1849
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */
1850
 
1851
static void
1852
set_spec_fed (rtx load_insn)
1853
{
1854
  sd_iterator_def sd_it;
1855
  dep_t dep;
1856
 
1857
  FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1858
    if (DEP_TYPE (dep) == REG_DEP_TRUE)
1859
      FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1860
}
1861
 
1862
/* On the path from the insn to load_insn_bb, find a conditional
1863
   branch depending on insn that guards the speculative load.  */
1864
 
1865
static int
1866
find_conditional_protection (rtx insn, int load_insn_bb)
1867
{
1868
  sd_iterator_def sd_it;
1869
  dep_t dep;
1870
 
1871
  /* Iterate through DEF-USE forward dependences.  */
1872
  FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1873
    {
1874
      rtx next = DEP_CON (dep);
1875
 
1876
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1877
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1878
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1879
          && load_insn_bb != INSN_BB (next)
1880
          && DEP_TYPE (dep) == REG_DEP_TRUE
1881
          && (JUMP_P (next)
1882
              || find_conditional_protection (next, load_insn_bb)))
1883
        return 1;
1884
    }
1885
  return 0;
1886
}                               /* find_conditional_protection */
1887
 
1888
/* Returns 1 if some insn1 that participates in the computation
1889
   of load_insn's address also feeds a conditional branch that
1890
   guards load_insn.  This is true if we find two DEF-USE
1891
   chains:
1892
   insn1 -> ... -> conditional-branch
1893
   insn1 -> ... -> load_insn,
1894
   and if a flow path exists:
1895
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
1896
   and if insn1 is on the path
1897
   region-entry -> ... -> bb_trg -> ... load_insn.
1898
 
1899
   Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1900
   Locate the branch by following INSN_FORW_DEPS from insn1.  */
1901
 
1902
static int
1903
is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1904
{
1905
  sd_iterator_def sd_it;
1906
  dep_t dep;
1907
 
1908
  FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1909
    {
1910
      rtx insn1 = DEP_PRO (dep);
1911
 
1912
      /* Must be a DEF-USE dependence upon non-branch.  */
1913
      if (DEP_TYPE (dep) != REG_DEP_TRUE
1914
          || JUMP_P (insn1))
1915
        continue;
1916
 
1917
      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
1918
      if (INSN_BB (insn1) == bb_src
1919
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
1920
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1921
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1922
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1923
        continue;
1924
 
1925
      /* Now search for the conditional-branch.  */
1926
      if (find_conditional_protection (insn1, bb_src))
1927
        return 1;
1928
 
1929
      /* Recursive step: search another insn1, "above" current insn1.  */
1930
      return is_conditionally_protected (insn1, bb_src, bb_trg);
1931
    }
1932
 
1933
  /* The chain does not exist.  */
1934
  return 0;
1935
}                               /* is_conditionally_protected */
1936
 
1937
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1938
   load_insn can move speculatively from bb_src to bb_trg.  All the
1939
   following must hold:
1940
 
1941
   (1) both loads have 1 base register (PFREE_CANDIDATEs).
1942
   (2) load_insn and load1 have a def-use dependence upon
1943
   the same insn 'insn1'.
1944
   (3) either load2 is in bb_trg, or:
1945
   - there's only one split-block, and
1946
   - load1 is on the escape path, and
1947
 
1948
   From all these we can conclude that the two loads access memory
1949
   addresses that differ at most by a constant, and hence if moving
1950
   load_insn would cause an exception, it would have been caused by
1951
   load2 anyhow.  */
1952
 
1953
static int
1954
is_pfree (rtx load_insn, int bb_src, int bb_trg)
1955
{
1956
  sd_iterator_def back_sd_it;
1957
  dep_t back_dep;
1958
  candidate *candp = candidate_table + bb_src;
1959
 
1960
  if (candp->split_bbs.nr_members != 1)
1961
    /* Must have exactly one escape block.  */
1962
    return 0;
1963
 
1964
  FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1965
    {
1966
      rtx insn1 = DEP_PRO (back_dep);
1967
 
1968
      if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1969
        /* Found a DEF-USE dependence (insn1, load_insn).  */
1970
        {
1971
          sd_iterator_def fore_sd_it;
1972
          dep_t fore_dep;
1973
 
1974
          FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1975
            {
1976
              rtx insn2 = DEP_CON (fore_dep);
1977
 
1978
              if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1979
                {
1980
                  /* Found a DEF-USE dependence (insn1, insn2).  */
1981
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1982
                    /* insn2 not guaranteed to be a 1 base reg load.  */
1983
                    continue;
1984
 
1985
                  if (INSN_BB (insn2) == bb_trg)
1986
                    /* insn2 is the similar load, in the target block.  */
1987
                    return 1;
1988
 
1989
                  if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1990
                    /* insn2 is a similar load, in a split-block.  */
1991
                    return 1;
1992
                }
1993
            }
1994
        }
1995
    }
1996
 
1997
  /* Couldn't find a similar load.  */
1998
  return 0;
1999
}                               /* is_pfree */
2000
 
2001
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2002
   a load moved speculatively, or if load_insn is protected by
2003
   a compare on load_insn's address).  */
2004
 
2005
static int
2006
is_prisky (rtx load_insn, int bb_src, int bb_trg)
2007
{
2008
  if (FED_BY_SPEC_LOAD (load_insn))
2009
    return 1;
2010
 
2011
  if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
2012
    /* Dependence may 'hide' out of the region.  */
2013
    return 1;
2014
 
2015
  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2016
    return 1;
2017
 
2018
  return 0;
2019
}
2020
 
2021
/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2022
   Return 1 if insn is exception-free (and the motion is valid)
2023
   and 0 otherwise.  */
2024
 
2025
static int
2026
is_exception_free (rtx insn, int bb_src, int bb_trg)
2027
{
2028
  int insn_class = haifa_classify_insn (insn);
2029
 
2030
  /* Handle non-load insns.  */
2031
  switch (insn_class)
2032
    {
2033
    case TRAP_FREE:
2034
      return 1;
2035
    case TRAP_RISKY:
2036
      return 0;
2037
    default:;
2038
    }
2039
 
2040
  /* Handle loads.  */
2041
  if (!flag_schedule_speculative_load)
2042
    return 0;
2043
  IS_LOAD_INSN (insn) = 1;
2044
  switch (insn_class)
2045
    {
2046
    case IFREE:
2047
      return 1;
2048
    case IRISKY:
2049
      return 0;
2050
    case PFREE_CANDIDATE:
2051
      if (is_pfree (insn, bb_src, bb_trg))
2052
        return 1;
2053
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
2054
    case PRISKY_CANDIDATE:
2055
      if (!flag_schedule_speculative_load_dangerous
2056
          || is_prisky (insn, bb_src, bb_trg))
2057
        return 0;
2058
      break;
2059
    default:;
2060
    }
2061
 
2062
  return flag_schedule_speculative_load_dangerous;
2063
}
2064
 
2065
/* The number of insns from the current block scheduled so far.  */
2066
static int sched_target_n_insns;
2067
/* The number of insns from the current block to be scheduled in total.  */
2068
static int target_n_insns;
2069
/* The number of insns from the entire region scheduled so far.  */
2070
static int sched_n_insns;
2071
 
2072
/* Implementations of the sched_info functions for region scheduling.  */
2073
static void init_ready_list (void);
2074
static int can_schedule_ready_p (rtx);
2075
static void begin_schedule_ready (rtx, rtx);
2076
static ds_t new_ready (rtx, ds_t);
2077
static int schedule_more_p (void);
2078
static const char *rgn_print_insn (const_rtx, int);
2079
static int rgn_rank (rtx, rtx);
2080
static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
2081
 
2082
/* Functions for speculative scheduling.  */
2083
static void rgn_add_remove_insn (rtx, int);
2084
static void rgn_add_block (basic_block, basic_block);
2085
static void rgn_fix_recovery_cfg (int, int, int);
2086
static basic_block advance_target_bb (basic_block, rtx);
2087
 
2088
/* Return nonzero if there are more insns that should be scheduled.  */
2089
 
2090
static int
2091
schedule_more_p (void)
2092
{
2093
  return sched_target_n_insns < target_n_insns;
2094
}
2095
 
2096
/* Add all insns that are initially ready to the ready list READY.  Called
2097
   once before scheduling a set of insns.  */
2098
 
2099
static void
2100
init_ready_list (void)
2101
{
2102
  rtx prev_head = current_sched_info->prev_head;
2103
  rtx next_tail = current_sched_info->next_tail;
2104
  int bb_src;
2105
  rtx insn;
2106
 
2107
  target_n_insns = 0;
2108
  sched_target_n_insns = 0;
2109
  sched_n_insns = 0;
2110
 
2111
  /* Print debugging information.  */
2112
  if (sched_verbose >= 5)
2113
    debug_rgn_dependencies (target_bb);
2114
 
2115
  /* Prepare current target block info.  */
2116
  if (current_nr_blocks > 1)
2117
    compute_trg_info (target_bb);
2118
 
2119
  /* Initialize ready list with all 'ready' insns in target block.
2120
     Count number of insns in the target block being scheduled.  */
2121
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2122
    {
2123
      try_ready (insn);
2124
      target_n_insns++;
2125
 
2126
      gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2127
    }
2128
 
2129
  /* Add to ready list all 'ready' insns in valid source blocks.
2130
     For speculative insns, check-live, exception-free, and
2131
     issue-delay.  */
2132
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2133
    if (IS_VALID (bb_src))
2134
      {
2135
        rtx src_head;
2136
        rtx src_next_tail;
2137
        rtx tail, head;
2138
 
2139
        get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2140
                           &head, &tail);
2141
        src_next_tail = NEXT_INSN (tail);
2142
        src_head = head;
2143
 
2144
        for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2145
          if (INSN_P (insn) && !BOUNDARY_DEBUG_INSN_P (insn))
2146
            try_ready (insn);
2147
      }
2148
}
2149
 
2150
/* Called after taking INSN from the ready list.  Returns nonzero if this
2151
   insn can be scheduled, zero if we should silently discard it.  */
2152
 
2153
static int
2154
can_schedule_ready_p (rtx insn)
2155
{
2156
  /* An interblock motion?  */
2157
  if (INSN_BB (insn) != target_bb
2158
      && IS_SPECULATIVE_INSN (insn)
2159
      && !check_live (insn, INSN_BB (insn)))
2160
    return 0;
2161
  else
2162
    return 1;
2163
}
2164
 
2165
/* Updates counters and other information.  Split from can_schedule_ready_p ()
2166
   because when we schedule an insn speculatively, the insn passed to
2167
   can_schedule_ready_p () differs from the one passed to
2168
   begin_schedule_ready ().  */
2169
static void
2170
begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED)
2171
{
2172
  /* An interblock motion?  */
2173
  if (INSN_BB (insn) != target_bb)
2174
    {
2175
      if (IS_SPECULATIVE_INSN (insn))
2176
        {
2177
          gcc_assert (check_live (insn, INSN_BB (insn)));
2178
 
2179
          update_live (insn, INSN_BB (insn));
2180
 
2181
          /* For speculative load, mark insns fed by it.  */
2182
          if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2183
            set_spec_fed (insn);
2184
 
2185
          nr_spec++;
2186
        }
2187
      nr_inter++;
2188
    }
2189
  else
2190
    {
2191
      /* In block motion.  */
2192
      sched_target_n_insns++;
2193
    }
2194
  sched_n_insns++;
2195
}
2196
 
2197
/* Called after INSN has all its hard dependencies resolved and the speculation
2198
   of type TS is enough to overcome them all.
2199
   Return nonzero if it should be moved to the ready list or the queue, or zero
2200
   if we should silently discard it.  */
2201
static ds_t
2202
new_ready (rtx next, ds_t ts)
2203
{
2204
  if (INSN_BB (next) != target_bb)
2205
    {
2206
      int not_ex_free = 0;
2207
 
2208
      /* For speculative insns, before inserting to ready/queue,
2209
         check live, exception-free, and issue-delay.  */
2210
      if (!IS_VALID (INSN_BB (next))
2211
          || CANT_MOVE (next)
2212
          || (IS_SPECULATIVE_INSN (next)
2213
              && ((recog_memoized (next) >= 0
2214
                   && min_insn_conflict_delay (curr_state, next, next)
2215
                   > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2216
                  || IS_SPECULATION_CHECK_P (next)
2217
                  || !check_live (next, INSN_BB (next))
2218
                  || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2219
                                                        target_bb)))))
2220
        {
2221
          if (not_ex_free
2222
              /* We are here because is_exception_free () == false.
2223
                 But we possibly can handle that with control speculation.  */
2224
              && sched_deps_info->generate_spec_deps
2225
              && spec_info->mask & BEGIN_CONTROL)
2226
            {
2227
              ds_t new_ds;
2228
 
2229
              /* Add control speculation to NEXT's dependency type.  */
2230
              new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2231
 
2232
              /* Check if NEXT can be speculated with new dependency type.  */
2233
              if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2234
                /* Here we got new control-speculative instruction.  */
2235
                ts = new_ds;
2236
              else
2237
                /* NEXT isn't ready yet.  */
2238
                ts = (ts & ~SPECULATIVE) | HARD_DEP;
2239
            }
2240
          else
2241
            /* NEXT isn't ready yet.  */
2242
            ts = (ts & ~SPECULATIVE) | HARD_DEP;
2243
        }
2244
    }
2245
 
2246
  return ts;
2247
}
2248
 
2249
/* Return a string that contains the insn uid and optionally anything else
2250
   necessary to identify this insn in an output.  It's valid to use a
2251
   static buffer for this.  The ALIGNED parameter should cause the string
2252
   to be formatted so that multiple output lines will line up nicely.  */
2253
 
2254
static const char *
2255
rgn_print_insn (const_rtx insn, int aligned)
2256
{
2257
  static char tmp[80];
2258
 
2259
  if (aligned)
2260
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2261
  else
2262
    {
2263
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2264
        sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2265
      else
2266
        sprintf (tmp, "%d", INSN_UID (insn));
2267
    }
2268
  return tmp;
2269
}
2270
 
2271
/* Compare priority of two insns.  Return a positive number if the second
2272
   insn is to be preferred for scheduling, and a negative one if the first
2273
   is to be preferred.  Zero if they are equally good.  */
2274
 
2275
static int
2276
rgn_rank (rtx insn1, rtx insn2)
2277
{
2278
  /* Some comparisons make sense in interblock scheduling only.  */
2279
  if (INSN_BB (insn1) != INSN_BB (insn2))
2280
    {
2281
      int spec_val, prob_val;
2282
 
2283
      /* Prefer an inblock motion over an interblock motion.  */
2284
      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2285
        return 1;
2286
      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2287
        return -1;
2288
 
2289
      /* Prefer a useful motion over a speculative one.  */
2290
      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2291
      if (spec_val)
2292
        return spec_val;
2293
 
2294
      /* Prefer a more probable (speculative) insn.  */
2295
      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2296
      if (prob_val)
2297
        return prob_val;
2298
    }
2299
  return 0;
2300
}
2301
 
2302
/* NEXT is an instruction that depends on INSN (a backward dependence);
2303
   return nonzero if we should include this dependence in priority
2304
   calculations.  */
2305
 
2306
int
2307
contributes_to_priority (rtx next, rtx insn)
2308
{
2309
  /* NEXT and INSN reside in one ebb.  */
2310
  return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2311
}
2312
 
2313
/* INSN is a JUMP_INSN, COND_EXEC is the set of registers that are
2314
   conditionally set before INSN.  Store the set of registers that
2315
   must be considered as used by this jump in USED and that of
2316
   registers that must be considered as set in SET.  */
2317
 
2318
static void
2319
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2320
                               regset cond_exec ATTRIBUTE_UNUSED,
2321
                               regset used ATTRIBUTE_UNUSED,
2322
                               regset set ATTRIBUTE_UNUSED)
2323
{
2324
  /* Nothing to do here, since we postprocess jumps in
2325
     add_branch_dependences.  */
2326
}
2327
 
2328
/* This variable holds common_sched_info hooks and data relevant to
2329
   the interblock scheduler.  */
2330
static struct common_sched_info_def rgn_common_sched_info;
2331
 
2332
 
2333
/* This holds data for the dependence analysis relevant to
2334
   the interblock scheduler.  */
2335
static struct sched_deps_info_def rgn_sched_deps_info;
2336
 
2337
/* This holds constant data used for initializing the above structure
2338
   for the Haifa scheduler.  */
2339
static const struct sched_deps_info_def rgn_const_sched_deps_info =
2340
  {
2341
    compute_jump_reg_dependencies,
2342
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2343
    0, 0, 0
2344
  };
2345
 
2346
/* Same as above, but for the selective scheduler.  */
2347
static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2348
  {
2349
    compute_jump_reg_dependencies,
2350
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2351
    0, 0, 0
2352
  };
2353
 
2354
/* Return true if scheduling INSN will finish the scheduling of the
2355
   current block.  */
2356
static bool
2357
rgn_insn_finishes_block_p (rtx insn)
2358
{
2359
  if (INSN_BB (insn) == target_bb
2360
      && sched_target_n_insns + 1 == target_n_insns)
2361
    /* INSN is the last not-scheduled instruction in the current block.  */
2362
    return true;
2363
 
2364
  return false;
2365
}
2366
 
2367
/* Used in schedule_insns to initialize current_sched_info for scheduling
2368
   regions (or single basic blocks).  */
2369
 
2370
static const struct haifa_sched_info rgn_const_sched_info =
2371
{
2372
  init_ready_list,
2373
  can_schedule_ready_p,
2374
  schedule_more_p,
2375
  new_ready,
2376
  rgn_rank,
2377
  rgn_print_insn,
2378
  contributes_to_priority,
2379
  rgn_insn_finishes_block_p,
2380
 
2381
  NULL, NULL,
2382
  NULL, NULL,
2383
  0, 0,
2384
 
2385
  rgn_add_remove_insn,
2386
  begin_schedule_ready,
2387
  advance_target_bb,
2388
  SCHED_RGN
2389
};
2390
 
2391
/* This variable holds the data and hooks needed by the Haifa scheduler backend
2392
   for the interblock scheduler frontend.  */
2393
static struct haifa_sched_info rgn_sched_info;
2394
 
2395
/* Returns maximum priority that an insn was assigned to.  */
2396
 
2397
int
2398
get_rgn_sched_max_insns_priority (void)
2399
{
2400
  return rgn_sched_info.sched_max_insns_priority;
2401
}
2402
 
2403
/* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register.  */
2404
 
2405
static bool
2406
sets_likely_spilled (rtx pat)
2407
{
2408
  bool ret = false;
2409
  note_stores (pat, sets_likely_spilled_1, &ret);
2410
  return ret;
2411
}
2412
 
2413
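/* Helper for sets_likely_spilled, called via note_stores.  Set
   *(bool *) DATA when PAT is a SET of a hard register X whose class
   is likely to be spilled.  */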
static void
2414
sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2415
{
2416
  bool *ret = (bool *) data;
2417
 
2418
  if (GET_CODE (pat) == SET
2419
      && REG_P (x)
2420
      && REGNO (x) < FIRST_PSEUDO_REGISTER
2421
      && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2422
    *ret = true;
2423
}
2424
 
2425
/* A bitmap to note insns that participate in any dependency.  Used in
2426
   add_branch_dependences.  */
2427
static sbitmap insn_referenced;
2428
 
2429
/* Add dependences so that branches are scheduled to run last in their
2430
   block.  */
2431
static void
2432
add_branch_dependences (rtx head, rtx tail)
2433
{
2434
  rtx insn, last;
2435
 
2436
  /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2437
     that can throw exceptions, force them to remain in order at the end of
2438
     the block by adding dependencies and giving the last a high priority.
2439
     There may be notes present, and prev_head may also be a note.
2440
 
2441
     Branches must obviously remain at the end.  Calls should remain at the
2442
     end since moving them results in worse register allocation.  Uses remain
2443
     at the end to ensure proper register allocation.
2444
 
2445
     cc0 setters remain at the end because they can't be moved away from
2446
     their cc0 user.
2447
 
2448
     COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2449
 
2450
     Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2451
     are not moved before reload because we can wind up with register
2452
     allocation failures.  */
2453
 
2454
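  /* Skip any trailing debug insns so that we start from the last
     non-debug insn of the block (or from HEAD if there is none).  */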
  while (tail != head && DEBUG_INSN_P (tail))
2455
    tail = PREV_INSN (tail);
2456
 
2457
  insn = tail;
2458
  last = 0;
2459
  while (CALL_P (insn)
2460
         || JUMP_P (insn)
2461
         || (NONJUMP_INSN_P (insn)
2462
             && (GET_CODE (PATTERN (insn)) == USE
2463
                 || GET_CODE (PATTERN (insn)) == CLOBBER
2464
                 || can_throw_internal (insn)
2465
#ifdef HAVE_cc0
2466
                 || sets_cc0_p (PATTERN (insn))
2467
#endif
2468
                 || (!reload_completed
2469
                     && sets_likely_spilled (PATTERN (insn)))))
2470
         || NOTE_P (insn))
2471
    {
2472
      if (!NOTE_P (insn))
2473
        {
2474
          if (last != 0
2475
              && sd_find_dep_between (insn, last, false) == NULL)
2476
            {
2477
              if (! sched_insns_conditions_mutex_p (last, insn))
2478
                add_dependence (last, insn, REG_DEP_ANTI);
2479
              SET_BIT (insn_referenced, INSN_LUID (insn));
2480
            }
2481
 
2482
          CANT_MOVE (insn) = 1;
2483
 
2484
          last = insn;
2485
        }
2486
 
2487
      /* Don't overrun the bounds of the basic block.  */
2488
      if (insn == head)
2489
        break;
2490
 
2491
      do
2492
        insn = PREV_INSN (insn);
2493
      while (insn != head && DEBUG_INSN_P (insn));
2494
    }
2495
 
2496
  /* Make sure these insns are scheduled last in their block.  */
2497
  insn = last;
2498
  if (insn != 0)
2499
    while (insn != head)
2500
      {
2501
        insn = prev_nonnote_insn (insn);
2502
 
2503
        if (TEST_BIT (insn_referenced, INSN_LUID (insn))
2504
            || DEBUG_INSN_P (insn))
2505
          continue;
2506
 
2507
        if (! sched_insns_conditions_mutex_p (last, insn))
2508
          add_dependence (last, insn, REG_DEP_ANTI);
2509
      }
2510
 
2511
  if (!targetm.have_conditional_execution ())
2512
    return;
2513
 
2514
  /* Finally, if the block ends in a jump, and we are doing intra-block
2515
     scheduling, make sure that the branch depends on any COND_EXEC insns
2516
     inside the block to avoid moving the COND_EXECs past the branch insn.
2517
 
2518
     We only have to do this after reload, because (1) before reload there
2519
     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2520
     scheduler after reload.
2521
 
2522
     FIXME: We could in some cases move COND_EXEC insns past the branch if
2523
     this scheduler would be a little smarter.  Consider this code:
2524
 
2525
                T = [addr]
2526
        C  ?    addr += 4
2527
        !C ?    X += 12
2528
        C  ?    T += 1
2529
        C  ?    jump foo
2530
 
2531
     On a target with a one cycle stall on a memory access the optimal
2532
     sequence would be:
2533
 
2534
                T = [addr]
2535
        C  ?    addr += 4
2536
        C  ?    T += 1
2537
        C  ?    jump foo
2538
        !C ?    X += 12
2539
 
2540
     We don't want to put the 'X += 12' before the branch because it just
2541
     wastes a cycle of execution time when the branch is taken.
2542
 
2543
     Note that in the example "!C" will always be true.  That is another
2544
     possible improvement for handling COND_EXECs in this scheduler: it
2545
     could remove always-true predicates.  */
2546
 
2547
  if (!reload_completed || ! JUMP_P (tail))
2548
    return;
2549
 
2550
  insn = tail;
2551
  while (insn != head)
2552
    {
2553
      insn = PREV_INSN (insn);
2554
 
2555
      /* Note that we want to add this dependency even when
2556
         sched_insns_conditions_mutex_p returns true.  The whole point
2557
         is that we _want_ this dependency, even if these insns really
2558
         are independent.  */
2559
      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2560
        add_dependence (tail, insn, REG_DEP_ANTI);
2561
    }
2562
}
2563
 
2564
/* Data structures for the computation of data dependences in a region.  We
2565
   keep one `deps' structure for every basic block.  Before analyzing the
2566
   data dependences for a bb, its variables are initialized as a function of
2567
   the variables of its predecessors.  When the analysis for a bb completes,
2568
   we save the contents to the corresponding bb_deps[bb] variable.  */
2569
 
2570
static struct deps_desc *bb_deps;
2571
 
2572
/* Duplicate the INSN_LIST elements of COPY and prepend them to OLD.  */
2573
 
2574
static rtx
2575
concat_INSN_LIST (rtx copy, rtx old)
2576
{
2577
  rtx new_rtx = old;
2578
  for (; copy ; copy = XEXP (copy, 1))
2579
    new_rtx = alloc_INSN_LIST (XEXP (copy, 0), new_rtx);
2580
  return new_rtx;
2581
}
2582
 
2583
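/* Prepend the INSN_LIST elements of COPY_INSNS and the corresponding
   EXPR_LIST elements of COPY_MEMS to *OLD_INSNS_P and *OLD_MEMS_P.  */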
static void
2584
concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2585
                      rtx *old_mems_p)
2586
{
2587
  rtx new_insns = *old_insns_p;
2588
  rtx new_mems = *old_mems_p;
2589
 
2590
  while (copy_insns)
2591
    {
2592
      new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2593
      new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2594
      copy_insns = XEXP (copy_insns, 1);
2595
      copy_mems = XEXP (copy_mems, 1);
2596
    }
2597
 
2598
  *old_insns_p = new_insns;
2599
  *old_mems_p = new_mems;
2600
}
2601
 
2602
/* Join PRED_DEPS to the SUCC_DEPS.  */
2603
void
2604
deps_join (struct deps_desc *succ_deps, struct deps_desc *pred_deps)
2605
{
2606
  unsigned reg;
2607
  reg_set_iterator rsi;
2608
 
2609
  /* The reg_last lists are inherited by successor.  */
2610
  EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2611
    {
2612
      struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2613
      struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2614
 
2615
      succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2616
      succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2617
      succ_rl->implicit_sets
2618
        = concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2619
      succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2620
                                            succ_rl->clobbers);
2621
      succ_rl->uses_length += pred_rl->uses_length;
2622
      succ_rl->clobbers_length += pred_rl->clobbers_length;
2623
    }
2624
  IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2625
 
2626
  /* Mem read/write lists are inherited by successor.  */
2627
  concat_insn_mem_list (pred_deps->pending_read_insns,
2628
                        pred_deps->pending_read_mems,
2629
                        &succ_deps->pending_read_insns,
2630
                        &succ_deps->pending_read_mems);
2631
  concat_insn_mem_list (pred_deps->pending_write_insns,
2632
                        pred_deps->pending_write_mems,
2633
                        &succ_deps->pending_write_insns,
2634
                        &succ_deps->pending_write_mems);
2635
 
2636
  succ_deps->last_pending_memory_flush
2637
    = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2638
                        succ_deps->last_pending_memory_flush);
2639
 
2640
  succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2641
  succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2642
  succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2643
 
2644
  /* last_function_call is inherited by successor.  */
2645
  succ_deps->last_function_call
2646
    = concat_INSN_LIST (pred_deps->last_function_call,
2647
                        succ_deps->last_function_call);
2648
 
2649
  /* last_function_call_may_noreturn is inherited by successor.  */
2650
  succ_deps->last_function_call_may_noreturn
2651
    = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2652
                        succ_deps->last_function_call_may_noreturn);
2653
 
2654
  /* sched_before_next_call is inherited by successor.  */
2655
  succ_deps->sched_before_next_call
2656
    = concat_INSN_LIST (pred_deps->sched_before_next_call,
2657
                        succ_deps->sched_before_next_call);
2658
}
2659
 
2660
/* After computing the dependencies for block BB, propagate the dependencies
2661
   found in PRED_DEPS to the successors of the block.  */
2662
static void
2663
propagate_deps (int bb, struct deps_desc *pred_deps)
2664
{
2665
  basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2666
  edge_iterator ei;
2667
  edge e;
2668
 
2669
  /* bb's structures are inherited by its successors.  */
2670
  FOR_EACH_EDGE (e, ei, block->succs)
2671
    {
2672
      /* Only bbs "below" bb, in the same region, are interesting.  */
2673
      if (e->dest == EXIT_BLOCK_PTR
2674
          || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2675
          || BLOCK_TO_BB (e->dest->index) <= bb)
2676
        continue;
2677
 
2678
      deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2679
    }
2680
 
2681
  /* These lists should point to the right place, for correct
2682
     freeing later.  */
2683
  bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2684
  bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2685
  bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2686
  bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2687
 
2688
  /* Can't allow these to be freed twice.  */
2689
  pred_deps->pending_read_insns = 0;
2690
  pred_deps->pending_read_mems = 0;
2691
  pred_deps->pending_write_insns = 0;
2692
  pred_deps->pending_write_mems = 0;
2693
}
2694
 
2695
/* Compute dependences inside bb.  In a multiple-block region:
2696
   (1) a bb is analyzed after its predecessors, and (2) the lists in
2697
   effect at the end of bb (after analyzing for bb) are inherited by
2698
   bb's successors.
2699
 
2700
   Specifically for reg-reg data dependences, the block insns are
2701
   scanned by sched_analyze () top-to-bottom.  Three lists are
2702
   maintained by sched_analyze (): reg_last[].sets for register DEFs,
2703
   reg_last[].implicit_sets for implicit hard register DEFs, and
2704
   reg_last[].uses for register USEs.
2705
 
2706
   When analysis is completed for bb, we update for its successors:
2707
   ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2708
   ;  - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2709
   ;  - USES[succ] = Union (USES [succ], DEFS [bb])
2710
 
2711
   The mechanism for computing mem-mem data dependence is very
2712
   similar, and the result is interblock dependences in the region.  */
2713
 
2714
static void
2715
compute_block_dependences (int bb)
2716
{
2717
  rtx head, tail;
2718
  struct deps_desc tmp_deps;
2719
 
2720
  tmp_deps = bb_deps[bb];
2721
 
2722
  /* Do the analysis for this block.  */
2723
  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2724
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2725
 
2726
  sched_analyze (&tmp_deps, head, tail);
2727
 
2728
  /* Selective scheduling handles control dependencies by itself.  */
2729
  if (!sel_sched_p ())
2730
    add_branch_dependences (head, tail);
2731
 
2732
  if (current_nr_blocks > 1)
2733
    propagate_deps (bb, &tmp_deps);
2734
 
2735
  /* Free up the INSN_LISTs.  */
2736
  free_deps (&tmp_deps);
2737
 
2738
  if (targetm.sched.dependencies_evaluation_hook)
2739
    targetm.sched.dependencies_evaluation_hook (head, tail);
2740
}
2741
 
2742
/* Free dependencies of instructions inside BB.  */
2743
static void
2744
free_block_dependencies (int bb)
2745
{
2746
  rtx head;
2747
  rtx tail;
2748
 
2749
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2750
 
2751
  if (no_real_insns_p (head, tail))
2752
    return;
2753
 
2754
  sched_free_deps (head, tail, true);
2755
}
2756
 
2757
/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2758
   them to the unused_*_list variables, so that they can be reused.  */
2759
 
2760
static void
2761
free_pending_lists (void)
2762
{
2763
  int bb;
2764
 
2765
  for (bb = 0; bb < current_nr_blocks; bb++)
2766
    {
2767
      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2768
      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2769
      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2770
      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2771
    }
2772
}
2773
 
2774
/* Print dependences for debugging starting from FROM_BB.
2775
   Callable from debugger.  */
2776
void
2779
debug_rgn_dependencies (int from_bb)
2780
{
2781
  int bb;
2782
 
2783
  fprintf (sched_dump,
2784
           ";;   --------------- forward dependences: ------------ \n");
2785
 
2786
  for (bb = from_bb; bb < current_nr_blocks; bb++)
2787
    {
2788
      rtx head, tail;
2789
 
2790
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2791
      fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
2792
               BB_TO_BLOCK (bb), bb);
2793
 
2794
      debug_dependencies (head, tail);
2795
    }
2796
}
2797
 
2798
/* Print dependency information for instructions between HEAD and TAIL.
2799
   ??? This function would probably fit best in haifa-sched.c.  */
2800
void
debug_dependencies (rtx head, rtx tail)
2801
{
2802
  rtx insn;
2803
  rtx next_tail = NEXT_INSN (tail);
2804
 
2805
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2806
           "insn", "code", "bb", "dep", "prio", "cost",
2807
           "reservation");
2808
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2809
           "----", "----", "--", "---", "----", "----",
2810
           "-----------");
2811
 
2812
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2813
    {
2814
      if (! INSN_P (insn))
2815
        {
2816
          int n;
2817
          fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
2818
          if (NOTE_P (insn))
2819
            {
2820
              n = NOTE_KIND (insn);
2821
              fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2822
            }
2823
          else
2824
            fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2825
          continue;
2826
        }
2827
 
2828
      fprintf (sched_dump,
2829
               ";;   %s%5d%6d%6d%6d%6d%6d   ",
2830
               (SCHED_GROUP_P (insn) ? "+" : " "),
2831
               INSN_UID (insn),
2832
               INSN_CODE (insn),
2833
               BLOCK_NUM (insn),
2834
               sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2835
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2836
                               : INSN_PRIORITY (insn))
2837
                : INSN_PRIORITY (insn)),
2838
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2839
                               : insn_cost (insn))
2840
                : insn_cost (insn)));
2841
 
2842
      if (recog_memoized (insn) < 0)
2843
        fprintf (sched_dump, "nothing");
2844
      else
2845
        print_reservation (sched_dump, insn);
2846
 
2847
      fprintf (sched_dump, "\t: ");
2848
      {
2849
        sd_iterator_def sd_it;
2850
        dep_t dep;
2851
 
2852
        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2853
          fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep)));
2854
      }
2855
      fprintf (sched_dump, "\n");
2856
    }
2857
 
2858
  fprintf (sched_dump, "\n");
2859
}
2860
 
2861
/* Returns true if all the basic blocks of the current region have
2862
   NOTE_DISABLE_SCHED_OF_BLOCK, i.e. the region must not be scheduled.  */
2863
bool
2864
sched_is_disabled_for_current_region_p (void)
2865
{
2866
  int bb;
2867
 
2868
  for (bb = 0; bb < current_nr_blocks; bb++)
2869
    if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2870
      return false;
2871
 
2872
  return true;
2873
}
2874
 
2875
/* Free all region dependencies saved in INSN_BACK_DEPS and
2876
   INSN_RESOLVED_BACK_DEPS.  The Haifa scheduler does this on the fly
2877
   when scheduling, so this function is supposed to be called from
2878
   the selective scheduling only.  */
2879
void
2880
free_rgn_deps (void)
2881
{
2882
  int bb;
2883
 
2884
  for (bb = 0; bb < current_nr_blocks; bb++)
2885
    {
2886
      rtx head, tail;
2887
 
2888
      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2889
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2890
 
2891
      sched_free_deps (head, tail, false);
2892
    }
2893
}
2894
 
2895
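/* The number of insns in the current region, accumulated by
   compute_priorities and checked against the number of scheduled
   insns in schedule_region.  */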
static int rgn_n_insns;
2896
 
2897
/* Compute insn priority for a current region.  */
2898
void
2899
compute_priorities (void)
2900
{
2901
  int bb;
2902
 
2903
  current_sched_info->sched_max_insns_priority = 0;
2904
  for (bb = 0; bb < current_nr_blocks; bb++)
2905
    {
2906
      rtx head, tail;
2907
 
2908
      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2909
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2910
 
2911
      if (no_real_insns_p (head, tail))
2912
        continue;
2913
 
2914
      rgn_n_insns += set_priorities (head, tail);
2915
    }
2916
  current_sched_info->sched_max_insns_priority++;
2917
}
2918
 
2919
/* Schedule a region.  A region is either an inner loop, a loop-free
2920
   subroutine, or a single basic block.  Each bb in the region is
2921
   scheduled after its flow predecessors.  */
2922
 
2923
static void
2924
schedule_region (int rgn)
2925
{
2926
  int bb;
2927
  int sched_rgn_n_insns = 0;
2928
 
2929
  rgn_n_insns = 0;
2930
 
2931
  rgn_setup_region (rgn);
2932
 
2933
  /* Don't schedule a region that is marked by
2934
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
2935
  if (sched_is_disabled_for_current_region_p ())
2936
    return;
2937
 
2938
  sched_rgn_compute_dependencies (rgn);
2939
 
2940
  sched_rgn_local_init (rgn);
2941
 
2942
  /* Set priorities.  */
2943
  compute_priorities ();
2944
 
2945
  sched_extend_ready_list (rgn_n_insns);
2946
 
2947
  if (sched_pressure_p)
2948
    {
2949
      sched_init_region_reg_pressure_info ();
2950
      for (bb = 0; bb < current_nr_blocks; bb++)
2951
        {
2952
          basic_block first_bb, last_bb;
2953
          rtx head, tail;
2954
 
2955
          first_bb = EBB_FIRST_BB (bb);
2956
          last_bb = EBB_LAST_BB (bb);
2957
 
2958
          get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2959
 
2960
          if (no_real_insns_p (head, tail))
2961
            {
2962
              gcc_assert (first_bb == last_bb);
2963
              continue;
2964
            }
2965
          sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
2966
        }
2967
    }
2968
 
2969
  /* Now we can schedule all blocks.  */
2970
  for (bb = 0; bb < current_nr_blocks; bb++)
2971
    {
2972
      basic_block first_bb, last_bb, curr_bb;
2973
      rtx head, tail;
2974
 
2975
      first_bb = EBB_FIRST_BB (bb);
2976
      last_bb = EBB_LAST_BB (bb);
2977
 
2978
      get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2979
 
2980
      if (no_real_insns_p (head, tail))
2981
        {
2982
          gcc_assert (first_bb == last_bb);
2983
          continue;
2984
        }
2985
 
2986
      current_sched_info->prev_head = PREV_INSN (head);
2987
      current_sched_info->next_tail = NEXT_INSN (tail);
2988
 
2989
      remove_notes (head, tail);
2990
 
2991
      unlink_bb_notes (first_bb, last_bb);
2992
 
2993
      target_bb = bb;
2994
 
2995
      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
2996
      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
2997
 
2998
      curr_bb = first_bb;
2999
      if (dbg_cnt (sched_block))
3000
        {
3001
          schedule_block (&curr_bb);
3002
          gcc_assert (EBB_FIRST_BB (bb) == first_bb);
3003
          sched_rgn_n_insns += sched_n_insns;
3004
        }
3005
      else
3006
        {
3007
          sched_rgn_n_insns += rgn_n_insns;
3008
        }
3009
 
3010
      /* Clean up.  */
3011
      if (current_nr_blocks > 1)
3012
        free_trg_info ();
3013
    }
3014
 
3015
  /* Sanity check: verify that all region insns were scheduled.  */
3016
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);
3017
 
3018
  sched_finish_ready_list ();
3019
 
3020
  /* Done with this region.  */
3021
  sched_rgn_local_finish ();
3022
 
3023
  /* Free dependencies.  */
3024
  for (bb = 0; bb < current_nr_blocks; ++bb)
3025
    free_block_dependencies (bb);
3026
 
3027
  gcc_assert (haifa_recovery_bb_ever_added_p
3028
              || deps_pools_are_empty_p ());
3029
}
3030
 
3031
/* Initialize data structures for region scheduling.  */
3032
 
3033
void
3034
sched_rgn_init (bool single_blocks_p)
3035
{
3036
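  /* PARAM_MIN_SPEC_PROB is given as a percentage; rescale it to the
     REG_BR_PROB_BASE scale used for prob[] and src_prob.  */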
  min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
3037
                    / 100);
3038
 
3039
  nr_inter = 0;
3040
  nr_spec = 0;
3041
 
3042
  extend_regions ();
3043
 
3044
  CONTAINING_RGN (ENTRY_BLOCK) = -1;
3045
  CONTAINING_RGN (EXIT_BLOCK) = -1;
3046
 
3047
  /* Compute regions for scheduling.  */
3048
  if (single_blocks_p
3049
      || n_basic_blocks == NUM_FIXED_BLOCKS + 1
3050
      || !flag_schedule_interblock
3051
      || is_cfg_nonregular ())
3052
    {
3053
      find_single_block_region (sel_sched_p ());
3054
    }
3055
  else
3056
    {
3057
      /* Compute the dominators and post dominators.  */
3058
      if (!sel_sched_p ())
3059
        calculate_dominance_info (CDI_DOMINATORS);
3060
 
3061
      /* Find regions.  */
3062
      find_rgns ();
3063
 
3064
      if (sched_verbose >= 3)
3065
        debug_regions ();
3066
 
3067
      /* For now.  This will move as more and more of haifa is converted
3068
         to using the cfg code.  */
3069
      if (!sel_sched_p ())
3070
        free_dominance_info (CDI_DOMINATORS);
3071
    }
3072
 
3073
  gcc_assert (0 < nr_regions && nr_regions <= n_basic_blocks);
3074
 
3075
  RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1) +
3076
                             RGN_NR_BLOCKS (nr_regions - 1));
3077
}
3078
 
3079
/* Free data structures for region scheduling.  */
3080
void
3081
sched_rgn_finish (void)
3082
{
3083
  /* Reposition the prologue and epilogue notes in case we moved the
3084
     prologue/epilogue insns.  */
3085
  if (reload_completed)
3086
    reposition_prologue_and_epilogue_notes ();
3087
 
3088
  if (sched_verbose)
3089
    {
3090
      if (reload_completed == 0
3091
          && flag_schedule_interblock)
3092
        {
3093
          fprintf (sched_dump,
3094
                   "\n;; Procedure interblock/speculative motions == %d/%d \n",
3095
                   nr_inter, nr_spec);
3096
        }
3097
      else
3098
        gcc_assert (nr_inter <= 0);
3099
      fprintf (sched_dump, "\n\n");
3100
    }
3101
 
3102
  nr_regions = 0;
3103
 
3104
  free (rgn_table);
3105
  rgn_table = NULL;
3106
 
3107
  free (rgn_bb_table);
3108
  rgn_bb_table = NULL;
3109
 
3110
  free (block_to_bb);
3111
  block_to_bb = NULL;
3112
 
3113
  free (containing_rgn);
3114
  containing_rgn = NULL;
3115
 
3116
  free (ebb_head);
3117
  ebb_head = NULL;
3118
}
3119
 
3120
/* Set up global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
3121
   point to the region RGN.  */
3122
void
3123
rgn_setup_region (int rgn)
3124
{
3125
  int bb;
3126
 
3127
  /* Set variables for the current region.  */
3128
  current_nr_blocks = RGN_NR_BLOCKS (rgn);
3129
  current_blocks = RGN_BLOCKS (rgn);
3130
 
3131
  /* EBB_HEAD is a region-scope structure.  But we realloc it for
3132
     each region to save time/memory/something else.
3133
     See the comments in add_block1 for why we allocate +1 element.  */
3134
  ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3135
  for (bb = 0; bb <= current_nr_blocks; bb++)
3136
    ebb_head[bb] = current_blocks + bb;
3137
}
3138
 
3139
/* Compute instruction dependencies in region RGN.  */
3140
void
3141
sched_rgn_compute_dependencies (int rgn)
3142
{
3143
  if (!RGN_DONT_CALC_DEPS (rgn))
3144
    {
3145
      int bb;
3146
 
3147
      if (sel_sched_p ())
        sched_emulate_haifa_p = 1;

      init_deps_global ();

      /* Initializations for region data dependence analysis.  */
      bb_deps = XNEWVEC (struct deps_desc, current_nr_blocks);
      for (bb = 0; bb < current_nr_blocks; bb++)
        init_deps (bb_deps + bb, false);

      /* Initialize bitmap used in add_branch_dependences.  */
      insn_referenced = sbitmap_alloc (sched_max_luid);
      sbitmap_zero (insn_referenced);

      /* Compute backward dependencies.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
        compute_block_dependences (bb);

      sbitmap_free (insn_referenced);
      free_pending_lists ();
      finish_deps_global ();
      free (bb_deps);

      /* We don't want to recalculate this twice.  */
      RGN_DONT_CALC_DEPS (rgn) = 1;

      if (sel_sched_p ())
        sched_emulate_haifa_p = 0;
    }
  else
    /* Either this is a recovery block, which is always a single-block
       region, or we are using the selective scheduler.  */
    gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
}

/* Initialize data structures for region RGN.  */
void
sched_rgn_local_init (int rgn)
{
  int bb;

  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
  if (current_nr_blocks > 1)
    {
      basic_block block;
      edge e;
      edge_iterator ei;

      prob = XNEWVEC (int, current_nr_blocks);

      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
      sbitmap_vector_zero (dom, current_nr_blocks);

      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            SET_EDGE_TO_BIT (e, rgn_nr_edges++);
        }

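      /* Second pass over the region's edges: record each edge in
         rgn_edges at the bit number it was assigned above.  */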
      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            rgn_edges[rgn_nr_edges++] = e;
        }

      /* Split edges.  */
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (pot_split, current_nr_blocks);
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (ancestor_edges, current_nr_blocks);

      /* Compute probabilities, dominators, split_edges.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
        compute_dom_prob_ps (bb);

      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
      /* We don't need them anymore.  But we want to avoid duplication of
         aux fields in the newly created edges.  */
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            e->aux = NULL;
        }
    }
}

/* Free data computed for the finished region.  */
void
sched_rgn_local_free (void)
{
  free (prob);
  sbitmap_vector_free (dom);
  sbitmap_vector_free (pot_split);
  sbitmap_vector_free (ancestor_edges);
  free (rgn_edges);
}

/* Free data computed for the finished region, but only if it was a
   multi-block region handled by the interblock (non-selective) scheduler.  */
void
sched_rgn_local_finish (void)
{
  if (current_nr_blocks > 1 && !sel_sched_p ())
    {
      sched_rgn_local_free ();
    }
}

/* Setup scheduler infos.  */
void
rgn_setup_common_sched_info (void)
{
  memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
          sizeof (rgn_common_sched_info));

  rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
  rgn_common_sched_info.add_block = rgn_add_block;
  rgn_common_sched_info.estimate_number_of_insns
    = rgn_estimate_number_of_insns;
  rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;

  common_sched_info = &rgn_common_sched_info;
}

/* Setup all *_sched_info structures (for the Haifa frontend
   and for the dependence analysis) in the interblock scheduler.  */
void
rgn_setup_sched_infos (void)
{
  if (!sel_sched_p ())
    memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
            sizeof (rgn_sched_deps_info));
  else
    memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
            sizeof (rgn_sched_deps_info));

  sched_deps_info = &rgn_sched_deps_info;

  memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
  current_sched_info = &rgn_sched_info;
}

/* The one entry point in this file.  */
void
schedule_insns (void)
{
  int rgn;

  /* Taking care of this degenerate case makes the rest of
     this code simpler.  */
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
    return;

  rgn_setup_common_sched_info ();
  rgn_setup_sched_infos ();

  haifa_sched_init ();
  sched_rgn_init (reload_completed);

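  /* Blocks created while scheduling are not present in the dataflow
     information yet; NOT_IN_DF tracks them (see rgn_add_block).  */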
  bitmap_initialize (&not_in_df, 0);
  bitmap_clear (&not_in_df);

  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    if (dbg_cnt (sched_region))
      schedule_region (rgn);

  /* Clean up.  */
  sched_rgn_finish ();
  bitmap_clear (&not_in_df);

  haifa_sched_finish ();
}

/* INSN has been added to/removed from current region.  */
static void
rgn_add_remove_insn (rtx insn, int remove_p)
{
  if (!remove_p)
    rgn_n_insns++;
  else
    rgn_n_insns--;

  if (INSN_BB (insn) == target_bb)
    {
      if (!remove_p)
        target_n_insns++;
      else
        target_n_insns--;
    }
}

/* Extend internal data structures.  */
void
extend_regions (void)
{
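  /* Tables indexed by region number or by position in rgn_bb_table grow
     with n_basic_blocks; tables indexed by basic-block index grow with
     last_basic_block.  */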
  rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
  rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
  block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
  containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
}

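/* Make a new single-block region consisting of BB and append it after the
   last existing region.  */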
void
rgn_make_new_region_out_of_new_block (basic_block bb)
{
  int i;

  i = RGN_BLOCKS (nr_regions);
  /* I - first free position in rgn_bb_table.  */

  rgn_bb_table[i] = bb->index;
  RGN_NR_BLOCKS (nr_regions) = 1;
  RGN_HAS_REAL_EBB (nr_regions) = 0;
  RGN_DONT_CALC_DEPS (nr_regions) = 0;
  CONTAINING_RGN (bb->index) = nr_regions;
  BLOCK_TO_BB (bb->index) = 0;

  nr_regions++;

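  /* The new region occupies exactly one slot, so the first-free-slot
     marker advances by one.  */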
  RGN_BLOCKS (nr_regions) = i + 1;
}

/* BB was added to ebb after AFTER.  */
static void
rgn_add_block (basic_block bb, basic_block after)
{
  extend_regions ();
  bitmap_set_bit (&not_in_df, bb->index);

  if (after == 0 || after == EXIT_BLOCK_PTR)
    {
      rgn_make_new_region_out_of_new_block (bb);
      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after == EXIT_BLOCK_PTR);
    }
  else
    {
      int i, pos;

      /* We need to fix rgn_table, block_to_bb, containing_rgn
         and ebb_head.  */

      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);

      /* We extend ebb_head to one more position to
         easily find the last position of the last ebb in
         the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
         is _always_ valid for access.  */

      i = BLOCK_TO_BB (after->index) + 1;
      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in AFTER's ebb.  */

      /* Find index of basic block AFTER.  */
      for (; rgn_bb_table[pos] != after->index; pos--);

      pos++;
      gcc_assert (pos > ebb_head[i - 1]);

      /* i - ebb right after "AFTER".  */
      /* ebb_head[i] - VALID.  */

      /* Source position: POS
         Destination position: POS + 1
         Last position:
           RGN_BLOCKS (nr_regions) - 1
         Number of elements to copy: (last_position) - (source_position) + 1
       */

      memmove (rgn_bb_table + pos + 1,
               rgn_bb_table + pos,
               ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
               * sizeof (*rgn_bb_table));

      rgn_bb_table[pos] = bb->index;

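      /* Everything from POS onwards has moved one slot to the right, so
         bump the start positions of the following ebbs and regions.  */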
      for (; i <= current_nr_blocks; i++)
        ebb_head [i]++;

      i = CONTAINING_RGN (after->index);
      CONTAINING_RGN (bb->index) = i;

      RGN_HAS_REAL_EBB (i) = 1;

      for (++i; i <= nr_regions; i++)
        RGN_BLOCKS (i)++;
    }
}

/* Fix internal data after interblock movement of a jump instruction.
   For parameter meaning please refer to
   sched-int.h: struct sched_info: fix_recovery_cfg.  */
static void
rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
{
  int old_pos, new_pos, i;

  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);

  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
       rgn_bb_table[old_pos] != check_bb_nexti;
       old_pos--);
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);

  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
       rgn_bb_table[new_pos] != bbi;
       new_pos--);
  new_pos++;
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);

  gcc_assert (new_pos < old_pos);

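  /* Make room at NEW_POS by shifting the entries in [NEW_POS, OLD_POS)
     one slot to the right; the stale entry at OLD_POS is overwritten.  */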
  memmove (rgn_bb_table + new_pos + 1,
           rgn_bb_table + new_pos,
           (old_pos - new_pos) * sizeof (*rgn_bb_table));

  rgn_bb_table[new_pos] = check_bb_nexti;

  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
    ebb_head[i]++;
}

/* Return next block in ebb chain.  For parameter meaning please refer to
   sched-int.h: struct sched_info: advance_target_bb.  */
static basic_block
advance_target_bb (basic_block bb, rtx insn)
{
  if (insn)
    return 0;

  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
              && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
  return bb->next_bb;
}

#endif

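/* Gate for the first scheduling pass, which runs before register
   allocation.  */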
static bool
gate_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  return flag_schedule_insns && dbg_cnt (sched_func);
#else
  return 0;
#endif
}

/* Run instruction scheduler.  */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    schedule_insns ();
#endif
  return 0;
}

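/* Gate for the second scheduling pass, which runs after register
   allocation.  */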
static bool
gate_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns_after_reload
    && dbg_cnt (sched2_func);
#else
  return 0;
#endif
}

/* Run second scheduling pass after reload.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling2
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    {
      /* Do control and data sched analysis again,
         and write some more of the results to dump file.  */
      if (flag_sched2_use_superblocks)
        schedule_ebbs ();
      else
        schedule_insns ();
    }
#endif
  return 0;
}

struct rtl_opt_pass pass_sched =
{
 {
  RTL_PASS,
  "sched1",                             /* name */
  gate_handle_sched,                    /* gate */
  rest_of_handle_sched,                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED,                             /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_sched2 =
{
 {
  RTL_PASS,
  "sched2",                             /* name */
  gate_handle_sched2,                   /* gate */
  rest_of_handle_sched2,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED2,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};
