OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [sched-rgn.c] - Blame information for rev 695

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Instruction scheduling pass.
2
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011
4
   Free Software Foundation, Inc.
5
   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
6
   and currently maintained by, Jim Wilson (wilson@cygnus.com)
7
 
8
This file is part of GCC.
9
 
10
GCC is free software; you can redistribute it and/or modify it under
11
the terms of the GNU General Public License as published by the Free
12
Software Foundation; either version 3, or (at your option) any later
13
version.
14
 
15
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16
WARRANTY; without even the implied warranty of MERCHANTABILITY or
17
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18
for more details.
19
 
20
You should have received a copy of the GNU General Public License
21
along with GCC; see the file COPYING3.  If not see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
/* This pass implements list scheduling within basic blocks.  It is
25
   run twice: (1) after flow analysis, but before register allocation,
26
   and (2) after register allocation.
27
 
28
   The first run performs interblock scheduling, moving insns between
29
   different blocks in the same "region", and the second runs only
30
   basic block scheduling.
31
 
32
   Interblock motions performed are useful motions and speculative
33
   motions, including speculative loads.  Motions requiring code
34
   duplication are not supported.  The identification of motion type
35
   and the check for validity of speculative motions requires
36
   construction and analysis of the function's control flow graph.
37
 
38
   The main entry point for this pass is schedule_insns(), called for
39
   each function.  The work of the scheduler is organized in three
40
   levels: (1) function level: insns are subject to splitting,
41
   control-flow-graph is constructed, regions are computed (after
42
   reload, each region is of one block), (2) region level: control
43
   flow graph attributes required for interblock scheduling are
44
   computed (dominators, reachability, etc.), data dependences and
45
   priorities are computed, and (3) block level: insns in the block
46
   are actually scheduled.  */
47
 
48
#include "config.h"
49
#include "system.h"
50
#include "coretypes.h"
51
#include "tm.h"
52
#include "diagnostic-core.h"
53
#include "rtl.h"
54
#include "tm_p.h"
55
#include "hard-reg-set.h"
56
#include "regs.h"
57
#include "function.h"
58
#include "flags.h"
59
#include "insn-config.h"
60
#include "insn-attr.h"
61
#include "except.h"
62
#include "recog.h"
63
#include "cfglayout.h"
64
#include "params.h"
65
#include "sched-int.h"
66
#include "sel-sched.h"
67
#include "target.h"
68
#include "timevar.h"
69
#include "tree-pass.h"
70
#include "dbgcnt.h"
71
 
72
#ifdef INSN_SCHEDULING
73
 
74
/* Some accessor macros for h_i_d members only used within this file.  */
75
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
76
#define IS_LOAD_INSN(INSN) (HID (insn)->is_load_insn)
77
 
78
/* nr_inter/spec counts interblock/speculative motion for the function.  */
79
static int nr_inter, nr_spec;
80
 
81
static int is_cfg_nonregular (void);
82
 
83
/* Number of regions in the procedure.  */
84
int nr_regions = 0;
85
 
86
/* Table of region descriptions.  */
87
region *rgn_table = NULL;
88
 
89
/* Array of lists of regions' blocks.  */
90
int *rgn_bb_table = NULL;
91
 
92
/* Topological order of blocks in the region (if b2 is reachable from
93
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
94
   always referred to by either block or b, while its topological
95
   order name (in the region) is referred to by bb.  */
96
int *block_to_bb = NULL;
97
 
98
/* The number of the region containing a block.  */
99
int *containing_rgn = NULL;
100
 
101
/* ebb_head [i] - is index in rgn_bb_table of the head basic block of i'th ebb.
102
   Currently we can get a ebb only through splitting of currently
103
   scheduling block, therefore, we don't need ebb_head array for every region,
104
   hence, its sufficient to hold it for current one only.  */
105
int *ebb_head = NULL;
106
 
107
/* The minimum probability of reaching a source block so that it will be
108
   considered for speculative scheduling.  */
109
static int min_spec_prob;
110
 
111
static void find_single_block_region (bool);
112
static void find_rgns (void);
113
static bool too_large (int, int *, int *);
114
 
115
/* Blocks of the current region being scheduled.  */
116
int current_nr_blocks;
117
int current_blocks;
118
 
119
/* A speculative motion requires checking live information on the path
120
   from 'source' to 'target'.  The split blocks are those to be checked.
121
   After a speculative motion, live information should be modified in
122
   the 'update' blocks.
123
 
124
   Lists of split and update blocks for each candidate of the current
125
   target are in array bblst_table.  */
126
static basic_block *bblst_table;
127
static int bblst_size, bblst_last;
128
 
129
/* Target info declarations.
130
 
131
   The block currently being scheduled is referred to as the "target" block,
132
   while other blocks in the region from which insns can be moved to the
133
   target are called "source" blocks.  The candidate structure holds info
134
   about such sources: are they valid?  Speculative?  Etc.  */
135
typedef struct
136
{
137
  basic_block *first_member;
138
  int nr_members;
139
}
140
bblst;
141
 
142
typedef struct
143
{
144
  char is_valid;
145
  char is_speculative;
146
  int src_prob;
147
  bblst split_bbs;
148
  bblst update_bbs;
149
}
150
candidate;
151
 
152
static candidate *candidate_table;
153
#define IS_VALID(src) (candidate_table[src].is_valid)
154
#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
155
#define IS_SPECULATIVE_INSN(INSN)                       \
156
  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
157
#define SRC_PROB(src) ( candidate_table[src].src_prob )
158
 
159
/* The bb being currently scheduled.  */
160
int target_bb;
161
 
162
/* List of edges.  */
163
typedef struct
164
{
165
  edge *first_member;
166
  int nr_members;
167
}
168
edgelst;
169
 
170
static edge *edgelst_table;
171
static int edgelst_last;
172
 
173
static void extract_edgelst (sbitmap, edgelst *);
174
 
175
/* Target info functions.  */
176
static void split_edges (int, int, edgelst *);
177
static void compute_trg_info (int);
178
void debug_candidate (int);
179
void debug_candidates (int);
180
 
181
/* Dominators array: dom[i] contains the sbitmap of dominators of
182
   bb i in the region.  */
183
static sbitmap *dom;
184
 
185
/* bb 0 is the only region entry.  */
186
#define IS_RGN_ENTRY(bb) (!bb)
187
 
188
/* Is bb_src dominated by bb_trg.  */
189
#define IS_DOMINATED(bb_src, bb_trg)                                 \
190
( TEST_BIT (dom[bb_src], bb_trg) )
191
 
192
/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
193
   the probability of bb i relative to the region entry.  */
194
static int *prob;
195
 
196
/* Bit-set of edges, where bit i stands for edge i.  */
197
typedef sbitmap edgeset;
198
 
199
/* Number of edges in the region.  */
200
static int rgn_nr_edges;
201
 
202
/* Array of size rgn_nr_edges.  */
203
static edge *rgn_edges;
204
 
205
/* Mapping from each edge in the graph to its number in the rgn.  */
206
#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
207
#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
208
 
209
/* The split edges of a source bb is different for each target
210
   bb.  In order to compute this efficiently, the 'potential-split edges'
211
   are computed for each bb prior to scheduling a region.  This is actually
212
   the split edges of each bb relative to the region entry.
213
 
214
   pot_split[bb] is the set of potential split edges of bb.  */
215
static edgeset *pot_split;
216
 
217
/* For every bb, a set of its ancestor edges.  */
218
static edgeset *ancestor_edges;
219
 
220
#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
221
 
222
/* Speculative scheduling functions.  */
223
static int check_live_1 (int, rtx);
224
static void update_live_1 (int, rtx);
225
static int is_pfree (rtx, int, int);
226
static int find_conditional_protection (rtx, int);
227
static int is_conditionally_protected (rtx, int, int);
228
static int is_prisky (rtx, int, int);
229
static int is_exception_free (rtx, int, int);
230
 
231
static bool sets_likely_spilled (rtx);
232
static void sets_likely_spilled_1 (rtx, const_rtx, void *);
233
static void add_branch_dependences (rtx, rtx);
234
static void compute_block_dependences (int);
235
 
236
static void schedule_region (int);
237
static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
238
static void propagate_deps (int, struct deps_desc *);
239
static void free_pending_lists (void);
240
 
241
/* Functions for construction of the control flow graph.  */
242
 
243
/* Return 1 if control flow graph should not be constructed, 0 otherwise.
244
 
245
   We decide not to build the control flow graph if there is possibly more
246
   than one entry to the function, if computed branches exist, if we
247
   have nonlocal gotos, or if we have an unreachable loop.  */
248
 
249
static int
250
is_cfg_nonregular (void)
251
{
252
  basic_block b;
253
  rtx insn;
254
 
255
  /* If we have a label that could be the target of a nonlocal goto, then
256
     the cfg is not well structured.  */
257
  if (nonlocal_goto_handler_labels)
258
    return 1;
259
 
260
  /* If we have any forced labels, then the cfg is not well structured.  */
261
  if (forced_labels)
262
    return 1;
263
 
264
  /* If we have exception handlers, then we consider the cfg not well
265
     structured.  ?!?  We should be able to handle this now that we
266
     compute an accurate cfg for EH.  */
267
  if (current_function_has_exception_handlers ())
268
    return 1;
269
 
270
  /* If we have insns which refer to labels as non-jumped-to operands,
271
     then we consider the cfg not well structured.  */
272
  FOR_EACH_BB (b)
273
    FOR_BB_INSNS (b, insn)
274
      {
275
        rtx note, next, set, dest;
276
 
277
        /* If this function has a computed jump, then we consider the cfg
278
           not well structured.  */
279
        if (JUMP_P (insn) && computed_jump_p (insn))
280
          return 1;
281
 
282
        if (!INSN_P (insn))
283
          continue;
284
 
285
        note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
286
        if (note == NULL_RTX)
287
          continue;
288
 
289
        /* For that label not to be seen as a referred-to label, this
290
           must be a single-set which is feeding a jump *only*.  This
291
           could be a conditional jump with the label split off for
292
           machine-specific reasons or a casesi/tablejump.  */
293
        next = next_nonnote_insn (insn);
294
        if (next == NULL_RTX
295
            || !JUMP_P (next)
296
            || (JUMP_LABEL (next) != XEXP (note, 0)
297
                && find_reg_note (next, REG_LABEL_TARGET,
298
                                  XEXP (note, 0)) == NULL_RTX)
299
            || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
300
          return 1;
301
 
302
        set = single_set (insn);
303
        if (set == NULL_RTX)
304
          return 1;
305
 
306
        dest = SET_DEST (set);
307
        if (!REG_P (dest) || !dead_or_set_p (next, dest))
308
          return 1;
309
      }
310
 
311
  /* Unreachable loops with more than one basic block are detected
312
     during the DFS traversal in find_rgns.
313
 
314
     Unreachable loops with a single block are detected here.  This
315
     test is redundant with the one in find_rgns, but it's much
316
     cheaper to go ahead and catch the trivial case here.  */
317
  FOR_EACH_BB (b)
318
    {
319
      if (EDGE_COUNT (b->preds) == 0
320
          || (single_pred_p (b)
321
              && single_pred (b) == b))
322
        return 1;
323
    }
324
 
325
  /* All the tests passed.  Consider the cfg well structured.  */
326
  return 0;
327
}
328
 
329
/* Extract list of edges from a bitmap containing EDGE_TO_BIT bits.  */
330
 
331
static void
332
extract_edgelst (sbitmap set, edgelst *el)
333
{
334
  unsigned int i = 0;
335
  sbitmap_iterator sbi;
336
 
337
  /* edgelst table space is reused in each call to extract_edgelst.  */
338
  edgelst_last = 0;
339
 
340
  el->first_member = &edgelst_table[edgelst_last];
341
  el->nr_members = 0;
342
 
343
  /* Iterate over each word in the bitset.  */
344
  EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
345
    {
346
      edgelst_table[edgelst_last++] = rgn_edges[i];
347
      el->nr_members++;
348
    }
349
}
350
 
351
/* Functions for the construction of regions.  */
352
 
353
/* Print the regions, for debugging purposes.  Callable from debugger.  */
354
 
355
DEBUG_FUNCTION void
356
debug_regions (void)
357
{
358
  int rgn, bb;
359
 
360
  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
361
  for (rgn = 0; rgn < nr_regions; rgn++)
362
    {
363
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
364
               rgn_table[rgn].rgn_nr_blocks);
365
      fprintf (sched_dump, ";;\tbb/block: ");
366
 
367
      /* We don't have ebb_head initialized yet, so we can't use
368
         BB_TO_BLOCK ().  */
369
      current_blocks = RGN_BLOCKS (rgn);
370
 
371
      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
372
        fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
373
 
374
      fprintf (sched_dump, "\n\n");
375
    }
376
}
377
 
378
/* Print the region's basic blocks.  */
379
 
380
DEBUG_FUNCTION void
381
debug_region (int rgn)
382
{
383
  int bb;
384
 
385
  fprintf (stderr, "\n;;   ------------ REGION %d ----------\n\n", rgn);
386
  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
387
           rgn_table[rgn].rgn_nr_blocks);
388
  fprintf (stderr, ";;\tbb/block: ");
389
 
390
  /* We don't have ebb_head initialized yet, so we can't use
391
     BB_TO_BLOCK ().  */
392
  current_blocks = RGN_BLOCKS (rgn);
393
 
394
  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
395
    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
396
 
397
  fprintf (stderr, "\n\n");
398
 
399
  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
400
    {
401
      debug_bb_n_slim (rgn_bb_table[current_blocks + bb]);
402
      fprintf (stderr, "\n");
403
    }
404
 
405
  fprintf (stderr, "\n");
406
 
407
}
408
 
409
/* True when a bb with index BB_INDEX contained in region RGN.  */
410
static bool
411
bb_in_region_p (int bb_index, int rgn)
412
{
413
  int i;
414
 
415
  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
416
    if (rgn_bb_table[current_blocks + i] == bb_index)
417
      return true;
418
 
419
  return false;
420
}
421
 
422
/* Dump region RGN to file F using dot syntax.  */
423
void
424
dump_region_dot (FILE *f, int rgn)
425
{
426
  int i;
427
 
428
  fprintf (f, "digraph Region_%d {\n", rgn);
429
 
430
  /* We don't have ebb_head initialized yet, so we can't use
431
     BB_TO_BLOCK ().  */
432
  current_blocks = RGN_BLOCKS (rgn);
433
 
434
  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
435
    {
436
      edge e;
437
      edge_iterator ei;
438
      int src_bb_num = rgn_bb_table[current_blocks + i];
439
      struct basic_block_def *bb = BASIC_BLOCK (src_bb_num);
440
 
441
      FOR_EACH_EDGE (e, ei, bb->succs)
442
        if (bb_in_region_p (e->dest->index, rgn))
443
          fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
444
    }
445
  fprintf (f, "}\n");
446
}
447
 
448
/* The same, but first open a file specified by FNAME.  */
449
void
450
dump_region_dot_file (const char *fname, int rgn)
451
{
452
  FILE *f = fopen (fname, "wt");
453
  dump_region_dot (f, rgn);
454
  fclose (f);
455
}
456
 
457
/* Build a single block region for each basic block in the function.
458
   This allows for using the same code for interblock and basic block
459
   scheduling.  */
460
 
461
static void
462
find_single_block_region (bool ebbs_p)
463
{
464
  basic_block bb, ebb_start;
465
  int i = 0;
466
 
467
  nr_regions = 0;
468
 
469
  if (ebbs_p) {
470
    int probability_cutoff;
471
    if (profile_info && flag_branch_probabilities)
472
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
473
    else
474
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
475
    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
476
 
477
    FOR_EACH_BB (ebb_start)
478
      {
479
        RGN_NR_BLOCKS (nr_regions) = 0;
480
        RGN_BLOCKS (nr_regions) = i;
481
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
482
        RGN_HAS_REAL_EBB (nr_regions) = 0;
483
 
484
        for (bb = ebb_start; ; bb = bb->next_bb)
485
          {
486
            edge e;
487
 
488
            rgn_bb_table[i] = bb->index;
489
            RGN_NR_BLOCKS (nr_regions)++;
490
            CONTAINING_RGN (bb->index) = nr_regions;
491
            BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
492
            i++;
493
 
494
            if (bb->next_bb == EXIT_BLOCK_PTR
495
                || LABEL_P (BB_HEAD (bb->next_bb)))
496
              break;
497
 
498
            e = find_fallthru_edge (bb->succs);
499
            if (! e)
500
              break;
501
            if (e->probability <= probability_cutoff)
502
              break;
503
          }
504
 
505
        ebb_start = bb;
506
        nr_regions++;
507
      }
508
  }
509
  else
510
    FOR_EACH_BB (bb)
511
      {
512
        rgn_bb_table[nr_regions] = bb->index;
513
        RGN_NR_BLOCKS (nr_regions) = 1;
514
        RGN_BLOCKS (nr_regions) = nr_regions;
515
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
516
        RGN_HAS_REAL_EBB (nr_regions) = 0;
517
 
518
        CONTAINING_RGN (bb->index) = nr_regions;
519
        BLOCK_TO_BB (bb->index) = 0;
520
        nr_regions++;
521
      }
522
}
523
 
524
/* Estimate number of the insns in the BB.  */
525
static int
526
rgn_estimate_number_of_insns (basic_block bb)
527
{
528
  int count;
529
 
530
  count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));
531
 
532
  if (MAY_HAVE_DEBUG_INSNS)
533
    {
534
      rtx insn;
535
 
536
      FOR_BB_INSNS (bb, insn)
537
        if (DEBUG_INSN_P (insn))
538
          count--;
539
    }
540
 
541
  return count;
542
}
543
 
544
/* Update number of blocks and the estimate for number of insns
545
   in the region.  Return true if the region is "too large" for interblock
546
   scheduling (compile time considerations).  */
547
 
548
static bool
549
too_large (int block, int *num_bbs, int *num_insns)
550
{
551
  (*num_bbs)++;
552
  (*num_insns) += (common_sched_info->estimate_number_of_insns
553
                   (BASIC_BLOCK (block)));
554
 
555
  return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
556
          || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
557
}
558
 
559
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
560
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
561
   loop containing blk.  */
562
#define UPDATE_LOOP_RELATIONS(blk, hdr)         \
563
{                                               \
564
  if (max_hdr[blk] == -1)                       \
565
    max_hdr[blk] = hdr;                         \
566
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])  \
567
    RESET_BIT (inner, hdr);                     \
568
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])  \
569
    {                                           \
570
      RESET_BIT (inner,max_hdr[blk]);           \
571
      max_hdr[blk] = hdr;                       \
572
    }                                           \
573
}
574
 
575
/* Find regions for interblock scheduling.
576
 
577
   A region for scheduling can be:
578
 
579
     * A loop-free procedure, or
580
 
581
     * A reducible inner loop, or
582
 
583
     * A basic block not contained in any other region.
584
 
585
   ?!? In theory we could build other regions based on extended basic
586
   blocks or reverse extended basic blocks.  Is it worth the trouble?
587
 
588
   Loop blocks that form a region are put into the region's block list
589
   in topological order.
590
 
591
   This procedure stores its results into the following global (ick) variables
592
 
593
     * rgn_nr
594
     * rgn_table
595
     * rgn_bb_table
596
     * block_to_bb
597
     * containing region
598
 
599
   We use dominator relationships to avoid making regions out of non-reducible
600
   loops.
601
 
602
   This procedure needs to be converted to work on pred/succ lists instead
603
   of edge tables.  That would simplify it somewhat.  */
604
 
605
static void
606
haifa_find_rgns (void)
607
{
608
  int *max_hdr, *dfs_nr, *degree;
609
  char no_loops = 1;
610
  int node, child, loop_head, i, head, tail;
611
  int count = 0, sp, idx = 0;
612
  edge_iterator current_edge;
613
  edge_iterator *stack;
614
  int num_bbs, num_insns, unreachable;
615
  int too_large_failure;
616
  basic_block bb;
617
 
618
  /* Note if a block is a natural loop header.  */
619
  sbitmap header;
620
 
621
  /* Note if a block is a natural inner loop header.  */
622
  sbitmap inner;
623
 
624
  /* Note if a block is in the block queue.  */
625
  sbitmap in_queue;
626
 
627
  /* Note if a block is in the block queue.  */
628
  sbitmap in_stack;
629
 
630
  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
631
     and a mapping from block to its loop header (if the block is contained
632
     in a loop, else -1).
633
 
634
     Store results in HEADER, INNER, and MAX_HDR respectively, these will
635
     be used as inputs to the second traversal.
636
 
637
     STACK, SP and DFS_NR are only used during the first traversal.  */
638
 
639
  /* Allocate and initialize variables for the first traversal.  */
640
  max_hdr = XNEWVEC (int, last_basic_block);
641
  dfs_nr = XCNEWVEC (int, last_basic_block);
642
  stack = XNEWVEC (edge_iterator, n_edges);
643
 
644
  inner = sbitmap_alloc (last_basic_block);
645
  sbitmap_ones (inner);
646
 
647
  header = sbitmap_alloc (last_basic_block);
648
  sbitmap_zero (header);
649
 
650
  in_queue = sbitmap_alloc (last_basic_block);
651
  sbitmap_zero (in_queue);
652
 
653
  in_stack = sbitmap_alloc (last_basic_block);
654
  sbitmap_zero (in_stack);
655
 
656
  for (i = 0; i < last_basic_block; i++)
657
    max_hdr[i] = -1;
658
 
659
  #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
660
  #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
661
 
662
  /* DFS traversal to find inner loops in the cfg.  */
663
 
664
  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
665
  sp = -1;
666
 
667
  while (1)
668
    {
669
      if (EDGE_PASSED (current_edge))
670
        {
671
          /* We have reached a leaf node or a node that was already
672
             processed.  Pop edges off the stack until we find
673
             an edge that has not yet been processed.  */
674
          while (sp >= 0 && EDGE_PASSED (current_edge))
675
            {
676
              /* Pop entry off the stack.  */
677
              current_edge = stack[sp--];
678
              node = ei_edge (current_edge)->src->index;
679
              gcc_assert (node != ENTRY_BLOCK);
680
              child = ei_edge (current_edge)->dest->index;
681
              gcc_assert (child != EXIT_BLOCK);
682
              RESET_BIT (in_stack, child);
683
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
684
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
685
              ei_next (&current_edge);
686
            }
687
 
688
          /* See if have finished the DFS tree traversal.  */
689
          if (sp < 0 && EDGE_PASSED (current_edge))
690
            break;
691
 
692
          /* Nope, continue the traversal with the popped node.  */
693
          continue;
694
        }
695
 
696
      /* Process a node.  */
697
      node = ei_edge (current_edge)->src->index;
698
      gcc_assert (node != ENTRY_BLOCK);
699
      SET_BIT (in_stack, node);
700
      dfs_nr[node] = ++count;
701
 
702
      /* We don't traverse to the exit block.  */
703
      child = ei_edge (current_edge)->dest->index;
704
      if (child == EXIT_BLOCK)
705
        {
706
          SET_EDGE_PASSED (current_edge);
707
          ei_next (&current_edge);
708
          continue;
709
        }
710
 
711
      /* If the successor is in the stack, then we've found a loop.
712
         Mark the loop, if it is not a natural loop, then it will
713
         be rejected during the second traversal.  */
714
      if (TEST_BIT (in_stack, child))
715
        {
716
          no_loops = 0;
717
          SET_BIT (header, child);
718
          UPDATE_LOOP_RELATIONS (node, child);
719
          SET_EDGE_PASSED (current_edge);
720
          ei_next (&current_edge);
721
          continue;
722
        }
723
 
724
      /* If the child was already visited, then there is no need to visit
725
         it again.  Just update the loop relationships and restart
726
         with a new edge.  */
727
      if (dfs_nr[child])
728
        {
729
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
730
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
731
          SET_EDGE_PASSED (current_edge);
732
          ei_next (&current_edge);
733
          continue;
734
        }
735
 
736
      /* Push an entry on the stack and continue DFS traversal.  */
737
      stack[++sp] = current_edge;
738
      SET_EDGE_PASSED (current_edge);
739
      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
740
    }
741
 
742
  /* Reset ->aux field used by EDGE_PASSED.  */
743
  FOR_ALL_BB (bb)
744
    {
745
      edge_iterator ei;
746
      edge e;
747
      FOR_EACH_EDGE (e, ei, bb->succs)
748
        e->aux = NULL;
749
    }
750
 
751
 
752
  /* Another check for unreachable blocks.  The earlier test in
753
     is_cfg_nonregular only finds unreachable blocks that do not
754
     form a loop.
755
 
756
     The DFS traversal will mark every block that is reachable from
757
     the entry node by placing a nonzero value in dfs_nr.  Thus if
758
     dfs_nr is zero for any block, then it must be unreachable.  */
759
  unreachable = 0;
760
  FOR_EACH_BB (bb)
761
    if (dfs_nr[bb->index] == 0)
762
      {
763
        unreachable = 1;
764
        break;
765
      }
766
 
767
  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
768
     to hold degree counts.  */
769
  degree = dfs_nr;
770
 
771
  FOR_EACH_BB (bb)
772
    degree[bb->index] = EDGE_COUNT (bb->preds);
773
 
774
  /* Do not perform region scheduling if there are any unreachable
775
     blocks.  */
776
  if (!unreachable)
777
    {
778
      int *queue, *degree1 = NULL;
779
      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
780
         there basic blocks, which are forced to be region heads.
781
         This is done to try to assemble few smaller regions
782
         from a too_large region.  */
783
      sbitmap extended_rgn_header = NULL;
784
      bool extend_regions_p;
785
 
786
      if (no_loops)
787
        SET_BIT (header, 0);
788
 
789
      /* Second traversal:find reducible inner loops and topologically sort
790
         block of each region.  */
791
 
792
      queue = XNEWVEC (int, n_basic_blocks);
793
 
794
      extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
795
      if (extend_regions_p)
796
        {
797
          degree1 = XNEWVEC (int, last_basic_block);
798
          extended_rgn_header = sbitmap_alloc (last_basic_block);
799
          sbitmap_zero (extended_rgn_header);
800
        }
801
 
802
      /* Find blocks which are inner loop headers.  We still have non-reducible
803
         loops to consider at this point.  */
804
      FOR_EACH_BB (bb)
805
        {
806
          if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
807
            {
808
              edge e;
809
              edge_iterator ei;
810
              basic_block jbb;
811
 
812
              /* Now check that the loop is reducible.  We do this separate
813
                 from finding inner loops so that we do not find a reducible
814
                 loop which contains an inner non-reducible loop.
815
 
816
                 A simple way to find reducible/natural loops is to verify
817
                 that each block in the loop is dominated by the loop
818
                 header.
819
 
820
                 If there exists a block that is not dominated by the loop
821
                 header, then the block is reachable from outside the loop
822
                 and thus the loop is not a natural loop.  */
823
              FOR_EACH_BB (jbb)
824
                {
825
                  /* First identify blocks in the loop, except for the loop
826
                     entry block.  */
827
                  if (bb->index == max_hdr[jbb->index] && bb != jbb)
828
                    {
829
                      /* Now verify that the block is dominated by the loop
830
                         header.  */
831
                      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
832
                        break;
833
                    }
834
                }
835
 
836
              /* If we exited the loop early, then I is the header of
837
                 a non-reducible loop and we should quit processing it
838
                 now.  */
839
              if (jbb != EXIT_BLOCK_PTR)
840
                continue;
841
 
842
              /* I is a header of an inner loop, or block 0 in a subroutine
843
                 with no loops at all.  */
844
              head = tail = -1;
845
              too_large_failure = 0;
846
              loop_head = max_hdr[bb->index];
847
 
848
              if (extend_regions_p)
849
                /* We save degree in case when we meet a too_large region
850
                   and cancel it.  We need a correct degree later when
851
                   calling extend_rgns.  */
852
                memcpy (degree1, degree, last_basic_block * sizeof (int));
853
 
854
              /* Decrease degree of all I's successors for topological
855
                 ordering.  */
856
              FOR_EACH_EDGE (e, ei, bb->succs)
857
                if (e->dest != EXIT_BLOCK_PTR)
858
                  --degree[e->dest->index];
859
 
860
              /* Estimate # insns, and count # blocks in the region.  */
861
              num_bbs = 1;
862
              num_insns = common_sched_info->estimate_number_of_insns (bb);
863
 
864
              /* Find all loop latches (blocks with back edges to the loop
865
                 header) or all the leaf blocks in the cfg has no loops.
866
 
867
                 Place those blocks into the queue.  */
868
              if (no_loops)
869
                {
870
                  FOR_EACH_BB (jbb)
871
                    /* Leaf nodes have only a single successor which must
872
                       be EXIT_BLOCK.  */
873
                    if (single_succ_p (jbb)
874
                        && single_succ (jbb) == EXIT_BLOCK_PTR)
875
                      {
876
                        queue[++tail] = jbb->index;
877
                        SET_BIT (in_queue, jbb->index);
878
 
879
                        if (too_large (jbb->index, &num_bbs, &num_insns))
880
                          {
881
                            too_large_failure = 1;
882
                            break;
883
                          }
884
                      }
885
                }
886
              else
887
                {
888
                  edge e;
889
 
890
                  FOR_EACH_EDGE (e, ei, bb->preds)
891
                    {
892
                      if (e->src == ENTRY_BLOCK_PTR)
893
                        continue;
894
 
895
                      node = e->src->index;
896
 
897
                      if (max_hdr[node] == loop_head && node != bb->index)
898
                        {
899
                          /* This is a loop latch.  */
900
                          queue[++tail] = node;
901
                          SET_BIT (in_queue, node);
902
 
903
                          if (too_large (node, &num_bbs, &num_insns))
904
                            {
905
                              too_large_failure = 1;
906
                              break;
907
                            }
908
                        }
909
                    }
910
                }
911
 
912
              /* Now add all the blocks in the loop to the queue.
913
 
914
             We know the loop is a natural loop; however the algorithm
915
             above will not always mark certain blocks as being in the
916
             loop.  Consider:
917
                node   children
918
                 a        b,c
919
                 b        c
920
                 c        a,d
921
                 d        b
922
 
923
             The algorithm in the DFS traversal may not mark B & D as part
924
             of the loop (i.e. they will not have max_hdr set to A).
925
 
926
             We know they can not be loop latches (else they would have
927
             had max_hdr set since they'd have a backedge to a dominator
928
             block).  So we don't need them on the initial queue.
929
 
930
             We know they are part of the loop because they are dominated
931
             by the loop header and can be reached by a backwards walk of
932
             the edges starting with nodes on the initial queue.
933
 
934
             It is safe and desirable to include those nodes in the
935
             loop/scheduling region.  To do so we would need to decrease
936
             the degree of a node if it is the target of a backedge
937
             within the loop itself as the node is placed in the queue.
938
 
939
             We do not do this because I'm not sure that the actual
940
             scheduling code will properly handle this case. ?!? */
941
 
942
              while (head < tail && !too_large_failure)
943
                {
944
                  edge e;
945
                  child = queue[++head];
946
 
947
                  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
948
                    {
949
                      node = e->src->index;
950
 
951
                      /* See discussion above about nodes not marked as in
952
                         this loop during the initial DFS traversal.  */
953
                      if (e->src == ENTRY_BLOCK_PTR
954
                          || max_hdr[node] != loop_head)
955
                        {
956
                          tail = -1;
957
                          break;
958
                        }
959
                      else if (!TEST_BIT (in_queue, node) && node != bb->index)
960
                        {
961
                          queue[++tail] = node;
962
                          SET_BIT (in_queue, node);
963
 
964
                          if (too_large (node, &num_bbs, &num_insns))
965
                            {
966
                              too_large_failure = 1;
967
                              break;
968
                            }
969
                        }
970
                    }
971
                }
972
 
973
              if (tail >= 0 && !too_large_failure)
974
                {
975
                  /* Place the loop header into list of region blocks.  */
976
                  degree[bb->index] = -1;
977
                  rgn_bb_table[idx] = bb->index;
978
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
979
                  RGN_BLOCKS (nr_regions) = idx++;
980
                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
981
                  RGN_HAS_REAL_EBB (nr_regions) = 0;
982
                  CONTAINING_RGN (bb->index) = nr_regions;
983
                  BLOCK_TO_BB (bb->index) = count = 0;
984
 
985
                  /* Remove blocks from queue[] when their in degree
986
                     becomes zero.  Repeat until no blocks are left on the
987
                     list.  This produces a topological list of blocks in
988
                     the region.  */
989
                  while (tail >= 0)
990
                    {
991
                      if (head < 0)
992
                        head = tail;
993
                      child = queue[head];
994
                      if (degree[child] == 0)
995
                        {
996
                          edge e;
997
 
998
                          degree[child] = -1;
999
                          rgn_bb_table[idx++] = child;
1000
                          BLOCK_TO_BB (child) = ++count;
1001
                          CONTAINING_RGN (child) = nr_regions;
1002
                          queue[head] = queue[tail--];
1003
 
1004
                          FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
1005
                            if (e->dest != EXIT_BLOCK_PTR)
1006
                              --degree[e->dest->index];
1007
                        }
1008
                      else
1009
                        --head;
1010
                    }
1011
                  ++nr_regions;
1012
                }
1013
              else if (extend_regions_p)
1014
                {
1015
                  /* Restore DEGREE.  */
1016
                  int *t = degree;
1017
 
1018
                  degree = degree1;
1019
                  degree1 = t;
1020
 
1021
                  /* And force successors of BB to be region heads.
1022
                     This may provide several smaller regions instead
1023
                     of one too_large region.  */
1024
                  FOR_EACH_EDGE (e, ei, bb->succs)
1025
                    if (e->dest != EXIT_BLOCK_PTR)
1026
                      SET_BIT (extended_rgn_header, e->dest->index);
1027
                }
1028
            }
1029
        }
1030
      free (queue);
1031
 
1032
      if (extend_regions_p)
1033
        {
1034
          free (degree1);
1035
 
1036
          sbitmap_a_or_b (header, header, extended_rgn_header);
1037
          sbitmap_free (extended_rgn_header);
1038
 
1039
          extend_rgns (degree, &idx, header, max_hdr);
1040
        }
1041
    }
1042
 
1043
  /* Any block that did not end up in a region is placed into a region
1044
     by itself.  */
1045
  FOR_EACH_BB (bb)
1046
    if (degree[bb->index] >= 0)
1047
      {
1048
        rgn_bb_table[idx] = bb->index;
1049
        RGN_NR_BLOCKS (nr_regions) = 1;
1050
        RGN_BLOCKS (nr_regions) = idx++;
1051
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
1052
        RGN_HAS_REAL_EBB (nr_regions) = 0;
1053
        CONTAINING_RGN (bb->index) = nr_regions++;
1054
        BLOCK_TO_BB (bb->index) = 0;
1055
      }
1056
 
1057
  free (max_hdr);
1058
  free (degree);
1059
  free (stack);
1060
  sbitmap_free (header);
1061
  sbitmap_free (inner);
1062
  sbitmap_free (in_queue);
1063
  sbitmap_free (in_stack);
1064
}
1065
 
1066
 
1067
/* Wrapper function.
1068
   If FLAG_SEL_SCHED_PIPELINING is set, then use custom function to form
1069
   regions.  Otherwise just call find_rgns_haifa.  */
1070
static void
1071
find_rgns (void)
1072
{
1073
  if (sel_sched_p () && flag_sel_sched_pipelining)
1074
    sel_find_rgns ();
1075
  else
1076
    haifa_find_rgns ();
1077
}
1078
 
1079
static int gather_region_statistics (int **);
1080
static void print_region_statistics (int *, int, int *, int);
1081
 
1082
/* Calculate the histogram that shows the number of regions having the
1083
   given number of basic blocks, and store it in the RSP array.  Return
1084
   the size of this array.  */
1085
static int
1086
gather_region_statistics (int **rsp)
1087
{
1088
  int i, *a = 0, a_sz = 0;
1089
 
1090
  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
1091
  for (i = 0; i < nr_regions; i++)
1092
    {
1093
      int nr_blocks = RGN_NR_BLOCKS (i);
1094
 
1095
      gcc_assert (nr_blocks >= 1);
1096
 
1097
      if (nr_blocks > a_sz)
1098
        {
1099
          a = XRESIZEVEC (int, a, nr_blocks);
1100
          do
1101
            a[a_sz++] = 0;
1102
          while (a_sz != nr_blocks);
1103
        }
1104
 
1105
      a[nr_blocks - 1]++;
1106
    }
1107
 
1108
  *rsp = a;
1109
  return a_sz;
1110
}
1111
 
1112
/* Print regions statistics.  S1 and S2 denote the data before and after
1113
   calling extend_rgns, respectively.  */
1114
static void
1115
print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
1116
{
1117
  int i;
1118
 
1119
  /* We iterate until s2_sz because extend_rgns does not decrease
1120
     the maximal region size.  */
1121
  for (i = 1; i < s2_sz; i++)
1122
    {
1123
      int n1, n2;
1124
 
1125
      n2 = s2[i];
1126
 
1127
      if (n2 == 0)
1128
        continue;
1129
 
1130
      if (i >= s1_sz)
1131
        n1 = 0;
1132
      else
1133
        n1 = s1[i];
1134
 
1135
      fprintf (sched_dump, ";; Region extension statistics: size %d: " \
1136
               "was %d + %d more\n", i + 1, n1, n2 - n1);
1137
    }
1138
}
1139
 
1140
/* Extend regions.
1141
   DEGREE - Array of incoming edge count, considering only
1142
   the edges, that don't have their sources in formed regions yet.
1143
   IDXP - pointer to the next available index in rgn_bb_table.
1144
   HEADER - set of all region heads.
1145
   LOOP_HDR - mapping from block to the containing loop
1146
   (two blocks can reside within one region if they have
1147
   the same loop header).  */
1148
void
1149
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
1150
{
1151
  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
1152
  int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;
1153
 
1154
  max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);
1155
 
1156
  max_hdr = XNEWVEC (int, last_basic_block);
1157
 
1158
  order = XNEWVEC (int, last_basic_block);
1159
  post_order_compute (order, false, false);
1160
 
1161
  for (i = nblocks - 1; i >= 0; i--)
1162
    {
1163
      int bbn = order[i];
1164
      if (degree[bbn] >= 0)
1165
        {
1166
          max_hdr[bbn] = bbn;
1167
          rescan = 1;
1168
        }
1169
      else
1170
        /* This block already was processed in find_rgns.  */
1171
        max_hdr[bbn] = -1;
1172
    }
1173
 
1174
  /* The idea is to topologically walk through CFG in top-down order.
1175
     During the traversal, if all the predecessors of a node are
1176
     marked to be in the same region (they all have the same max_hdr),
1177
     then current node is also marked to be a part of that region.
1178
     Otherwise the node starts its own region.
1179
     CFG should be traversed until no further changes are made.  On each
1180
     iteration the set of the region heads is extended (the set of those
1181
     blocks that have max_hdr[bbi] == bbi).  This set is upper bounded by the
1182
     set of all basic blocks, thus the algorithm is guaranteed to
1183
     terminate.  */
1184
 
1185
  while (rescan && iter < max_iter)
1186
    {
1187
      rescan = 0;
1188
 
1189
      for (i = nblocks - 1; i >= 0; i--)
1190
        {
1191
          edge e;
1192
          edge_iterator ei;
1193
          int bbn = order[i];
1194
 
1195
          if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
1196
            {
1197
              int hdr = -1;
1198
 
1199
              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
1200
                {
1201
                  int predn = e->src->index;
1202
 
1203
                  if (predn != ENTRY_BLOCK
1204
                      /* If pred wasn't processed in find_rgns.  */
1205
                      && max_hdr[predn] != -1
1206
                      /* And pred and bb reside in the same loop.
1207
                         (Or out of any loop).  */
1208
                      && loop_hdr[bbn] == loop_hdr[predn])
1209
                    {
1210
                      if (hdr == -1)
1211
                        /* Then bb extends the containing region of pred.  */
1212
                        hdr = max_hdr[predn];
1213
                      else if (hdr != max_hdr[predn])
1214
                        /* Too bad, there are at least two predecessors
1215
                           that reside in different regions.  Thus, BB should
1216
                           begin its own region.  */
1217
                        {
1218
                          hdr = bbn;
1219
                          break;
1220
                        }
1221
                    }
1222
                  else
1223
                    /* BB starts its own region.  */
1224
                    {
1225
                      hdr = bbn;
1226
                      break;
1227
                    }
1228
                }
1229
 
1230
              if (hdr == bbn)
1231
                {
1232
                  /* If BB start its own region,
1233
                     update set of headers with BB.  */
1234
                  SET_BIT (header, bbn);
1235
                  rescan = 1;
1236
                }
1237
              else
1238
                gcc_assert (hdr != -1);
1239
 
1240
              max_hdr[bbn] = hdr;
1241
            }
1242
        }
1243
 
1244
      iter++;
1245
    }
1246
 
1247
  /* Statistics were gathered on the SPEC2000 package of tests with
1248
     mainline weekly snapshot gcc-4.1-20051015 on ia64.
1249
 
1250
     Statistics for SPECint:
1251
     1 iteration : 1751 cases (38.7%)
1252
     2 iterations: 2770 cases (61.3%)
1253
     Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1254
     Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1255
     (We don't count single block regions here).
1256
 
1257
     Statistics for SPECfp:
1258
     1 iteration : 621 cases (35.9%)
1259
     2 iterations: 1110 cases (64.1%)
1260
     Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1261
     Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1262
     (We don't count single block regions here).
1263
 
1264
     By default we do at most 2 iterations.
1265
     This can be overridden with max-sched-extend-regions-iters parameter:
1266
 
1267
     N > 0 - do at most N iterations.  */
1268
 
1269
  if (sched_verbose && iter != 0)
1270
    fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1271
             rescan ? "... failed" : "");
1272
 
1273
  if (!rescan && iter != 0)
1274
    {
1275
      int *s1 = NULL, s1_sz = 0;
1276
 
1277
      /* Save the old statistics for later printout.  */
1278
      if (sched_verbose >= 6)
1279
        s1_sz = gather_region_statistics (&s1);
1280
 
1281
      /* We have succeeded.  Now assemble the regions.  */
1282
      for (i = nblocks - 1; i >= 0; i--)
1283
        {
1284
          int bbn = order[i];
1285
 
1286
          if (max_hdr[bbn] == bbn)
1287
            /* BBN is a region head.  */
1288
            {
1289
              edge e;
1290
              edge_iterator ei;
1291
              int num_bbs = 0, j, num_insns = 0, large;
1292
 
1293
              large = too_large (bbn, &num_bbs, &num_insns);
1294
 
1295
              degree[bbn] = -1;
1296
              rgn_bb_table[idx] = bbn;
1297
              RGN_BLOCKS (nr_regions) = idx++;
1298
              RGN_DONT_CALC_DEPS (nr_regions) = 0;
1299
              RGN_HAS_REAL_EBB (nr_regions) = 0;
1300
              CONTAINING_RGN (bbn) = nr_regions;
1301
              BLOCK_TO_BB (bbn) = 0;
1302
 
1303
              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
1304
                if (e->dest != EXIT_BLOCK_PTR)
1305
                  degree[e->dest->index]--;
1306
 
1307
              if (!large)
1308
                /* Here we check whether the region is too_large.  */
1309
                for (j = i - 1; j >= 0; j--)
1310
                  {
1311
                    int succn = order[j];
1312
                    if (max_hdr[succn] == bbn)
1313
                      {
1314
                        if ((large = too_large (succn, &num_bbs, &num_insns)))
1315
                          break;
1316
                      }
1317
                  }
1318
 
1319
              if (large)
1320
                /* If the region is too_large, then wrap every block of
1321
                   the region into single block region.
1322
                   Here we wrap region head only.  Other blocks are
1323
                   processed in the below cycle.  */
1324
                {
1325
                  RGN_NR_BLOCKS (nr_regions) = 1;
1326
                  nr_regions++;
1327
                }
1328
 
1329
              num_bbs = 1;
1330
 
1331
              for (j = i - 1; j >= 0; j--)
1332
                {
1333
                  int succn = order[j];
1334
 
1335
                  if (max_hdr[succn] == bbn)
1336
                    /* This cycle iterates over all basic blocks, that
1337
                       are supposed to be in the region with head BBN,
1338
                       and wraps them into that region (or in single
1339
                       block region).  */
1340
                    {
1341
                      gcc_assert (degree[succn] == 0);
1342
 
1343
                      degree[succn] = -1;
1344
                      rgn_bb_table[idx] = succn;
1345
                      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1346
                      CONTAINING_RGN (succn) = nr_regions;
1347
 
1348
                      if (large)
1349
                        /* Wrap SUCCN into single block region.  */
1350
                        {
1351
                          RGN_BLOCKS (nr_regions) = idx;
1352
                          RGN_NR_BLOCKS (nr_regions) = 1;
1353
                          RGN_DONT_CALC_DEPS (nr_regions) = 0;
1354
                          RGN_HAS_REAL_EBB (nr_regions) = 0;
1355
                          nr_regions++;
1356
                        }
1357
 
1358
                      idx++;
1359
 
1360
                      FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
1361
                        if (e->dest != EXIT_BLOCK_PTR)
1362
                          degree[e->dest->index]--;
1363
                    }
1364
                }
1365
 
1366
              if (!large)
1367
                {
1368
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
1369
                  nr_regions++;
1370
                }
1371
            }
1372
        }
1373
 
1374
      if (sched_verbose >= 6)
1375
        {
1376
          int *s2, s2_sz;
1377
 
1378
          /* Get the new statistics and print the comparison with the
1379
             one before calling this function.  */
1380
          s2_sz = gather_region_statistics (&s2);
1381
          print_region_statistics (s1, s1_sz, s2, s2_sz);
1382
          free (s1);
1383
          free (s2);
1384
        }
1385
    }
1386
 
1387
  free (order);
1388
  free (max_hdr);
1389
 
1390
  *idxp = idx;
1391
}
1392
 
1393
/* Functions for regions scheduling information.  */
1394
 
1395
/* Compute dominators, probability, and potential-split-edges of bb.
1396
   Assume that these values were already computed for bb's predecessors.  */
1397
 
1398
static void
1399
compute_dom_prob_ps (int bb)
1400
{
1401
  edge_iterator in_ei;
1402
  edge in_edge;
1403
 
1404
  /* We shouldn't have any real ebbs yet.  */
1405
  gcc_assert (ebb_head [bb] == bb + current_blocks);
1406
 
1407
  if (IS_RGN_ENTRY (bb))
1408
    {
1409
      SET_BIT (dom[bb], 0);
1410
      prob[bb] = REG_BR_PROB_BASE;
1411
      return;
1412
    }
1413
 
1414
  prob[bb] = 0;
1415
 
1416
  /* Initialize dom[bb] to '111..1'.  */
1417
  sbitmap_ones (dom[bb]);
1418
 
1419
  FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
1420
    {
1421
      int pred_bb;
1422
      edge out_edge;
1423
      edge_iterator out_ei;
1424
 
1425
      if (in_edge->src == ENTRY_BLOCK_PTR)
1426
        continue;
1427
 
1428
      pred_bb = BLOCK_TO_BB (in_edge->src->index);
1429
      sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
1430
      sbitmap_a_or_b (ancestor_edges[bb],
1431
                      ancestor_edges[bb], ancestor_edges[pred_bb]);
1432
 
1433
      SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1434
 
1435
      sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1436
 
1437
      FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1438
        SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
1439
 
1440
      prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
1441
    }
1442
 
1443
  SET_BIT (dom[bb], bb);
1444
  sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1445
 
1446
  if (sched_verbose >= 2)
1447
    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1448
             (100 * prob[bb]) / REG_BR_PROB_BASE);
1449
}
1450
 
1451
/* Functions for target info.  */
1452
 
1453
/* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1454
   Note that bb_trg dominates bb_src.  */
1455
 
1456
static void
1457
split_edges (int bb_src, int bb_trg, edgelst *bl)
1458
{
1459
  sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1460
  sbitmap_copy (src, pot_split[bb_src]);
1461
 
1462
  sbitmap_difference (src, src, pot_split[bb_trg]);
1463
  extract_edgelst (src, bl);
1464
  sbitmap_free (src);
1465
}
1466
 
1467
/* Find the valid candidate-source-blocks for the target block TRG, compute
1468
   their probability, and check if they are speculative or not.
1469
   For speculative sources, compute their update-blocks and split-blocks.  */
1470
 
1471
static void
1472
compute_trg_info (int trg)
1473
{
1474
  candidate *sp;
1475
  edgelst el = { NULL, 0 };
1476
  int i, j, k, update_idx;
1477
  basic_block block;
1478
  sbitmap visited;
1479
  edge_iterator ei;
1480
  edge e;
1481
 
1482
  candidate_table = XNEWVEC (candidate, current_nr_blocks);
1483
 
1484
  bblst_last = 0;
1485
  /* bblst_table holds split blocks and update blocks for each block after
1486
     the current one in the region.  split blocks and update blocks are
1487
     the TO blocks of region edges, so there can be at most rgn_nr_edges
1488
     of them.  */
1489
  bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1490
  bblst_table = XNEWVEC (basic_block, bblst_size);
1491
 
1492
  edgelst_last = 0;
1493
  edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1494
 
1495
  /* Define some of the fields for the target bb as well.  */
1496
  sp = candidate_table + trg;
1497
  sp->is_valid = 1;
1498
  sp->is_speculative = 0;
1499
  sp->src_prob = REG_BR_PROB_BASE;
1500
 
1501
  visited = sbitmap_alloc (last_basic_block);
1502
 
1503
  for (i = trg + 1; i < current_nr_blocks; i++)
1504
    {
1505
      sp = candidate_table + i;
1506
 
1507
      sp->is_valid = IS_DOMINATED (i, trg);
1508
      if (sp->is_valid)
1509
        {
1510
          int tf = prob[trg], cf = prob[i];
1511
 
1512
          /* In CFGs with low probability edges TF can possibly be zero.  */
1513
          sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
1514
          sp->is_valid = (sp->src_prob >= min_spec_prob);
1515
        }
1516
 
1517
      if (sp->is_valid)
1518
        {
1519
          split_edges (i, trg, &el);
1520
          sp->is_speculative = (el.nr_members) ? 1 : 0;
1521
          if (sp->is_speculative && !flag_schedule_speculative)
1522
            sp->is_valid = 0;
1523
        }
1524
 
1525
      if (sp->is_valid)
1526
        {
1527
          /* Compute split blocks and store them in bblst_table.
1528
             The TO block of every split edge is a split block.  */
1529
          sp->split_bbs.first_member = &bblst_table[bblst_last];
1530
          sp->split_bbs.nr_members = el.nr_members;
1531
          for (j = 0; j < el.nr_members; bblst_last++, j++)
1532
            bblst_table[bblst_last] = el.first_member[j]->dest;
1533
          sp->update_bbs.first_member = &bblst_table[bblst_last];
1534
 
1535
          /* Compute update blocks and store them in bblst_table.
1536
             For every split edge, look at the FROM block, and check
1537
             all out edges.  For each out edge that is not a split edge,
1538
             add the TO block to the update block list.  This list can end
1539
             up with a lot of duplicates.  We need to weed them out to avoid
1540
             overrunning the end of the bblst_table.  */
1541
 
1542
          update_idx = 0;
1543
          sbitmap_zero (visited);
1544
          for (j = 0; j < el.nr_members; j++)
1545
            {
1546
              block = el.first_member[j]->src;
1547
              FOR_EACH_EDGE (e, ei, block->succs)
1548
                {
1549
                  if (!TEST_BIT (visited, e->dest->index))
1550
                    {
1551
                      for (k = 0; k < el.nr_members; k++)
1552
                        if (e == el.first_member[k])
1553
                          break;
1554
 
1555
                      if (k >= el.nr_members)
1556
                        {
1557
                          bblst_table[bblst_last++] = e->dest;
1558
                          SET_BIT (visited, e->dest->index);
1559
                          update_idx++;
1560
                        }
1561
                    }
1562
                }
1563
            }
1564
          sp->update_bbs.nr_members = update_idx;
1565
 
1566
          /* Make sure we didn't overrun the end of bblst_table.  */
1567
          gcc_assert (bblst_last <= bblst_size);
1568
        }
1569
      else
1570
        {
1571
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1572
 
1573
          sp->is_speculative = 0;
1574
          sp->src_prob = 0;
1575
        }
1576
    }
1577
 
1578
  sbitmap_free (visited);
1579
}
1580
 
1581
/* Free the computed target info.  */
1582
static void
1583
free_trg_info (void)
1584
{
1585
  free (candidate_table);
1586
  free (bblst_table);
1587
  free (edgelst_table);
1588
}
1589
 
1590
/* Print candidates info, for debugging purposes.  Callable from debugger.  */
1591
 
1592
DEBUG_FUNCTION void
1593
debug_candidate (int i)
1594
{
1595
  if (!candidate_table[i].is_valid)
1596
    return;
1597
 
1598
  if (candidate_table[i].is_speculative)
1599
    {
1600
      int j;
1601
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1602
 
1603
      fprintf (sched_dump, "split path: ");
1604
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1605
        {
1606
          int b = candidate_table[i].split_bbs.first_member[j]->index;
1607
 
1608
          fprintf (sched_dump, " %d ", b);
1609
        }
1610
      fprintf (sched_dump, "\n");
1611
 
1612
      fprintf (sched_dump, "update path: ");
1613
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1614
        {
1615
          int b = candidate_table[i].update_bbs.first_member[j]->index;
1616
 
1617
          fprintf (sched_dump, " %d ", b);
1618
        }
1619
      fprintf (sched_dump, "\n");
1620
    }
1621
  else
1622
    {
1623
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1624
    }
1625
}
1626
 
1627
/* Print candidates info, for debugging purposes.  Callable from debugger.  */
1628
 
1629
DEBUG_FUNCTION void
1630
debug_candidates (int trg)
1631
{
1632
  int i;
1633
 
1634
  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1635
           BB_TO_BLOCK (trg), trg);
1636
  for (i = trg + 1; i < current_nr_blocks; i++)
1637
    debug_candidate (i);
1638
}
1639
 
1640
/* Functions for speculative scheduling.  */
1641
 
1642
static bitmap_head not_in_df;
1643
 
1644
/* Return 0 if x is a set of a register alive in the beginning of one
1645
   of the split-blocks of src, otherwise return 1.  */
1646
 
1647
static int
1648
check_live_1 (int src, rtx x)
1649
{
1650
  int i;
1651
  int regno;
1652
  rtx reg = SET_DEST (x);
1653
 
1654
  if (reg == 0)
1655
    return 1;
1656
 
1657
  while (GET_CODE (reg) == SUBREG
1658
         || GET_CODE (reg) == ZERO_EXTRACT
1659
         || GET_CODE (reg) == STRICT_LOW_PART)
1660
    reg = XEXP (reg, 0);
1661
 
1662
  if (GET_CODE (reg) == PARALLEL)
1663
    {
1664
      int i;
1665
 
1666
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1667
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1668
          if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1669
            return 1;
1670
 
1671
      return 0;
1672
    }
1673
 
1674
  if (!REG_P (reg))
1675
    return 1;
1676
 
1677
  regno = REGNO (reg);
1678
 
1679
  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1680
    {
1681
      /* Global registers are assumed live.  */
1682
      return 0;
1683
    }
1684
  else
1685
    {
1686
      if (regno < FIRST_PSEUDO_REGISTER)
1687
        {
1688
          /* Check for hard registers.  */
1689
          int j = hard_regno_nregs[regno][GET_MODE (reg)];
1690
          while (--j >= 0)
1691
            {
1692
              for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1693
                {
1694
                  basic_block b = candidate_table[src].split_bbs.first_member[i];
1695
                  int t = bitmap_bit_p (&not_in_df, b->index);
1696
 
1697
                  /* We can have split blocks, that were recently generated.
1698
                     Such blocks are always outside current region.  */
1699
                  gcc_assert (!t || (CONTAINING_RGN (b->index)
1700
                                     != CONTAINING_RGN (BB_TO_BLOCK (src))));
1701
 
1702
                  if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1703
                    return 0;
1704
                }
1705
            }
1706
        }
1707
      else
1708
        {
1709
          /* Check for pseudo registers.  */
1710
          for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1711
            {
1712
              basic_block b = candidate_table[src].split_bbs.first_member[i];
1713
              int t = bitmap_bit_p (&not_in_df, b->index);
1714
 
1715
              gcc_assert (!t || (CONTAINING_RGN (b->index)
1716
                                 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1717
 
1718
              if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1719
                return 0;
1720
            }
1721
        }
1722
    }
1723
 
1724
  return 1;
1725
}
1726
 
1727
/* If x is a set of a register R, mark that R is alive in the beginning
1728
   of every update-block of src.  */
1729
 
1730
static void
1731
update_live_1 (int src, rtx x)
1732
{
1733
  int i;
1734
  int regno;
1735
  rtx reg = SET_DEST (x);
1736
 
1737
  if (reg == 0)
1738
    return;
1739
 
1740
  while (GET_CODE (reg) == SUBREG
1741
         || GET_CODE (reg) == ZERO_EXTRACT
1742
         || GET_CODE (reg) == STRICT_LOW_PART)
1743
    reg = XEXP (reg, 0);
1744
 
1745
  if (GET_CODE (reg) == PARALLEL)
1746
    {
1747
      int i;
1748
 
1749
      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1750
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1751
          update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1752
 
1753
      return;
1754
    }
1755
 
1756
  if (!REG_P (reg))
1757
    return;
1758
 
1759
  /* Global registers are always live, so the code below does not apply
1760
     to them.  */
1761
 
1762
  regno = REGNO (reg);
1763
 
1764
  if (! HARD_REGISTER_NUM_P (regno)
1765
      || !global_regs[regno])
1766
    {
1767
      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1768
        {
1769
          basic_block b = candidate_table[src].update_bbs.first_member[i];
1770
 
1771
          if (HARD_REGISTER_NUM_P (regno))
1772
            bitmap_set_range (df_get_live_in (b), regno,
1773
                              hard_regno_nregs[regno][GET_MODE (reg)]);
1774
          else
1775
            bitmap_set_bit (df_get_live_in (b), regno);
1776
        }
1777
    }
1778
}
1779
 
1780
/* Return 1 if insn can be speculatively moved from block src to trg,
1781
   otherwise return 0.  Called before first insertion of insn to
1782
   ready-list or before the scheduling.  */
1783
 
1784
static int
1785
check_live (rtx insn, int src)
1786
{
1787
  /* Find the registers set by instruction.  */
1788
  if (GET_CODE (PATTERN (insn)) == SET
1789
      || GET_CODE (PATTERN (insn)) == CLOBBER)
1790
    return check_live_1 (src, PATTERN (insn));
1791
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1792
    {
1793
      int j;
1794
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1795
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1796
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1797
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1798
          return 0;
1799
 
1800
      return 1;
1801
    }
1802
 
1803
  return 1;
1804
}
1805
 
1806
/* Update the live registers info after insn was moved speculatively from
1807
   block src to trg.  */
1808
 
1809
static void
1810
update_live (rtx insn, int src)
1811
{
1812
  /* Find the registers set by instruction.  */
1813
  if (GET_CODE (PATTERN (insn)) == SET
1814
      || GET_CODE (PATTERN (insn)) == CLOBBER)
1815
    update_live_1 (src, PATTERN (insn));
1816
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1817
    {
1818
      int j;
1819
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1820
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1821
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1822
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1823
    }
1824
}
1825
 
1826
/* Nonzero if block bb_to is equal to, or reachable from block bb_from.  */
1827
#define IS_REACHABLE(bb_from, bb_to)                                    \
1828
  (bb_from == bb_to                                                     \
1829
   || IS_RGN_ENTRY (bb_from)                                            \
1830
   || (TEST_BIT (ancestor_edges[bb_to],                                 \
1831
         EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
1832
 
1833
/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */
1834
 
1835
static void
1836
set_spec_fed (rtx load_insn)
1837
{
1838
  sd_iterator_def sd_it;
1839
  dep_t dep;
1840
 
1841
  FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1842
    if (DEP_TYPE (dep) == REG_DEP_TRUE)
1843
      FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1844
}
1845
 
1846
/* On the path from the insn to load_insn_bb, find a conditional
1847
branch depending on insn, that guards the speculative load.  */
1848
 
1849
static int
1850
find_conditional_protection (rtx insn, int load_insn_bb)
1851
{
1852
  sd_iterator_def sd_it;
1853
  dep_t dep;
1854
 
1855
  /* Iterate through DEF-USE forward dependences.  */
1856
  FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1857
    {
1858
      rtx next = DEP_CON (dep);
1859
 
1860
      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1861
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1862
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1863
          && load_insn_bb != INSN_BB (next)
1864
          && DEP_TYPE (dep) == REG_DEP_TRUE
1865
          && (JUMP_P (next)
1866
              || find_conditional_protection (next, load_insn_bb)))
1867
        return 1;
1868
    }
1869
  return 0;
1870
}                               /* find_conditional_protection */
1871
 
1872
/* Returns 1 if the same insn1 that participates in the computation
1873
   of load_insn's address is feeding a conditional branch that is
1874
   guarding on load_insn. This is true if we find two DEF-USE
1875
   chains:
1876
   insn1 -> ... -> conditional-branch
1877
   insn1 -> ... -> load_insn,
1878
   and if a flow path exists:
1879
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
1880
   and if insn1 is on the path
1881
   region-entry -> ... -> bb_trg -> ... load_insn.
1882
 
1883
   Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1884
   Locate the branch by following INSN_FORW_DEPS from insn1.  */
1885
 
1886
static int
1887
is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1888
{
1889
  sd_iterator_def sd_it;
1890
  dep_t dep;
1891
 
1892
  FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1893
    {
1894
      rtx insn1 = DEP_PRO (dep);
1895
 
1896
      /* Must be a DEF-USE dependence upon non-branch.  */
1897
      if (DEP_TYPE (dep) != REG_DEP_TRUE
1898
          || JUMP_P (insn1))
1899
        continue;
1900
 
1901
      /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn.  */
1902
      if (INSN_BB (insn1) == bb_src
1903
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
1904
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1905
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1906
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1907
        continue;
1908
 
1909
      /* Now search for the conditional-branch.  */
1910
      if (find_conditional_protection (insn1, bb_src))
1911
        return 1;
1912
 
1913
      /* Recursive step: search another insn1, "above" current insn1.  */
1914
      return is_conditionally_protected (insn1, bb_src, bb_trg);
1915
    }
1916
 
1917
  /* The chain does not exist.  */
1918
  return 0;
1919
}                               /* is_conditionally_protected */
1920
 
1921
/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1922
   load_insn can move speculatively from bb_src to bb_trg.  All the
1923
   following must hold:
1924
 
1925
   (1) both loads have 1 base register (PFREE_CANDIDATEs).
1926
   (2) load_insn and load1 have a def-use dependence upon
1927
   the same insn 'insn1'.
1928
   (3) either load2 is in bb_trg, or:
1929
   - there's only one split-block, and
1930
   - load1 is on the escape path, and
1931
 
1932
   From all these we can conclude that the two loads access memory
1933
   addresses that differ at most by a constant, and hence if moving
1934
   load_insn would cause an exception, it would have been caused by
1935
   load2 anyhow.  */
1936
 
1937
static int
1938
is_pfree (rtx load_insn, int bb_src, int bb_trg)
1939
{
1940
  sd_iterator_def back_sd_it;
1941
  dep_t back_dep;
1942
  candidate *candp = candidate_table + bb_src;
1943
 
1944
  if (candp->split_bbs.nr_members != 1)
1945
    /* Must have exactly one escape block.  */
1946
    return 0;
1947
 
1948
  FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1949
    {
1950
      rtx insn1 = DEP_PRO (back_dep);
1951
 
1952
      if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1953
        /* Found a DEF-USE dependence (insn1, load_insn).  */
1954
        {
1955
          sd_iterator_def fore_sd_it;
1956
          dep_t fore_dep;
1957
 
1958
          FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1959
            {
1960
              rtx insn2 = DEP_CON (fore_dep);
1961
 
1962
              if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1963
                {
1964
                  /* Found a DEF-USE dependence (insn1, insn2).  */
1965
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1966
                    /* insn2 not guaranteed to be a 1 base reg load.  */
1967
                    continue;
1968
 
1969
                  if (INSN_BB (insn2) == bb_trg)
1970
                    /* insn2 is the similar load, in the target block.  */
1971
                    return 1;
1972
 
1973
                  if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1974
                    /* insn2 is a similar load, in a split-block.  */
1975
                    return 1;
1976
                }
1977
            }
1978
        }
1979
    }
1980
 
1981
  /* Couldn't find a similar load.  */
1982
  return 0;
1983
}                               /* is_pfree */
1984
 
1985
/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1986
   a load moved speculatively, or if load_insn is protected by
1987
   a compare on load_insn's address).  */
1988
 
1989
static int
1990
is_prisky (rtx load_insn, int bb_src, int bb_trg)
1991
{
1992
  if (FED_BY_SPEC_LOAD (load_insn))
1993
    return 1;
1994
 
1995
  if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
1996
    /* Dependence may 'hide' out of the region.  */
1997
    return 1;
1998
 
1999
  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2000
    return 1;
2001
 
2002
  return 0;
2003
}
2004
 
2005
/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2006
   Return 1 if insn is exception-free (and the motion is valid)
2007
   and 0 otherwise.  */
2008
 
2009
static int
2010
is_exception_free (rtx insn, int bb_src, int bb_trg)
2011
{
2012
  int insn_class = haifa_classify_insn (insn);
2013
 
2014
  /* Handle non-load insns.  */
2015
  switch (insn_class)
2016
    {
2017
    case TRAP_FREE:
2018
      return 1;
2019
    case TRAP_RISKY:
2020
      return 0;
2021
    default:;
2022
    }
2023
 
2024
  /* Handle loads.  */
2025
  if (!flag_schedule_speculative_load)
2026
    return 0;
2027
  IS_LOAD_INSN (insn) = 1;
2028
  switch (insn_class)
2029
    {
2030
    case IFREE:
2031
      return (1);
2032
    case IRISKY:
2033
      return 0;
2034
    case PFREE_CANDIDATE:
2035
      if (is_pfree (insn, bb_src, bb_trg))
2036
        return 1;
2037
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
2038
    case PRISKY_CANDIDATE:
2039
      if (!flag_schedule_speculative_load_dangerous
2040
          || is_prisky (insn, bb_src, bb_trg))
2041
        return 0;
2042
      break;
2043
    default:;
2044
    }
2045
 
2046
  return flag_schedule_speculative_load_dangerous;
2047
}
2048
 
2049
/* The number of insns from the current block scheduled so far.  */
2050
static int sched_target_n_insns;
2051
/* The number of insns from the current block to be scheduled in total.  */
2052
static int target_n_insns;
2053
/* The number of insns from the entire region scheduled so far.  */
2054
static int sched_n_insns;
2055
 
2056
/* Implementations of the sched_info functions for region scheduling.  */
2057
static void init_ready_list (void);
2058
static int can_schedule_ready_p (rtx);
2059
static void begin_schedule_ready (rtx);
2060
static ds_t new_ready (rtx, ds_t);
2061
static int schedule_more_p (void);
2062
static const char *rgn_print_insn (const_rtx, int);
2063
static int rgn_rank (rtx, rtx);
2064
static void compute_jump_reg_dependencies (rtx, regset);
2065
 
2066
/* Functions for speculative scheduling.  */
2067
static void rgn_add_remove_insn (rtx, int);
2068
static void rgn_add_block (basic_block, basic_block);
2069
static void rgn_fix_recovery_cfg (int, int, int);
2070
static basic_block advance_target_bb (basic_block, rtx);
2071
 
2072
/* Return nonzero if there are more insns that should be scheduled.  */
2073
 
2074
static int
2075
schedule_more_p (void)
2076
{
2077
  return sched_target_n_insns < target_n_insns;
2078
}
2079
 
2080
/* Add all insns that are initially ready to the ready list READY.  Called
2081
   once before scheduling a set of insns.  */
2082
 
2083
static void
2084
init_ready_list (void)
2085
{
2086
  rtx prev_head = current_sched_info->prev_head;
2087
  rtx next_tail = current_sched_info->next_tail;
2088
  int bb_src;
2089
  rtx insn;
2090
 
2091
  target_n_insns = 0;
2092
  sched_target_n_insns = 0;
2093
  sched_n_insns = 0;
2094
 
2095
  /* Print debugging information.  */
2096
  if (sched_verbose >= 5)
2097
    debug_rgn_dependencies (target_bb);
2098
 
2099
  /* Prepare current target block info.  */
2100
  if (current_nr_blocks > 1)
2101
    compute_trg_info (target_bb);
2102
 
2103
  /* Initialize ready list with all 'ready' insns in target block.
2104
     Count number of insns in the target block being scheduled.  */
2105
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2106
    {
2107
      try_ready (insn);
2108
      target_n_insns++;
2109
 
2110
      gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2111
    }
2112
 
2113
  /* Add to ready list all 'ready' insns in valid source blocks.
2114
     For speculative insns, check-live, exception-free, and
2115
     issue-delay.  */
2116
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2117
    if (IS_VALID (bb_src))
2118
      {
2119
        rtx src_head;
2120
        rtx src_next_tail;
2121
        rtx tail, head;
2122
 
2123
        get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2124
                           &head, &tail);
2125
        src_next_tail = NEXT_INSN (tail);
2126
        src_head = head;
2127
 
2128
        for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2129
          if (INSN_P (insn))
2130
            try_ready (insn);
2131
      }
2132
}
2133
 
2134
/* Called after taking INSN from the ready list.  Returns nonzero if this
2135
   insn can be scheduled, nonzero if we should silently discard it.  */
2136
 
2137
static int
2138
can_schedule_ready_p (rtx insn)
2139
{
2140
  /* An interblock motion?  */
2141
  if (INSN_BB (insn) != target_bb
2142
      && IS_SPECULATIVE_INSN (insn)
2143
      && !check_live (insn, INSN_BB (insn)))
2144
    return 0;
2145
  else
2146
    return 1;
2147
}
2148
 
2149
/* Updates counter and other information.  Split from can_schedule_ready_p ()
2150
   because when we schedule insn speculatively then insn passed to
2151
   can_schedule_ready_p () differs from the one passed to
2152
   begin_schedule_ready ().  */
2153
static void
2154
begin_schedule_ready (rtx insn)
2155
{
2156
  /* An interblock motion?  */
2157
  if (INSN_BB (insn) != target_bb)
2158
    {
2159
      if (IS_SPECULATIVE_INSN (insn))
2160
        {
2161
          gcc_assert (check_live (insn, INSN_BB (insn)));
2162
 
2163
          update_live (insn, INSN_BB (insn));
2164
 
2165
          /* For speculative load, mark insns fed by it.  */
2166
          if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2167
            set_spec_fed (insn);
2168
 
2169
          nr_spec++;
2170
        }
2171
      nr_inter++;
2172
    }
2173
  else
2174
    {
2175
      /* In block motion.  */
2176
      sched_target_n_insns++;
2177
    }
2178
  sched_n_insns++;
2179
}
2180
 
2181
/* Called after INSN has all its hard dependencies resolved and the speculation
2182
   of type TS is enough to overcome them all.
2183
   Return nonzero if it should be moved to the ready list or the queue, or zero
2184
   if we should silently discard it.  */
2185
static ds_t
2186
new_ready (rtx next, ds_t ts)
2187
{
2188
  if (INSN_BB (next) != target_bb)
2189
    {
2190
      int not_ex_free = 0;
2191
 
2192
      /* For speculative insns, before inserting to ready/queue,
2193
         check live, exception-free, and issue-delay.  */
2194
      if (!IS_VALID (INSN_BB (next))
2195
          || CANT_MOVE (next)
2196
          || (IS_SPECULATIVE_INSN (next)
2197
              && ((recog_memoized (next) >= 0
2198
                   && min_insn_conflict_delay (curr_state, next, next)
2199
                   > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2200
                  || IS_SPECULATION_CHECK_P (next)
2201
                  || !check_live (next, INSN_BB (next))
2202
                  || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2203
                                                        target_bb)))))
2204
        {
2205
          if (not_ex_free
2206
              /* We are here because is_exception_free () == false.
2207
                 But we possibly can handle that with control speculation.  */
2208
              && sched_deps_info->generate_spec_deps
2209
              && spec_info->mask & BEGIN_CONTROL)
2210
            {
2211
              ds_t new_ds;
2212
 
2213
              /* Add control speculation to NEXT's dependency type.  */
2214
              new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2215
 
2216
              /* Check if NEXT can be speculated with new dependency type.  */
2217
              if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2218
                /* Here we got new control-speculative instruction.  */
2219
                ts = new_ds;
2220
              else
2221
                /* NEXT isn't ready yet.  */
2222
                ts = (ts & ~SPECULATIVE) | HARD_DEP;
2223
            }
2224
          else
2225
            /* NEXT isn't ready yet.  */
2226
            ts = (ts & ~SPECULATIVE) | HARD_DEP;
2227
        }
2228
    }
2229
 
2230
  return ts;
2231
}
2232
 
2233
/* Return a string that contains the insn uid and optionally anything else
2234
   necessary to identify this insn in an output.  It's valid to use a
2235
   static buffer for this.  The ALIGNED parameter should cause the string
2236
   to be formatted so that multiple output lines will line up nicely.  */
2237
 
2238
static const char *
2239
rgn_print_insn (const_rtx insn, int aligned)
2240
{
2241
  static char tmp[80];
2242
 
2243
  if (aligned)
2244
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2245
  else
2246
    {
2247
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2248
        sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2249
      else
2250
        sprintf (tmp, "%d", INSN_UID (insn));
2251
    }
2252
  return tmp;
2253
}
2254
 
2255
/* Compare priority of two insns.  Return a positive number if the second
2256
   insn is to be preferred for scheduling, and a negative one if the first
2257
   is to be preferred.  Zero if they are equally good.  */
2258
 
2259
static int
2260
rgn_rank (rtx insn1, rtx insn2)
2261
{
2262
  /* Some comparison make sense in interblock scheduling only.  */
2263
  if (INSN_BB (insn1) != INSN_BB (insn2))
2264
    {
2265
      int spec_val, prob_val;
2266
 
2267
      /* Prefer an inblock motion on an interblock motion.  */
2268
      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2269
        return 1;
2270
      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2271
        return -1;
2272
 
2273
      /* Prefer a useful motion on a speculative one.  */
2274
      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2275
      if (spec_val)
2276
        return spec_val;
2277
 
2278
      /* Prefer a more probable (speculative) insn.  */
2279
      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2280
      if (prob_val)
2281
        return prob_val;
2282
    }
2283
  return 0;
2284
}
2285
 
2286
/* NEXT is an instruction that depends on INSN (a backward dependence);
2287
   return nonzero if we should include this dependence in priority
2288
   calculations.  */
2289
 
2290
int
2291
contributes_to_priority (rtx next, rtx insn)
2292
{
2293
  /* NEXT and INSN reside in one ebb.  */
2294
  return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2295
}
2296
 
2297
/* INSN is a JUMP_INSN.  Store the set of registers that must be
2298
   considered as used by this jump in USED.  */
2299
 
2300
static void
2301
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2302
                               regset used ATTRIBUTE_UNUSED)
2303
{
2304
  /* Nothing to do here, since we postprocess jumps in
2305
     add_branch_dependences.  */
2306
}
2307
 
2308
/* This variable holds common_sched_info hooks and data relevant to
2309
   the interblock scheduler.  */
2310
static struct common_sched_info_def rgn_common_sched_info;
2311
 
2312
 
2313
/* This holds data for the dependence analysis relevant to
2314
   the interblock scheduler.  */
2315
static struct sched_deps_info_def rgn_sched_deps_info;
2316
 
2317
/* This holds constant data used for initializing the above structure
2318
   for the Haifa scheduler.  */
2319
static const struct sched_deps_info_def rgn_const_sched_deps_info =
2320
  {
2321
    compute_jump_reg_dependencies,
2322
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2323
    0, 0, 0
2324
  };
2325
 
2326
/* Same as above, but for the selective scheduler.  */
2327
static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2328
  {
2329
    compute_jump_reg_dependencies,
2330
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2331
    0, 0, 0
2332
  };
2333
 
2334
/* Return true if scheduling INSN will trigger finish of scheduling
2335
   current block.  */
2336
static bool
2337
rgn_insn_finishes_block_p (rtx insn)
2338
{
2339
  if (INSN_BB (insn) == target_bb
2340
      && sched_target_n_insns + 1 == target_n_insns)
2341
    /* INSN is the last not-scheduled instruction in the current block.  */
2342
    return true;
2343
 
2344
  return false;
2345
}
2346
 
2347
/* Used in schedule_insns to initialize current_sched_info for scheduling
2348
   regions (or single basic blocks).  */
2349
 
2350
static const struct haifa_sched_info rgn_const_sched_info =
2351
{
2352
  init_ready_list,
2353
  can_schedule_ready_p,
2354
  schedule_more_p,
2355
  new_ready,
2356
  rgn_rank,
2357
  rgn_print_insn,
2358
  contributes_to_priority,
2359
  rgn_insn_finishes_block_p,
2360
 
2361
  NULL, NULL,
2362
  NULL, NULL,
2363
  0, 0,
2364
 
2365
  rgn_add_remove_insn,
2366
  begin_schedule_ready,
2367
  NULL,
2368
  advance_target_bb,
2369
  NULL, NULL,
2370
  SCHED_RGN
2371
};
2372
 
2373
/* This variable holds the data and hooks needed to the Haifa scheduler backend
2374
   for the interblock scheduler frontend.  */
2375
static struct haifa_sched_info rgn_sched_info;
2376
 
2377
/* Returns maximum priority that an insn was assigned to.  */
2378
 
2379
int
2380
get_rgn_sched_max_insns_priority (void)
2381
{
2382
  return rgn_sched_info.sched_max_insns_priority;
2383
}
2384
 
2385
/* Determine if PAT sets a TARGET_CLASS_LIKELY_SPILLED_P register.  */
2386
 
2387
static bool
2388
sets_likely_spilled (rtx pat)
2389
{
2390
  bool ret = false;
2391
  note_stores (pat, sets_likely_spilled_1, &ret);
2392
  return ret;
2393
}
2394
 
2395
static void
2396
sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2397
{
2398
  bool *ret = (bool *) data;
2399
 
2400
  if (GET_CODE (pat) == SET
2401
      && REG_P (x)
2402
      && HARD_REGISTER_P (x)
2403
      && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (x))))
2404
    *ret = true;
2405
}
2406
 
2407
/* A bitmap to note insns that participate in any dependency.  Used in
2408
   add_branch_dependences.  */
2409
static sbitmap insn_referenced;
2410
 
2411
/* Add dependences so that branches are scheduled to run last in their
2412
   block.  */
2413
static void
2414
add_branch_dependences (rtx head, rtx tail)
2415
{
2416
  rtx insn, last;
2417
 
2418
  /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2419
     that can throw exceptions, force them to remain in order at the end of
2420
     the block by adding dependencies and giving the last a high priority.
2421
     There may be notes present, and prev_head may also be a note.
2422
 
2423
     Branches must obviously remain at the end.  Calls should remain at the
2424
     end since moving them results in worse register allocation.  Uses remain
2425
     at the end to ensure proper register allocation.
2426
 
2427
     cc0 setters remain at the end because they can't be moved away from
2428
     their cc0 user.
2429
 
2430
     COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2431
 
2432
     Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
2433
     values) are not moved before reload because we can wind up with register
2434
     allocation failures.  */
2435
 
2436
  while (tail != head && DEBUG_INSN_P (tail))
2437
    tail = PREV_INSN (tail);
2438
 
2439
  insn = tail;
2440
  last = 0;
2441
  while (CALL_P (insn)
2442
         || JUMP_P (insn)
2443
         || (NONJUMP_INSN_P (insn)
2444
             && (GET_CODE (PATTERN (insn)) == USE
2445
                 || GET_CODE (PATTERN (insn)) == CLOBBER
2446
                 || can_throw_internal (insn)
2447
#ifdef HAVE_cc0
2448
                 || sets_cc0_p (PATTERN (insn))
2449
#endif
2450
                 || (!reload_completed
2451
                     && sets_likely_spilled (PATTERN (insn)))))
2452
         || NOTE_P (insn))
2453
    {
2454
      if (!NOTE_P (insn))
2455
        {
2456
          if (last != 0
2457
              && sd_find_dep_between (insn, last, false) == NULL)
2458
            {
2459
              if (! sched_insns_conditions_mutex_p (last, insn))
2460
                add_dependence (last, insn, REG_DEP_ANTI);
2461
              SET_BIT (insn_referenced, INSN_LUID (insn));
2462
            }
2463
 
2464
          CANT_MOVE (insn) = 1;
2465
 
2466
          last = insn;
2467
        }
2468
 
2469
      /* Don't overrun the bounds of the basic block.  */
2470
      if (insn == head)
2471
        break;
2472
 
2473
      do
2474
        insn = PREV_INSN (insn);
2475
      while (insn != head && DEBUG_INSN_P (insn));
2476
    }
2477
 
2478
  /* Make sure these insns are scheduled last in their block.  */
2479
  insn = last;
2480
  if (insn != 0)
2481
    while (insn != head)
2482
      {
2483
        insn = prev_nonnote_insn (insn);
2484
 
2485
        if (TEST_BIT (insn_referenced, INSN_LUID (insn))
2486
            || DEBUG_INSN_P (insn))
2487
          continue;
2488
 
2489
        if (! sched_insns_conditions_mutex_p (last, insn))
2490
          add_dependence (last, insn, REG_DEP_ANTI);
2491
      }
2492
 
2493
  if (!targetm.have_conditional_execution ())
2494
    return;
2495
 
2496
  /* Finally, if the block ends in a jump, and we are doing intra-block
2497
     scheduling, make sure that the branch depends on any COND_EXEC insns
2498
     inside the block to avoid moving the COND_EXECs past the branch insn.
2499
 
2500
     We only have to do this after reload, because (1) before reload there
2501
     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2502
     scheduler after reload.
2503
 
2504
     FIXME: We could in some cases move COND_EXEC insns past the branch if
2505
     this scheduler would be a little smarter.  Consider this code:
2506
 
2507
                T = [addr]
2508
        C  ?    addr += 4
2509
        !C ?    X += 12
2510
        C  ?    T += 1
2511
        C  ?    jump foo
2512
 
2513
     On a target with a one cycle stall on a memory access the optimal
2514
     sequence would be:
2515
 
2516
                T = [addr]
2517
        C  ?    addr += 4
2518
        C  ?    T += 1
2519
        C  ?    jump foo
2520
        !C ?    X += 12
2521
 
2522
     We don't want to put the 'X += 12' before the branch because it just
2523
     wastes a cycle of execution time when the branch is taken.
2524
 
2525
     Note that in the example "!C" will always be true.  That is another
2526
     possible improvement for handling COND_EXECs in this scheduler: it
2527
     could remove always-true predicates.  */
2528
 
2529
  if (!reload_completed || ! JUMP_P (tail))
2530
    return;
2531
 
2532
  insn = tail;
2533
  while (insn != head)
2534
    {
2535
      insn = PREV_INSN (insn);
2536
 
2537
      /* Note that we want to add this dependency even when
2538
         sched_insns_conditions_mutex_p returns true.  The whole point
2539
         is that we _want_ this dependency, even if these insns really
2540
         are independent.  */
2541
      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2542
        add_dependence (tail, insn, REG_DEP_ANTI);
2543
    }
2544
}
2545
 
2546
/* Data structures for the computation of data dependences in a regions.  We
2547
   keep one `deps' structure for every basic block.  Before analyzing the
2548
   data dependences for a bb, its variables are initialized as a function of
2549
   the variables of its predecessors.  When the analysis for a bb completes,
2550
   we save the contents to the corresponding bb_deps[bb] variable.  */
2551
 
2552
static struct deps_desc *bb_deps;
2553
 
2554
static void
2555
concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2556
                      rtx *old_mems_p)
2557
{
2558
  rtx new_insns = *old_insns_p;
2559
  rtx new_mems = *old_mems_p;
2560
 
2561
  while (copy_insns)
2562
    {
2563
      new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2564
      new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2565
      copy_insns = XEXP (copy_insns, 1);
2566
      copy_mems = XEXP (copy_mems, 1);
2567
    }
2568
 
2569
  *old_insns_p = new_insns;
2570
  *old_mems_p = new_mems;
2571
}
2572
 
2573
/* Join PRED_DEPS to the SUCC_DEPS.  */
2574
void
2575
deps_join (struct deps_desc *succ_deps, struct deps_desc *pred_deps)
2576
{
2577
  unsigned reg;
2578
  reg_set_iterator rsi;
2579
 
2580
  /* The reg_last lists are inherited by successor.  */
2581
  EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2582
    {
2583
      struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2584
      struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2585
 
2586
      succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2587
      succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2588
      succ_rl->implicit_sets
2589
        = concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2590
      succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2591
                                            succ_rl->clobbers);
2592
      succ_rl->uses_length += pred_rl->uses_length;
2593
      succ_rl->clobbers_length += pred_rl->clobbers_length;
2594
    }
2595
  IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2596
 
2597
  /* Mem read/write lists are inherited by successor.  */
2598
  concat_insn_mem_list (pred_deps->pending_read_insns,
2599
                        pred_deps->pending_read_mems,
2600
                        &succ_deps->pending_read_insns,
2601
                        &succ_deps->pending_read_mems);
2602
  concat_insn_mem_list (pred_deps->pending_write_insns,
2603
                        pred_deps->pending_write_mems,
2604
                        &succ_deps->pending_write_insns,
2605
                        &succ_deps->pending_write_mems);
2606
 
2607
  succ_deps->pending_jump_insns
2608
    = concat_INSN_LIST (pred_deps->pending_jump_insns,
2609
                        succ_deps->pending_jump_insns);
2610
  succ_deps->last_pending_memory_flush
2611
    = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2612
                        succ_deps->last_pending_memory_flush);
2613
 
2614
  succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2615
  succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2616
  succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2617
 
2618
  /* last_function_call is inherited by successor.  */
2619
  succ_deps->last_function_call
2620
    = concat_INSN_LIST (pred_deps->last_function_call,
2621
                        succ_deps->last_function_call);
2622
 
2623
  /* last_function_call_may_noreturn is inherited by successor.  */
2624
  succ_deps->last_function_call_may_noreturn
2625
    = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2626
                        succ_deps->last_function_call_may_noreturn);
2627
 
2628
  /* sched_before_next_call is inherited by successor.  */
2629
  succ_deps->sched_before_next_call
2630
    = concat_INSN_LIST (pred_deps->sched_before_next_call,
2631
                        succ_deps->sched_before_next_call);
2632
}
2633
 
2634
/* After computing the dependencies for block BB, propagate the dependencies
2635
   found in TMP_DEPS to the successors of the block.  */
2636
static void
2637
propagate_deps (int bb, struct deps_desc *pred_deps)
2638
{
2639
  basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2640
  edge_iterator ei;
2641
  edge e;
2642
 
2643
  /* bb's structures are inherited by its successors.  */
2644
  FOR_EACH_EDGE (e, ei, block->succs)
2645
    {
2646
      /* Only bbs "below" bb, in the same region, are interesting.  */
2647
      if (e->dest == EXIT_BLOCK_PTR
2648
          || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2649
          || BLOCK_TO_BB (e->dest->index) <= bb)
2650
        continue;
2651
 
2652
      deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2653
    }
2654
 
2655
  /* These lists should point to the right place, for correct
2656
     freeing later.  */
2657
  bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2658
  bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2659
  bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2660
  bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2661
  bb_deps[bb].pending_jump_insns = pred_deps->pending_jump_insns;
2662
 
2663
  /* Can't allow these to be freed twice.  */
2664
  pred_deps->pending_read_insns = 0;
2665
  pred_deps->pending_read_mems = 0;
2666
  pred_deps->pending_write_insns = 0;
2667
  pred_deps->pending_write_mems = 0;
2668
  pred_deps->pending_jump_insns = 0;
2669
}
2670
 
2671
/* Compute dependences inside bb.  In a multiple blocks region:
2672
   (1) a bb is analyzed after its predecessors, and (2) the lists in
2673
   effect at the end of bb (after analyzing for bb) are inherited by
2674
   bb's successors.
2675
 
2676
   Specifically for reg-reg data dependences, the block insns are
2677
   scanned by sched_analyze () top-to-bottom.  Three lists are
2678
   maintained by sched_analyze (): reg_last[].sets for register DEFs,
2679
   reg_last[].implicit_sets for implicit hard register DEFs, and
2680
   reg_last[].uses for register USEs.
2681
 
2682
   When analysis is completed for bb, we update for its successors:
2683
   ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2684
   ;  - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2685
   ;  - USES[succ] = Union (USES [succ], DEFS [bb])
2686
 
2687
   The mechanism for computing mem-mem data dependence is very
2688
   similar, and the result is interblock dependences in the region.  */
2689
 
2690
static void
2691
compute_block_dependences (int bb)
2692
{
2693
  rtx head, tail;
2694
  struct deps_desc tmp_deps;
2695
 
2696
  tmp_deps = bb_deps[bb];
2697
 
2698
  /* Do the analysis for this block.  */
2699
  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2700
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2701
 
2702
  sched_analyze (&tmp_deps, head, tail);
2703
 
2704
  /* Selective scheduling handles control dependencies by itself.  */
2705
  if (!sel_sched_p ())
2706
    add_branch_dependences (head, tail);
2707
 
2708
  if (current_nr_blocks > 1)
2709
    propagate_deps (bb, &tmp_deps);
2710
 
2711
  /* Free up the INSN_LISTs.  */
2712
  free_deps (&tmp_deps);
2713
 
2714
  if (targetm.sched.dependencies_evaluation_hook)
2715
    targetm.sched.dependencies_evaluation_hook (head, tail);
2716
}
2717
 
2718
/* Free dependencies of instructions inside BB.  */
2719
static void
2720
free_block_dependencies (int bb)
2721
{
2722
  rtx head;
2723
  rtx tail;
2724
 
2725
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2726
 
2727
  if (no_real_insns_p (head, tail))
2728
    return;
2729
 
2730
  sched_free_deps (head, tail, true);
2731
}
2732
 
2733
/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2734
   them to the unused_*_list variables, so that they can be reused.  */
2735
 
2736
static void
2737
free_pending_lists (void)
2738
{
2739
  int bb;
2740
 
2741
  for (bb = 0; bb < current_nr_blocks; bb++)
2742
    {
2743
      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2744
      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2745
      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2746
      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2747
      free_INSN_LIST_list (&bb_deps[bb].pending_jump_insns);
2748
    }
2749
}
2750
 
2751
/* Print dependences for debugging starting from FROM_BB.
2752
   Callable from debugger.  */
2753
/* Print dependences for debugging starting from FROM_BB.
2754
   Callable from debugger.  */
2755
DEBUG_FUNCTION void
2756
debug_rgn_dependencies (int from_bb)
2757
{
2758
  int bb;
2759
 
2760
  fprintf (sched_dump,
2761
           ";;   --------------- forward dependences: ------------ \n");
2762
 
2763
  for (bb = from_bb; bb < current_nr_blocks; bb++)
2764
    {
2765
      rtx head, tail;
2766
 
2767
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2768
      fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
2769
               BB_TO_BLOCK (bb), bb);
2770
 
2771
      debug_dependencies (head, tail);
2772
    }
2773
}
2774
 
2775
/* Print dependencies information for instructions between HEAD and TAIL.
2776
   ??? This function would probably fit best in haifa-sched.c.  */
2777
void debug_dependencies (rtx head, rtx tail)
2778
{
2779
  rtx insn;
2780
  rtx next_tail = NEXT_INSN (tail);
2781
 
2782
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2783
           "insn", "code", "bb", "dep", "prio", "cost",
2784
           "reservation");
2785
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2786
           "----", "----", "--", "---", "----", "----",
2787
           "-----------");
2788
 
2789
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2790
    {
2791
      if (! INSN_P (insn))
2792
        {
2793
          int n;
2794
          fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
2795
          if (NOTE_P (insn))
2796
            {
2797
              n = NOTE_KIND (insn);
2798
              fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2799
            }
2800
          else
2801
            fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2802
          continue;
2803
        }
2804
 
2805
      fprintf (sched_dump,
2806
               ";;   %s%5d%6d%6d%6d%6d%6d   ",
2807
               (SCHED_GROUP_P (insn) ? "+" : " "),
2808
               INSN_UID (insn),
2809
               INSN_CODE (insn),
2810
               BLOCK_NUM (insn),
2811
               sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2812
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2813
                               : INSN_PRIORITY (insn))
2814
                : INSN_PRIORITY (insn)),
2815
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2816
                               : insn_cost (insn))
2817
                : insn_cost (insn)));
2818
 
2819
      if (recog_memoized (insn) < 0)
2820
        fprintf (sched_dump, "nothing");
2821
      else
2822
        print_reservation (sched_dump, insn);
2823
 
2824
      fprintf (sched_dump, "\t: ");
2825
      {
2826
        sd_iterator_def sd_it;
2827
        dep_t dep;
2828
 
2829
        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2830
          fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep)));
2831
      }
2832
      fprintf (sched_dump, "\n");
2833
    }
2834
 
2835
  fprintf (sched_dump, "\n");
2836
}
2837
 
2838
/* Returns true if all the basic blocks of the current region have
2839
   NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region.  */
2840
bool
2841
sched_is_disabled_for_current_region_p (void)
2842
{
2843
  int bb;
2844
 
2845
  for (bb = 0; bb < current_nr_blocks; bb++)
2846
    if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2847
      return false;
2848
 
2849
  return true;
2850
}
2851
 
2852
/* Free all region dependencies saved in INSN_BACK_DEPS and
2853
   INSN_RESOLVED_BACK_DEPS.  The Haifa scheduler does this on the fly
2854
   when scheduling, so this function is supposed to be called from
2855
   the selective scheduling only.  */
2856
void
2857
free_rgn_deps (void)
2858
{
2859
  int bb;
2860
 
2861
  for (bb = 0; bb < current_nr_blocks; bb++)
2862
    {
2863
      rtx head, tail;
2864
 
2865
      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2866
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2867
 
2868
      sched_free_deps (head, tail, false);
2869
    }
2870
}
2871
 
2872
static int rgn_n_insns;
2873
 
2874
/* Compute insn priority for a current region.  */
2875
void
2876
compute_priorities (void)
2877
{
2878
  int bb;
2879
 
2880
  current_sched_info->sched_max_insns_priority = 0;
2881
  for (bb = 0; bb < current_nr_blocks; bb++)
2882
    {
2883
      rtx head, tail;
2884
 
2885
      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2886
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2887
 
2888
      if (no_real_insns_p (head, tail))
2889
        continue;
2890
 
2891
      rgn_n_insns += set_priorities (head, tail);
2892
    }
2893
  current_sched_info->sched_max_insns_priority++;
2894
}
2895
 
2896
/* Schedule a region.  A region is either an inner loop, a loop-free
2897
   subroutine, or a single basic block.  Each bb in the region is
2898
   scheduled after its flow predecessors.  */
2899
 
2900
static void
2901
schedule_region (int rgn)
2902
{
2903
  int bb;
2904
  int sched_rgn_n_insns = 0;
2905
 
2906
  rgn_n_insns = 0;
2907
 
2908
  rgn_setup_region (rgn);
2909
 
2910
  /* Don't schedule region that is marked by
2911
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
2912
  if (sched_is_disabled_for_current_region_p ())
2913
    return;
2914
 
2915
  sched_rgn_compute_dependencies (rgn);
2916
 
2917
  sched_rgn_local_init (rgn);
2918
 
2919
  /* Set priorities.  */
2920
  compute_priorities ();
2921
 
2922
  sched_extend_ready_list (rgn_n_insns);
2923
 
2924
  if (sched_pressure_p)
2925
    {
2926
      sched_init_region_reg_pressure_info ();
2927
      for (bb = 0; bb < current_nr_blocks; bb++)
2928
        {
2929
          basic_block first_bb, last_bb;
2930
          rtx head, tail;
2931
 
2932
          first_bb = EBB_FIRST_BB (bb);
2933
          last_bb = EBB_LAST_BB (bb);
2934
 
2935
          get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2936
 
2937
          if (no_real_insns_p (head, tail))
2938
            {
2939
              gcc_assert (first_bb == last_bb);
2940
              continue;
2941
            }
2942
          sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
2943
        }
2944
    }
2945
 
2946
  /* Now we can schedule all blocks.  */
2947
  for (bb = 0; bb < current_nr_blocks; bb++)
2948
    {
2949
      basic_block first_bb, last_bb, curr_bb;
2950
      rtx head, tail;
2951
 
2952
      first_bb = EBB_FIRST_BB (bb);
2953
      last_bb = EBB_LAST_BB (bb);
2954
 
2955
      get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2956
 
2957
      if (no_real_insns_p (head, tail))
2958
        {
2959
          gcc_assert (first_bb == last_bb);
2960
          continue;
2961
        }
2962
 
2963
      current_sched_info->prev_head = PREV_INSN (head);
2964
      current_sched_info->next_tail = NEXT_INSN (tail);
2965
 
2966
      remove_notes (head, tail);
2967
 
2968
      unlink_bb_notes (first_bb, last_bb);
2969
 
2970
      target_bb = bb;
2971
 
2972
      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
2973
      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
2974
 
2975
      curr_bb = first_bb;
2976
      if (dbg_cnt (sched_block))
2977
        {
2978
          schedule_block (&curr_bb);
2979
          gcc_assert (EBB_FIRST_BB (bb) == first_bb);
2980
          sched_rgn_n_insns += sched_n_insns;
2981
        }
2982
      else
2983
        {
2984
          sched_rgn_n_insns += rgn_n_insns;
2985
        }
2986
 
2987
      /* Clean up.  */
2988
      if (current_nr_blocks > 1)
2989
        free_trg_info ();
2990
    }
2991
 
2992
  /* Sanity check: verify that all region insns were scheduled.  */
2993
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2994
 
2995
  sched_finish_ready_list ();
2996
 
2997
  /* Done with this region.  */
2998
  sched_rgn_local_finish ();
2999
 
3000
  /* Free dependencies.  */
3001
  for (bb = 0; bb < current_nr_blocks; ++bb)
3002
    free_block_dependencies (bb);
3003
 
3004
  gcc_assert (haifa_recovery_bb_ever_added_p
3005
              || deps_pools_are_empty_p ());
3006
}
3007
 
3008
/* Initialize data structures for region scheduling.  */
3009
 
3010
void
3011
sched_rgn_init (bool single_blocks_p)
3012
{
3013
  min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
3014
                    / 100);
3015
 
3016
  nr_inter = 0;
3017
  nr_spec = 0;
3018
 
3019
  extend_regions ();
3020
 
3021
  CONTAINING_RGN (ENTRY_BLOCK) = -1;
3022
  CONTAINING_RGN (EXIT_BLOCK) = -1;
3023
 
3024
  /* Compute regions for scheduling.  */
3025
  if (single_blocks_p
3026
      || n_basic_blocks == NUM_FIXED_BLOCKS + 1
3027
      || !flag_schedule_interblock
3028
      || is_cfg_nonregular ())
3029
    {
3030
      find_single_block_region (sel_sched_p ());
3031
    }
3032
  else
3033
    {
3034
      /* Compute the dominators and post dominators.  */
3035
      if (!sel_sched_p ())
3036
        calculate_dominance_info (CDI_DOMINATORS);
3037
 
3038
      /* Find regions.  */
3039
      find_rgns ();
3040
 
3041
      if (sched_verbose >= 3)
3042
        debug_regions ();
3043
 
3044
      /* For now.  This will move as more and more of haifa is converted
3045
         to using the cfg code.  */
3046
      if (!sel_sched_p ())
3047
        free_dominance_info (CDI_DOMINATORS);
3048
    }
3049
 
3050
  gcc_assert (0 < nr_regions && nr_regions <= n_basic_blocks);
3051
 
3052
  RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1) +
3053
                             RGN_NR_BLOCKS (nr_regions - 1));
3054
}
3055
 
3056
/* Free data structures for region scheduling.  */
3057
void
3058
sched_rgn_finish (void)
3059
{
3060
  /* Reposition the prologue and epilogue notes in case we moved the
3061
     prologue/epilogue insns.  */
3062
  if (reload_completed)
3063
    reposition_prologue_and_epilogue_notes ();
3064
 
3065
  if (sched_verbose)
3066
    {
3067
      if (reload_completed == 0
3068
          && flag_schedule_interblock)
3069
        {
3070
          fprintf (sched_dump,
3071
                   "\n;; Procedure interblock/speculative motions == %d/%d \n",
3072
                   nr_inter, nr_spec);
3073
        }
3074
      else
3075
        gcc_assert (nr_inter <= 0);
3076
      fprintf (sched_dump, "\n\n");
3077
    }
3078
 
3079
  nr_regions = 0;
3080
 
3081
  free (rgn_table);
3082
  rgn_table = NULL;
3083
 
3084
  free (rgn_bb_table);
3085
  rgn_bb_table = NULL;
3086
 
3087
  free (block_to_bb);
3088
  block_to_bb = NULL;
3089
 
3090
  free (containing_rgn);
3091
  containing_rgn = NULL;
3092
 
3093
  free (ebb_head);
3094
  ebb_head = NULL;
3095
}
3096
 
3097
/* Setup global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCK to
3098
   point to the region RGN.  */
3099
void
3100
rgn_setup_region (int rgn)
3101
{
3102
  int bb;
3103
 
3104
  /* Set variables for the current region.  */
3105
  current_nr_blocks = RGN_NR_BLOCKS (rgn);
3106
  current_blocks = RGN_BLOCKS (rgn);
3107
 
3108
  /* EBB_HEAD is a region-scope structure.  But we realloc it for
3109
     each region to save time/memory/something else.
3110
     See comments in add_block1, for what reasons we allocate +1 element.  */
3111
  ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3112
  for (bb = 0; bb <= current_nr_blocks; bb++)
3113
    ebb_head[bb] = current_blocks + bb;
3114
}
3115
 
3116
/* Compute instruction dependencies in region RGN.  */
3117
void
3118
sched_rgn_compute_dependencies (int rgn)
3119
{
3120
  if (!RGN_DONT_CALC_DEPS (rgn))
3121
    {
3122
      int bb;
3123
 
3124
      if (sel_sched_p ())
3125
        sched_emulate_haifa_p = 1;
3126
 
3127
      init_deps_global ();
3128
 
3129
      /* Initializations for region data dependence analysis.  */
3130
      bb_deps = XNEWVEC (struct deps_desc, current_nr_blocks);
3131
      for (bb = 0; bb < current_nr_blocks; bb++)
3132
        init_deps (bb_deps + bb, false);
3133
 
3134
      /* Initialize bitmap used in add_branch_dependences.  */
3135
      insn_referenced = sbitmap_alloc (sched_max_luid);
3136
      sbitmap_zero (insn_referenced);
3137
 
3138
      /* Compute backward dependencies.  */
3139
      for (bb = 0; bb < current_nr_blocks; bb++)
3140
        compute_block_dependences (bb);
3141
 
3142
      sbitmap_free (insn_referenced);
3143
      free_pending_lists ();
3144
      finish_deps_global ();
3145
      free (bb_deps);
3146
 
3147
      /* We don't want to recalculate this twice.  */
3148
      RGN_DONT_CALC_DEPS (rgn) = 1;
3149
 
3150
      if (sel_sched_p ())
3151
        sched_emulate_haifa_p = 0;
3152
    }
3153
  else
3154
    /* (This is a recovery block.  It is always a single block region.)
3155
       OR (We use selective scheduling.)  */
3156
    gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
3157
}
3158
 
3159
/* Init region data structures.  Returns true if this region should
3160
   not be scheduled.  */
3161
void
3162
sched_rgn_local_init (int rgn)
3163
{
3164
  int bb;
3165
 
3166
  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
3167
  if (current_nr_blocks > 1)
3168
    {
3169
      basic_block block;
3170
      edge e;
3171
      edge_iterator ei;
3172
 
3173
      prob = XNEWVEC (int, current_nr_blocks);
3174
 
3175
      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
3176
      sbitmap_vector_zero (dom, current_nr_blocks);
3177
 
3178
      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
3179
      rgn_nr_edges = 0;
3180
      FOR_EACH_BB (block)
3181
        {
3182
          if (CONTAINING_RGN (block->index) != rgn)
3183
            continue;
3184
          FOR_EACH_EDGE (e, ei, block->succs)
3185
            SET_EDGE_TO_BIT (e, rgn_nr_edges++);
3186
        }
3187
 
3188
      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
3189
      rgn_nr_edges = 0;
3190
      FOR_EACH_BB (block)
3191
        {
3192
          if (CONTAINING_RGN (block->index) != rgn)
3193
            continue;
3194
          FOR_EACH_EDGE (e, ei, block->succs)
3195
            rgn_edges[rgn_nr_edges++] = e;
3196
        }
3197
 
3198
      /* Split edges.  */
3199
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3200
      sbitmap_vector_zero (pot_split, current_nr_blocks);
3201
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3202
      sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
3203
 
3204
      /* Compute probabilities, dominators, split_edges.  */
3205
      for (bb = 0; bb < current_nr_blocks; bb++)
3206
        compute_dom_prob_ps (bb);
3207
 
3208
      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
3209
      /* We don't need them anymore.  But we want to avoid duplication of
3210
         aux fields in the newly created edges.  */
3211
      FOR_EACH_BB (block)
3212
        {
3213
          if (CONTAINING_RGN (block->index) != rgn)
3214
            continue;
3215
          FOR_EACH_EDGE (e, ei, block->succs)
3216
            e->aux = NULL;
3217
        }
3218
    }
3219
}
3220
 
3221
/* Free data computed for the finished region.  */
3222
void
3223
sched_rgn_local_free (void)
3224
{
3225
  free (prob);
3226
  sbitmap_vector_free (dom);
3227
  sbitmap_vector_free (pot_split);
3228
  sbitmap_vector_free (ancestor_edges);
3229
  free (rgn_edges);
3230
}
3231
 
3232
/* Free data computed for the finished region.  */
3233
void
3234
sched_rgn_local_finish (void)
3235
{
3236
  if (current_nr_blocks > 1 && !sel_sched_p ())
3237
    {
3238
      sched_rgn_local_free ();
3239
    }
3240
}
3241
 
3242
/* Setup scheduler infos.  */
3243
void
3244
rgn_setup_common_sched_info (void)
3245
{
3246
  memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
3247
          sizeof (rgn_common_sched_info));
3248
 
3249
  rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
3250
  rgn_common_sched_info.add_block = rgn_add_block;
3251
  rgn_common_sched_info.estimate_number_of_insns
3252
    = rgn_estimate_number_of_insns;
3253
  rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;
3254
 
3255
  common_sched_info = &rgn_common_sched_info;
3256
}
3257
 
3258
/* Setup all *_sched_info structures (for the Haifa frontend
3259
   and for the dependence analysis) in the interblock scheduler.  */
3260
void
3261
rgn_setup_sched_infos (void)
3262
{
3263
  if (!sel_sched_p ())
3264
    memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
3265
            sizeof (rgn_sched_deps_info));
3266
  else
3267
    memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
3268
            sizeof (rgn_sched_deps_info));
3269
 
3270
  sched_deps_info = &rgn_sched_deps_info;
3271
 
3272
  memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
3273
  current_sched_info = &rgn_sched_info;
3274
}
3275
 
3276
/* The one entry point in this file.  */
3277
void
3278
schedule_insns (void)
3279
{
3280
  int rgn;
3281
 
3282
  /* Taking care of this degenerate case makes the rest of
3283
     this code simpler.  */
3284
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
3285
    return;
3286
 
3287
  rgn_setup_common_sched_info ();
3288
  rgn_setup_sched_infos ();
3289
 
3290
  haifa_sched_init ();
3291
  sched_rgn_init (reload_completed);
3292
 
3293
  bitmap_initialize (&not_in_df, 0);
3294
  bitmap_clear (&not_in_df);
3295
 
3296
  /* Schedule every region in the subroutine.  */
3297
  for (rgn = 0; rgn < nr_regions; rgn++)
3298
    if (dbg_cnt (sched_region))
3299
      schedule_region (rgn);
3300
 
3301
  /* Clean up.  */
3302
  sched_rgn_finish ();
3303
  bitmap_clear (&not_in_df);
3304
 
3305
  haifa_sched_finish ();
3306
}
3307
 
3308
/* INSN has been added to/removed from current region.  */
3309
static void
3310
rgn_add_remove_insn (rtx insn, int remove_p)
3311
{
3312
  if (!remove_p)
3313
    rgn_n_insns++;
3314
  else
3315
    rgn_n_insns--;
3316
 
3317
  if (INSN_BB (insn) == target_bb)
3318
    {
3319
      if (!remove_p)
3320
        target_n_insns++;
3321
      else
3322
        target_n_insns--;
3323
    }
3324
}
3325
 
3326
/* Extend internal data structures.  */
3327
void
3328
extend_regions (void)
3329
{
3330
  rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
3331
  rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
3332
  block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
3333
  containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
3334
}
3335
 
3336
void
3337
rgn_make_new_region_out_of_new_block (basic_block bb)
3338
{
3339
  int i;
3340
 
3341
  i = RGN_BLOCKS (nr_regions);
3342
  /* I - first free position in rgn_bb_table.  */
3343
 
3344
  rgn_bb_table[i] = bb->index;
3345
  RGN_NR_BLOCKS (nr_regions) = 1;
3346
  RGN_HAS_REAL_EBB (nr_regions) = 0;
3347
  RGN_DONT_CALC_DEPS (nr_regions) = 0;
3348
  CONTAINING_RGN (bb->index) = nr_regions;
3349
  BLOCK_TO_BB (bb->index) = 0;
3350
 
3351
  nr_regions++;
3352
 
3353
  RGN_BLOCKS (nr_regions) = i + 1;
3354
}
3355
 
3356
/* BB was added to ebb after AFTER.  */
3357
static void
3358
rgn_add_block (basic_block bb, basic_block after)
3359
{
3360
  extend_regions ();
3361
  bitmap_set_bit (&not_in_df, bb->index);
3362
 
3363
  if (after == 0 || after == EXIT_BLOCK_PTR)
3364
    {
3365
      rgn_make_new_region_out_of_new_block (bb);
3366
      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after == EXIT_BLOCK_PTR);
3367
    }
3368
  else
3369
    {
3370
      int i, pos;
3371
 
3372
      /* We need to fix rgn_table, block_to_bb, containing_rgn
3373
         and ebb_head.  */
3374
 
3375
      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3376
 
3377
      /* We extend ebb_head to one more position to
3378
         easily find the last position of the last ebb in
3379
         the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3380
         is _always_ valid for access.  */
3381
 
3382
      i = BLOCK_TO_BB (after->index) + 1;
3383
      pos = ebb_head[i] - 1;
3384
      /* Now POS is the index of the last block in the region.  */
3385
 
3386
      /* Find index of basic block AFTER.  */
3387
      for (; rgn_bb_table[pos] != after->index; pos--)
3388
        ;
3389
 
3390
      pos++;
3391
      gcc_assert (pos > ebb_head[i - 1]);
3392
 
3393
      /* i - ebb right after "AFTER".  */
3394
      /* ebb_head[i] - VALID.  */
3395
 
3396
      /* Source position: ebb_head[i]
3397
         Destination position: ebb_head[i] + 1
3398
         Last position:
3399
           RGN_BLOCKS (nr_regions) - 1
3400
         Number of elements to copy: (last_position) - (source_position) + 1
3401
       */
3402
 
3403
      memmove (rgn_bb_table + pos + 1,
3404
               rgn_bb_table + pos,
3405
               ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3406
               * sizeof (*rgn_bb_table));
3407
 
3408
      rgn_bb_table[pos] = bb->index;
3409
 
3410
      for (; i <= current_nr_blocks; i++)
3411
        ebb_head [i]++;
3412
 
3413
      i = CONTAINING_RGN (after->index);
3414
      CONTAINING_RGN (bb->index) = i;
3415
 
3416
      RGN_HAS_REAL_EBB (i) = 1;
3417
 
3418
      for (++i; i <= nr_regions; i++)
3419
        RGN_BLOCKS (i)++;
3420
    }
3421
}
3422
 
3423
/* Fix internal data after interblock movement of jump instruction.
3424
   For parameter meaning please refer to
3425
   sched-int.h: struct sched_info: fix_recovery_cfg.  */
3426
static void
3427
rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3428
{
3429
  int old_pos, new_pos, i;
3430
 
3431
  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3432
 
3433
  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3434
       rgn_bb_table[old_pos] != check_bb_nexti;
3435
       old_pos--)
3436
    ;
3437
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3438
 
3439
  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3440
       rgn_bb_table[new_pos] != bbi;
3441
       new_pos--)
3442
    ;
3443
  new_pos++;
3444
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3445
 
3446
  gcc_assert (new_pos < old_pos);
3447
 
3448
  memmove (rgn_bb_table + new_pos + 1,
3449
           rgn_bb_table + new_pos,
3450
           (old_pos - new_pos) * sizeof (*rgn_bb_table));
3451
 
3452
  rgn_bb_table[new_pos] = check_bb_nexti;
3453
 
3454
  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3455
    ebb_head[i]++;
3456
}
3457
 
3458
/* Return next block in ebb chain.  For parameter meaning please refer to
3459
   sched-int.h: struct sched_info: advance_target_bb.  */
3460
static basic_block
3461
advance_target_bb (basic_block bb, rtx insn)
3462
{
3463
  if (insn)
3464
    return 0;
3465
 
3466
  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3467
              && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3468
  return bb->next_bb;
3469
}
3470
 
3471
#endif
3472
 
3473
static bool
3474
gate_handle_sched (void)
3475
{
3476
#ifdef INSN_SCHEDULING
3477
  return flag_schedule_insns && dbg_cnt (sched_func);
3478
#else
3479
  return 0;
3480
#endif
3481
}
3482
 
3483
/* Run instruction scheduler.  */
3484
static unsigned int
3485
rest_of_handle_sched (void)
3486
{
3487
#ifdef INSN_SCHEDULING
3488
  if (flag_selective_scheduling
3489
      && ! maybe_skip_selective_scheduling ())
3490
    run_selective_scheduling ();
3491
  else
3492
    schedule_insns ();
3493
#endif
3494
  return 0;
3495
}
3496
 
3497
static bool
3498
gate_handle_sched2 (void)
3499
{
3500
#ifdef INSN_SCHEDULING
3501
  return optimize > 0 && flag_schedule_insns_after_reload
3502
    && !targetm.delay_sched2 && dbg_cnt (sched2_func);
3503
#else
3504
  return 0;
3505
#endif
3506
}
3507
 
3508
/* Run second scheduling pass after reload.  */
3509
static unsigned int
3510
rest_of_handle_sched2 (void)
3511
{
3512
#ifdef INSN_SCHEDULING
3513
  if (flag_selective_scheduling2
3514
      && ! maybe_skip_selective_scheduling ())
3515
    run_selective_scheduling ();
3516
  else
3517
    {
3518
      /* Do control and data sched analysis again,
3519
         and write some more of the results to dump file.  */
3520
      if (flag_sched2_use_superblocks)
3521
        schedule_ebbs ();
3522
      else
3523
        schedule_insns ();
3524
    }
3525
#endif
3526
  return 0;
3527
}
3528
 
3529
struct rtl_opt_pass pass_sched =
3530
{
3531
 {
3532
  RTL_PASS,
3533
  "sched1",                             /* name */
3534
  gate_handle_sched,                    /* gate */
3535
  rest_of_handle_sched,                 /* execute */
3536
  NULL,                                 /* sub */
3537
  NULL,                                 /* next */
3538
  0,                                    /* static_pass_number */
3539
  TV_SCHED,                             /* tv_id */
3540
  0,                                    /* properties_required */
3541
  0,                                    /* properties_provided */
3542
  0,                                    /* properties_destroyed */
3543
  0,                                    /* todo_flags_start */
3544
  TODO_df_finish | TODO_verify_rtl_sharing |
3545
  TODO_verify_flow |
3546
  TODO_ggc_collect                      /* todo_flags_finish */
3547
 }
3548
};
3549
 
3550
struct rtl_opt_pass pass_sched2 =
3551
{
3552
 {
3553
  RTL_PASS,
3554
  "sched2",                             /* name */
3555
  gate_handle_sched2,                   /* gate */
3556
  rest_of_handle_sched2,                /* execute */
3557
  NULL,                                 /* sub */
3558
  NULL,                                 /* next */
3559
  0,                                    /* static_pass_number */
3560
  TV_SCHED2,                            /* tv_id */
3561
  0,                                    /* properties_required */
3562
  0,                                    /* properties_provided */
3563
  0,                                    /* properties_destroyed */
3564
  0,                                    /* todo_flags_start */
3565
  TODO_df_finish | TODO_verify_rtl_sharing |
3566
  TODO_verify_flow |
3567
  TODO_ggc_collect                      /* todo_flags_finish */
3568
 }
3569
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.