/* Swing Modulo Scheduling implementation.
   Copyright (C) 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "sched-int.h"
#include "target.h"
#include "cfglayout.h"
#include "cfgloop.h"
#include "cfghooks.h"
#include "expr.h"
#include "params.h"
#include "gcov-io.h"
#include "ddg.h"
#include "timevar.h"
#include "tree-pass.h"
#include "dbgcnt.h"

#ifdef INSN_SCHEDULING

/* This file contains the implementation of the Swing Modulo Scheduler,
   described in the following references:
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero, and J. Eckhardt.
       Lifetime-sensitive modulo scheduling in a production environment.
       IEEE Trans. on Comps., 50(3), March 2001
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
       PACT '96, pages 80-87, October 1996 (Boston, Massachusetts, USA).

   The basic structure is:
   1. Build a data-dependence graph (DDG) for each loop.
   2. Use the DDG to order the insns of a loop (not necessarily in
      topological order, but rather) trying to place each insn after all its
      predecessors _or_ after all its successors.
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
   4. Use the ordering to perform list-scheduling of the loop:
      1. Set II = MII.  We will try to schedule the loop within II cycles.
      2. Try to schedule the insns one by one according to the ordering.
         For each insn compute an interval of cycles by considering already-
         scheduled preds and succs (and associated latencies); try to place
         the insn in the cycles of this window checking for potential
         resource conflicts (using the DFA interface).
         Note: this is different from the cycle-scheduling of schedule_insns;
         here the insns are not scheduled monotonically top-down (nor bottom-
         up).
      3. If we fail to schedule all insns, bump II and try again, unless
         II reaches an upper bound MaxII, in which case report failure.
   5. If we succeeded in scheduling the loop within II cycles, we now
      generate prolog and epilog, decrease the counter of the loop, and
      perform modulo variable expansion for live ranges that span more than
      II cycles (i.e. use register copies to prevent a def from overwriting
      itself before reaching the use).

    SMS works with countable loops (1) whose control part can be easily
    decoupled from the rest of the loop and (2) whose loop count can
    be easily adjusted.  This is because we peel a constant number of
    iterations into a prologue and epilogue for which we want to avoid
    emitting the control part, and a kernel which is to iterate that
    constant number of iterations less than the original loop.  So the
    control part should be a set of insns clearly identified and having
    its own iv, not otherwise used in the loop (at least for now), which
    initializes a register before the loop to the number of iterations.
    Currently SMS relies on the do-loop pattern to recognize such loops,
    where (1) the control part comprises all insns defining and/or
    using a certain 'count' register and (2) the loop count can be
    adjusted by modifying this register prior to the loop.
    TODO: Rely on cfgloop analysis instead.  */
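
/* Illustrative sketch (not from the original sources; numbers are made up):
   with II = 2, suppose a register A is defined at cycle 0 of the kernel and
   used at cycle 5.  Two later kernel iterations start before that use
   executes, each redefining A, so modulo variable expansion keeps the older
   values alive through a chain of copies emitted just before the
   definition:

       A2 = A1
       A1 = A
       A  = <new value>
       ...
       ... = A2

   and the use is rewritten to read A2, the copy holding the value produced
   two iterations earlier.  See generate_reg_moves below.  */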

/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
                             + 1 + (ps)->ii - 1) / (ps)->ii)
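
/* Worked examples for the two macros above (illustrative only):
   SMODULO (-3, 4) yields 1, whereas plain C "%" would give -3.
   For a partial schedule with min_cycle == 2, max_cycle == 9 and ii == 4,
   PS_STAGE_COUNT is (9 - 2 + 1 + 4 - 1) / 4 == 2, i.e. the scheduled
   cycles fold into two stages of the kernel.  */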

/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row.  */
  ps_insn_ptr next_in_row,
              prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};

/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;       /* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule.  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule.  */
  int max_cycle;

  ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
};

/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.  */
struct undo_replace_buff_elem
{
  rtx insn;
  rtx orig_reg;
  rtx new_reg;
  struct undo_replace_buff_elem *next;
};


static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
                                                ddg_node_ptr node, int cycle,
                                                sbitmap must_precede,
                                                sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);


/* This page defines constants and structures for the modulo scheduling
   driver.  */

static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx);
static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
                                    rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
                                       int, int, int, rtx);

#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
        (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
        (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)

/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int asap;     /* A lower-bound on the absolute scheduling cycle.  */
  int time;     /* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */
  rtx first_reg_move;

  /* The number of register-move instructions added, immediately preceding
     first_reg_move.  */
  int nreg_moves;

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
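
/* Illustrative example (made-up numbers): with ii == 4, a node whose
   normalized time is 10 gets row == 10 % 4 == 2 and stage == 10 / 4 == 2;
   if two nodes end up in the same row, the one with the smaller column is
   emitted first when the kernel is laid out by permute_partial_schedule.  */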


/* The following three functions are copied from the current scheduler
   code in order to use sched_analyze() for computing the dependencies.
   They are used when initializing the sched_info structure.  */
static const char *
sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
{
  static char tmp[80];

  sprintf (tmp, "i%4d", INSN_UID (insn));
  return tmp;
}

static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
                               regset cond_exec ATTRIBUTE_UNUSED,
                               regset used ATTRIBUTE_UNUSED,
                               regset set ATTRIBUTE_UNUSED)
{
}

static struct common_sched_info_def sms_common_sched_info;

static struct sched_deps_info_def sms_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
    0, 0, 0
  };

static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, /* insn_finishes_block_p */
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL,

};

/* Given HEAD and TAIL, the first and last insns in a loop, return the
   register which controls the loop.  Return zero if it has more than one
   occurrence in the loop besides the control part, or if the do-loop
   pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
           && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
                             : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
        if (dump_file)
        {
          fprintf (dump_file, "SMS count_reg found ");
          print_rtl_single (dump_file, reg);
          fprintf (dump_file, " outside control in insn:\n");
          print_rtl_single (dump_file, insn);
        }

        return NULL_RTX;
      }

  return reg;
#else
  return NULL_RTX;
#endif
}

/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
   that the number of iterations is a compile-time constant.  If so,
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
   this constant.  Otherwise return 0.  */
static rtx
const_iteration_count (rtx count_reg, basic_block pre_header,
                       HOST_WIDEST_INT * count)
{
  rtx insn;
  rtx head, tail;

  if (! pre_header)
    return NULL_RTX;

  get_ebb_head_tail (pre_header, pre_header, &head, &tail);

  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
        rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
      {
        rtx pat = single_set (insn);

        if (CONST_INT_P (SET_SRC (pat)))
          {
            *count = INTVAL (SET_SRC (pat));
            return insn;
          }

        return NULL_RTX;
      }

  return NULL_RTX;
}

/* A very simple resource-based lower bound on the initiation interval.
   ??? Improve the accuracy of this bound by considering the
   utilization of various units.  */
static int
res_MII (ddg_ptr g)
{
  if (targetm.sched.sms_res_mii)
    return targetm.sched.sms_res_mii (g);

  return ((g->num_nodes - g->num_debug) / issue_rate);
}
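
/* Illustrative example (made-up numbers): a DDG with 12 non-debug insns on
   a target that can issue 4 insns per cycle gives a resource-based lower
   bound of 12 / 4 == 3 cycles on the initiation interval, regardless of
   which functional units those insns actually need.  */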


/* Points to the array that contains the sched data for each node.  */
static node_sched_params_ptr node_sched_params;

/* Allocate sched_params for each node and initialize it.  Assumes that
   the aux field of each node contains the asap bound (computed earlier),
   and copies it into the sched_params field.  */
static void
set_node_sched_params (ddg_ptr g)
{
  int i;

  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
  /* Initialize ASAP/ALAP/HEIGHT to zero.  */
  node_sched_params = (node_sched_params_ptr)
                       xcalloc (g->num_nodes,
                                sizeof (struct node_sched_params));

  /* Set the pointer of the general data of the node to point to the
     appropriate sched_params structure.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      /* Watch out for aliasing problems?  */
      node_sched_params[i].asap = g->nodes[i].aux.count;
      g->nodes[i].aux.info = &node_sched_params[i];
    }
}

static void
print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
{
  int i;

  if (! file)
    return;
  for (i = 0; i < num_nodes; i++)
    {
      node_sched_params_ptr nsp = &node_sched_params[i];
      rtx reg_move = nsp->first_reg_move;
      int j;

      fprintf (file, "Node = %d; INSN = %d\n", i,
               (INSN_UID (g->nodes[i].insn)));
      fprintf (file, " asap = %d:\n", nsp->asap);
      fprintf (file, " time = %d:\n", nsp->time);
      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
      for (j = 0; j < nsp->nreg_moves; j++)
        {
          fprintf (file, " reg_move = ");
          print_rtl_single (file, reg_move);
          reg_move = PREV_INSN (reg_move);
        }
    }
}

/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1.  Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.   The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                          { 1 if not.
*/
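
/* Worked example for the formula above (illustrative only): for a
   distance-1 true dependence with SCHED_TIME (def) == 1, SCHED_TIME (use)
   == 6 and ii == 4, the code below computes (6 - 1 + 4) / 4 == 2 register
   copies; if the schedule happens to place the use before the def in the
   same kernel row (the dependence was "broken"), one copy is saved.  */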
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
         ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
        if (e->type == TRUE_DEP && e->dest != e->src)
          {
            int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

            if (e->distance == 1)
              nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

            /* If dest precedes src in the schedule of the kernel, then dest
               will read before src writes and we can save one reg_copy.  */
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
              nreg_moves4e--;

            nreg_moves = MAX (nreg_moves, nreg_moves4e);
          }

      if (nreg_moves == 0)
        continue;

      /* Every use of the register defined by node may require a different
         copy of this register, depending on the time the use is scheduled.
         Set a bitmap vector, telling which nodes use each copy of this
         register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
        if (e->type == TRUE_DEP && e->dest != e->src)
          {
            int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

            if (e->distance == 1)
              dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
              dest_copy--;

            if (dest_copy)
              SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
          }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      /* Insert the reg-moves right before the notes which precede
         the insn they relate to.  */
      last_reg_move = u->first_note;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
        {
          unsigned int i_use = 0;
          rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
          rtx reg_move = gen_move_insn (new_reg, prev_reg);
          sbitmap_iterator sbi;

          add_insn_before (reg_move, last_reg_move, NULL);
          last_reg_move = reg_move;

          if (!SCHED_FIRST_REG_MOVE (u))
            SCHED_FIRST_REG_MOVE (u) = reg_move;

          EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
            {
              struct undo_replace_buff_elem *rep;

              rep = (struct undo_replace_buff_elem *)
                    xcalloc (1, sizeof (struct undo_replace_buff_elem));
              rep->insn = g->nodes[i_use].insn;
              rep->orig_reg = old_reg;
              rep->new_reg = new_reg;

              if (! reg_move_replaces)
                reg_move_replaces = rep;
              else
                {
                  rep->next = reg_move_replaces;
                  reg_move_replaces = rep;
                }

              replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
              if (rescan)
                df_insn_rescan (g->nodes[i_use].insn);
            }

          prev_reg = new_reg;
        }
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}

/* Free memory allocated for the undo buffer.  */
static void
free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
{

  while (reg_move_replaces)
    {
      struct undo_replace_buff_elem *rep = reg_move_replaces;

      reg_move_replaces = reg_move_replaces->next;
      free (rep);
    }
}

/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  int amount = PS_MIN_CYCLE (ps);
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
        ddg_node_ptr u = crr_insn->node;
        int normalized_time = SCHED_TIME (u) - amount;

        if (dump_file)
          fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
                   min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
                   (u), ps->min_cycle);
        gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
        gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
        SCHED_TIME (u) = normalized_time;
        SCHED_ROW (u) = normalized_time % ii;
        SCHED_STAGE (u) = normalized_time / ii;
      }
}

/* Set SCHED_COLUMN of each node according to its position in PS.  */
static void
set_columns_for_ps (partial_schedule_ptr ps)
{
  int row;

  for (row = 0; row < ps->ii; row++)
    {
      ps_insn_ptr cur_insn = ps->rows[row];
      int column = 0;

      for (; cur_insn; cur_insn = cur_insn->next_in_row)
        SCHED_COLUMN (cur_insn->node) = column++;
    }
}

/* Permute the insns according to their order in PS, from row 0 to
   row ii-1, and position them right before LAST.  This schedules
   the insns of the loop kernel.  */
static void
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
{
  int ii = ps->ii;
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ii ; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      if (PREV_INSN (last) != ps_ij->node->insn)
        reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
                            PREV_INSN (last));
}

static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
                           int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
        ddg_node_ptr u_node = ps_ij->node;
        int j, i_reg_moves;
        rtx reg_move = NULL_RTX;

        /* Do not duplicate any insn which refers to count_reg as it
           belongs to the control part.
           TODO: This should be done by analyzing the control part of
           the loop.  */
        if (reg_mentioned_p (count_reg, u_node->insn))
          continue;

        if (for_prolog)
          {
            /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
               number of reg_moves starting with the second occurrence of
               u_node, which is generated if its SCHED_STAGE <= to_stage.  */
            i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
            i_reg_moves = MAX (i_reg_moves, 0);
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

            /* The reg_moves start from the *first* reg_move backwards.  */
            if (i_reg_moves)
              {
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
                for (j = 1; j < i_reg_moves; j++)
                  reg_move = PREV_INSN (reg_move);
              }
          }
        else /* It's for the epilog.  */
          {
            /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
               starting to decrease one stage after u_node no longer occurs;
               that is, generate all reg_moves until
               SCHED_STAGE (u_node) == from_stage - 1.  */
            i_reg_moves = SCHED_NREG_MOVES (u_node)
                       - (from_stage - SCHED_STAGE (u_node) - 1);
            i_reg_moves = MAX (i_reg_moves, 0);
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

            /* The reg_moves start from the *last* reg_move forwards.  */
            if (i_reg_moves)
              {
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
                for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
                  reg_move = PREV_INSN (reg_move);
              }
          }

        for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
          emit_insn (copy_rtx (PATTERN (reg_move)));
        if (SCHED_STAGE (u_node) >= from_stage
            && SCHED_STAGE (u_node) <= to_stage)
          duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
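
/* Worked example of how the function above is driven (illustrative only):
   for a stage count of 3, generate_prolog_epilog below calls it with
   (from_stage, to_stage) == (0, 0) and then (0, 1) when building the
   prolog, so stage-0 insns are emitted twice and stage-1 insns once there,
   and with (1, 2) and then (2, 2) when building the epilog, so stage-1
   insns appear once and stage-2 insns twice; the required reg_moves are
   replayed alongside, as dictated by SCHED_NREG_MOVES.  */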


/* Generate the instructions (including reg_moves) for prolog & epilog.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
                        rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
         adjust the loop count by STAGE_COUNT.  If loop count is constant
         (count_init), this constant is adjusted by STAGE_COUNT in
         generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
                                     count_reg, GEN_INT (last_stage),
                                     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
        emit_move_insn (count_reg, sub_reg);
    }

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}

/* Return true if all the BBs of the loop are empty except the
   loop header.  */
static bool
loop_single_full_bb_p (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes ; i++)
    {
      rtx head, tail;
      bool empty_bb = true;

      if (bbs[i] == loop->header)
        continue;

      /* Make sure that basic blocks other than the header
         have only notes, labels or jumps.  */
      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
        {
          if (NOTE_P (head) || LABEL_P (head)
              || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
            continue;
          empty_bb = false;
          break;
        }

      if (! empty_bb)
        {
          free (bbs);
          return false;
        }
    }
  free (bbs);
  return true;
}

/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                     \
                                  && (EDGE_COUNT (loop->latch->preds) == 1) \
                                  && (EDGE_COUNT (loop->latch->succs) == 1))

/* Return true if the loop is in its canonical form and false if not;
   i.e. it satisfies SIMPLE_SMS_LOOP_P and has one preheader block and a
   single exit.  */
static bool
loop_canon_p (struct loop *loop)
{

  if (loop->inner || !loop_outer (loop))
  {
    if (dump_file)
      fprintf (dump_file, "SMS loop inner or !loop_outer\n");
    return false;
  }

  if (!single_exit (loop))
    {
      if (dump_file)
        {
          rtx insn = BB_END (loop->header);

          fprintf (dump_file, "SMS loop many exits ");
          fprintf (dump_file, " %s %d (file, line)\n",
                   insn_file (insn), insn_line (insn));
        }
      return false;
    }

  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
        {
          rtx insn = BB_END (loop->header);

          fprintf (dump_file, "SMS loop many BBs. ");
          fprintf (dump_file, " %s %d (file, line)\n",
                   insn_file (insn), insn_line (insn));
        }
      return false;
    }

  return true;
}

/* If there is more than one entry to the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      FOR_EACH_EDGE (e, i, loop->header->preds)
        if (e->src == loop->latch)
          break;
      split_edge (e);
    }
}

/* Set up the scheduler infos used by SMS.  */
static void
setup_sched_infos (void)
{
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
          sizeof (sms_common_sched_info));
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
  common_sched_info = &sms_common_sched_info;

  sched_deps_info = &sms_sched_deps_info;
  current_sched_info = &sms_sched_info;
}

/* Probability in % that the sms-ed loop rolls enough so that the optimized
   version may be entered.  Just a guess.  */
#define PROB_SMS_ENOUGH_ITERATIONS 80

/* Used to calculate the upper bound of ii.  */
#define MAXII_FACTOR 2

/* Main entry point: perform SMS scheduling on the loops of the function
   that consist of single basic blocks.  */
static void
sms_schedule (void)
{
  rtx insn;
  ddg_ptr *g_arr, g;
  int * node_order;
  int maxii, max_asap;
  loop_iterator li;
  partial_schedule_ptr ps;
  basic_block bb = NULL;
  struct loop *loop;
  basic_block condition_bb = NULL;
  edge latch_edge;
  gcov_type trip_count = 0;

  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
                       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops () <= 1)
    {
      loop_optimizer_finalize ();
      return;  /* There are no loops to schedule.  */
    }

  /* Initialize issue_rate.  */
  if (targetm.sched.issue_rate)
    {
      int temp = reload_completed;

      reload_completed = 1;
      issue_rate = targetm.sched.issue_rate ();
      reload_completed = temp;
    }
  else
    issue_rate = 1;

  /* Initialize the scheduler.  */
  setup_sched_infos ();
  haifa_sched_init ();

  /* Allocate memory to hold the DDG array, one entry for each loop.
     We use loop->num as index into this array.  */
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());

  if (dump_file)
  {
    fprintf (dump_file, "\n\nSMS analysis phase\n");
    fprintf (dump_file, "===================\n\n");
  }

  /* Build DDGs for all the relevant loops and hold them in G_ARR
     indexed by the loop index.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg;

      /* For debugging.  */
      if (dbg_cnt (sms_sched_loop) == false)
        {
          if (dump_file)
            fprintf (dump_file, "SMS reached max limit... \n");

          break;
        }

      if (dump_file)
      {
         rtx insn = BB_END (loop->header);

         fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                  loop->num, insn_file (insn), insn_line (insn));

      }

      if (! loop_canon_p (loop))
        continue;

      if (! loop_single_full_bb_p (loop))
      {
        if (dump_file)
          fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
        continue;
      }

      bb = loop->header;

      get_ebb_head_tail (bb, bb, &head, &tail);
      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
        trip_count = latch_edge->count / single_exit (loop)->count;

      /* Perform SMS only on loops whose average count is above the
         threshold.  */

      if ( latch_edge->count
          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
        {
          if (dump_file)
            {
              fprintf (dump_file, " %s %d (file, line)\n",
                       insn_file (tail), insn_line (tail));
              fprintf (dump_file, "SMS single-bb-loop\n");
              if (profile_info && flag_branch_probabilities)
                {
                  fprintf (dump_file, "SMS loop-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) bb->count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS trip-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) trip_count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS profile-sum-max ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) profile_info->sum_max);
                  fprintf (dump_file, "\n");
                }
            }
          continue;
        }

      /* Make sure this is a doloop.  */
      if ( !(count_reg = doloop_register_get (head, tail)))
      {
        if (dump_file)
          fprintf (dump_file, "SMS doloop_register_get failed\n");
        continue;
      }

      /* Don't handle BBs with calls or barriers, or !single_set insns,
         or auto-increment insns (to avoid creating invalid reg-moves
         for the auto-increment insns).
         ??? Should handle auto-increment insns.
         ??? Should handle insns defining subregs.  */
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
      {
         rtx set;

        if (CALL_P (insn)
            || BARRIER_P (insn)
            || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
            || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
            || (INSN_P (insn) && (set = single_set (insn))
                && GET_CODE (SET_DEST (set)) == SUBREG))
        break;
      }

      if (insn != NEXT_INSN (tail))
        {
          if (dump_file)
            {
              if (CALL_P (insn))
                fprintf (dump_file, "SMS loop-with-call\n");
              else if (BARRIER_P (insn))
                fprintf (dump_file, "SMS loop-with-barrier\n");
              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
                fprintf (dump_file, "SMS reg inc\n");
              else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
              else
               fprintf (dump_file, "SMS loop with subreg in lhs\n");
              print_rtl_single (dump_file, insn);
            }

          continue;
        }

      if (! (g = create_ddg (bb, 0)))
        {
          if (dump_file)
            fprintf (dump_file, "SMS create_ddg failed\n");
          continue;
        }

      g_arr[loop->num] = g;
      if (dump_file)
        fprintf (dump_file, "...OK\n");

    }
  if (dump_file)
  {
    fprintf (dump_file, "\nSMS transformation phase\n");
    fprintf (dump_file, "=========================\n\n");
  }

  /* We don't want to perform SMS on new loops created by versioning.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg, count_init;
      int mii, rec_mii;
      unsigned stage_count = 0;
      HOST_WIDEST_INT loop_count = 0;

      if (! (g = g_arr[loop->num]))
        continue;

      if (dump_file)
      {
         rtx insn = BB_END (loop->header);

         fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                  loop->num, insn_file (insn), insn_line (insn));

         print_ddg (dump_file, g);
      }

      get_ebb_head_tail (loop->header, loop->header, &head, &tail);

      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
        trip_count = latch_edge->count / single_exit (loop)->count;

      if (dump_file)
        {
          fprintf (dump_file, " %s %d (file, line)\n",
                   insn_file (tail), insn_line (tail));
          fprintf (dump_file, "SMS single-bb-loop\n");
          if (profile_info && flag_branch_probabilities)
            {
              fprintf (dump_file, "SMS loop-count ");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                       (HOST_WIDEST_INT) bb->count);
              fprintf (dump_file, "\n");
              fprintf (dump_file, "SMS profile-sum-max ");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                       (HOST_WIDEST_INT) profile_info->sum_max);
              fprintf (dump_file, "\n");
            }
          fprintf (dump_file, "SMS doloop\n");
          fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
        }


      /* If the loop has a doloop register, it gets special
         handling.  */
      count_init = NULL_RTX;
      if ((count_reg = doloop_register_get (head, tail)))
        {
          basic_block pre_header;

          pre_header = loop_preheader_edge (loop)->src;
          count_init = const_iteration_count (count_reg, pre_header,
                                              &loop_count);
        }
      gcc_assert (count_reg);

      if (dump_file && count_init)
        {
          fprintf (dump_file, "SMS const-doloop ");
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                     loop_count);
          fprintf (dump_file, "\n");
        }

      node_order = XNEWVEC (int, g->num_nodes);

      mii = 1; /* Need to pass some estimate of mii.  */
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
      mii = MAX (res_MII (g), rec_mii);
      maxii = MAX (max_asap, MAXII_FACTOR * mii);

      if (dump_file)
        fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
                 rec_mii, mii, maxii);

      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
         ASAP.  */
      set_node_sched_params (g);

      ps = sms_schedule_by_order (g, mii, maxii, node_order);

      if (ps)
        {
          stage_count = PS_STAGE_COUNT (ps);
          gcc_assert (stage_count >= 1);
        }

      /* Stage count of 1 means that there is no interleaving between
         iterations; let the scheduling passes do the job.  */
      if (stage_count <= 1
          || (count_init && (loop_count <= stage_count))
          || (flag_branch_probabilities && (trip_count <= stage_count)))
        {
          if (dump_file)
            {
              fprintf (dump_file, "SMS failed... \n");
              fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
              fprintf (dump_file, ", trip-count=");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
              fprintf (dump_file, ")\n");
            }
          continue;
        }
      else
        {
          struct undo_replace_buff_elem *reg_move_replaces;

          if (dump_file)
            {
              fprintf (dump_file,
                       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
                       stage_count);
              print_partial_schedule (ps, dump_file);
              fprintf (dump_file,
                       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
                       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
            }

          /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
             the closing_branch was scheduled and should appear in the last (ii-1)
             row.  Otherwise, we are free to schedule the branch, and we let nodes
             that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
             row; this should reduce stage_count to minimum.
             TODO: Revisit the issue of scheduling the insns of the
             control part relative to the branch when the control part
             has more than one insn.  */
          normalize_sched_times (ps);
          rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
          set_columns_for_ps (ps);

          canon_loop (loop);

          /* In case the BCT count is not known, do loop-versioning.  */
          if (count_reg && ! count_init)
            {
              rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
                                             GEN_INT(stage_count));
              unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
                               * REG_BR_PROB_BASE) / 100;

              loop_version (loop, comp_rtx, &condition_bb,
                            prob, prob, REG_BR_PROB_BASE - prob,
                            true);
             }

          /* Set new iteration count of loop kernel.  */
          if (count_reg && count_init)
            SET_SRC (single_set (count_init)) = GEN_INT (loop_count
                                                     - stage_count + 1);

          /* Now apply the scheduled kernel to the RTL of the loop.  */
          permute_partial_schedule (ps, g->closing_branch->first_note);

          /* Mark this loop as software pipelined so that later
             scheduling passes don't touch it.  */
          if (! flag_resched_modulo_sched)
            g->bb->flags |= BB_DISABLE_SCHEDULE;
          /* The life-info is not valid any more.  */
          df_set_bb_dirty (g->bb);

          reg_move_replaces = generate_reg_moves (ps, true);
          if (dump_file)
            print_node_sched_params (dump_file, g->num_nodes, g);
          /* Generate prolog and epilog.  */
          generate_prolog_epilog (ps, loop, count_reg, count_init);

          free_undo_replace_buff (reg_move_replaces);
        }

      free_partial_schedule (ps);
      free (node_sched_params);
      free (node_order);
      free_ddg (g);
    }

  free (g_arr);

  /* Release scheduler data, needed until now because of DFA.  */
  haifa_sched_finish ();
  loop_optimizer_finalize ();
}

/* The SMS scheduling algorithm itself
   -----------------------------------
   Input: 'O' an ordered list of insns of a loop.
   Output: A scheduling of the loop - kernel, prolog, and epilogue.

   'Q' is the empty set.
   'PS' is the partial schedule; it holds the currently scheduled nodes with
        their cycle/slot.
   'PSP' previously scheduled predecessors.
   'PSS' previously scheduled successors.
   't(u)' the cycle where u is scheduled.
   'l(u)' is the latency of u.
   'd(v,u)' is the dependence distance from v to u.
   'ASAP(u)' the earliest time at which u could be scheduled as computed in
             the node ordering phase.
   'check_hardware_resources_conflicts(u, PS, c)'
                             run a trace around cycle/slot through DFA model
                             to check resource conflicts involving instruction u
                             at cycle c given the partial schedule PS.
   'add_to_partial_schedule_at_time(u, PS, c)'
                             Add the node/instruction u to the partial schedule
                             PS at time c.
   'calculate_register_pressure(PS)'
                             Given a schedule of instructions, calculate the register
                             pressure it implies.  One implementation could be the
                             maximum number of overlapping live ranges.
   'maxRP' The maximum allowed register pressure; it is usually derived from the
           number of registers available in the hardware.

   1. II = MII.
   2. PS = empty list
   3. for each node u in O in pre-computed order
   4.   if (PSP(u) != Q && PSS(u) == Q) then
   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6.     start = Early_start; end = Early_start + II - 1; step = 1
   11.  else if (PSP(u) == Q && PSS(u) != Q) then
   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13.     start = Late_start; end = Late_start - II + 1; step = -1
   14.  else if (PSP(u) != Q && PSS(u) != Q) then
   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   17.     start = Early_start;
   18.     end = min(Early_start + II - 1 , Late_start);
   19.     step = 1
   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
   21.    start = ASAP(u); end = start + II - 1; step = 1
   22.  endif

   23.  success = false
   24.  for (c = start ; c != end ; c += step)
   25.     if check_hardware_resources_conflicts(u, PS, c) then
   26.       add_to_partial_schedule_at_time(u, PS, c)
   27.       success = true
   28.       break
   29.     endif
   30.  endfor
   31.  if (success == false) then
   32.    II = II + 1
   33.    if (II > maxII) then
   34.       finish - failed to schedule
   35.   endif
   36.    goto 2.
   37.  endif
   38. endfor
   39. if (calculate_register_pressure(PS) > maxRP) then
   40.    goto 32.
   41. endif
   42. compute epilogue & prologue
   43. finish - succeeded to schedule
*/
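
/* Worked example of the window computation above (made-up numbers): with
   II == 4, let u have one scheduled predecessor v1 with t(v1) == 2, edge
   latency 2 and distance 0, and one scheduled successor v2 with t(v2) == 9,
   edge latency 1 and distance 1.  Then Early_start(u) == 2 + 2 - 0*4 == 4
   and Late_start(u) == 9 - 1 + 1*4 == 12, so u is tried from cycle 4
   towards min (4 + 4 - 1, 12) == 7 with step 1.  */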
1331
 
1332
/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1333
   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1334
   set to 0 to save compile time.  */
1335
#define DFA_HISTORY SMS_DFA_HISTORY
1336
 
1337
/* A threshold for the number of repeated unsuccessful attempts to insert
1338
   an empty row, before we flush the partial schedule and start over.  */
1339
#define MAX_SPLIT_NUM 10
1340
/* Given the partial schedule PS, this function calculates and returns the
1341
   cycles in which we can schedule the node with the given index I.
1342
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1343
   noticed that there are several cases in which we fail    to SMS the loop
1344
   because the sched window of a node is empty    due to tight data-deps. In
1345
   such cases we want to unschedule    some of the predecessors/successors
1346
   until we get non-empty    scheduling window.  It returns -1 if the
1347
   scheduling window is empty and zero otherwise.  */
1348
 
1349
static int
1350
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1351
                  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1352
{
1353
  int start, step, end;
1354
  ddg_edge_ptr e;
1355
  int u = nodes_order [i];
1356
  ddg_node_ptr u_node = &ps->g->nodes[u];
1357
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1358
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1359
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1360
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1361
  int psp_not_empty;
1362
  int pss_not_empty;
1363
 
1364
  /* 1. compute sched window for u (start, end, step).  */
1365
  sbitmap_zero (psp);
1366
  sbitmap_zero (pss);
1367
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1368
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1369
 
1370
  if (psp_not_empty && !pss_not_empty)
1371
    {
1372
      int early_start = INT_MIN;
1373
 
1374
      end = INT_MAX;
1375
      for (e = u_node->in; e != 0; e = e->next_in)
1376
        {
1377
          ddg_node_ptr v_node = e->src;
1378
 
1379
          if (dump_file)
1380
            {
1381
              fprintf (dump_file, "\nProcessing edge: ");
1382
              print_ddg_edge (dump_file, e);
1383
              fprintf (dump_file,
1384
                       "\nScheduling %d (%d) in psp_not_empty,"
1385
                       " checking p %d (%d): ", u_node->cuid,
1386
                       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1387
                       (v_node->insn));
1388
            }
1389
 
1390
          if (TEST_BIT (sched_nodes, v_node->cuid))
1391
            {
1392
              int p_st = SCHED_TIME (v_node);
1393
 
1394
              early_start =
1395
                MAX (early_start, p_st + e->latency - (e->distance * ii));
1396
 
1397
              if (dump_file)
1398
                fprintf (dump_file,
1399
                         "pred st = %d; early_start = %d; latency: %d",
1400
                         p_st, early_start, e->latency);
1401
 
1402
              if (e->data_type == MEM_DEP)
1403
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1404
            }
1405
         else if (dump_file)
1406
            fprintf (dump_file, "the node is not scheduled\n");
1407
        }
1408
      start = early_start;
1409
      end = MIN (end, early_start + ii);
1410
      /* Schedule the node close to it's predecessors.  */
1411
      step = 1;
1412
 
1413
      if (dump_file)
1414
        fprintf (dump_file,
1415
                 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1416
                 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1417
    }
1418
 
1419
  else if (!psp_not_empty && pss_not_empty)
1420
    {
1421
      int late_start = INT_MAX;
1422
 
1423
      end = INT_MIN;
1424
      for (e = u_node->out; e != 0; e = e->next_out)
1425
        {
1426
          ddg_node_ptr v_node = e->dest;
1427
 
1428
          if (dump_file)
1429
            {
1430
              fprintf (dump_file, "\nProcessing edge:");
1431
              print_ddg_edge (dump_file, e);
1432
              fprintf (dump_file,
1433
                       "\nScheduling %d (%d) in pss_not_empty,"
1434
                       " checking s %d (%d): ", u_node->cuid,
1435
                       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
1436
                       (v_node->insn));
1437
            }
1438
 
1439
          if (TEST_BIT (sched_nodes, v_node->cuid))
1440
            {
1441
              int s_st = SCHED_TIME (v_node);
1442
 
1443
              late_start = MIN (late_start,
1444
                                s_st - e->latency + (e->distance * ii));
1445
 
1446
              if (dump_file)
1447
                fprintf (dump_file,
1448
                         "succ st = %d; late_start = %d; latency = %d",
1449
                         s_st, late_start, e->latency);
1450
 
1451
              if (e->data_type == MEM_DEP)
1452
                end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1453
             if (dump_file)
1454
                 fprintf (dump_file, "end = %d\n", end);
1455
 
1456
            }
1457
          else if (dump_file)
1458
            fprintf (dump_file, "the node is not scheduled\n");
1459
 
1460
        }
1461
      start = late_start;
1462
      end = MAX (end, late_start - ii);
1463
      /* Schedule the node close to its successors.  */
1464
      step = -1;
1465
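      /* Illustrative example (hypothetical values): with ii = 4 and a
         single scheduled successor at cycle 10 over an edge of latency 2
         and distance 0, late_start = 10 - 2 + 0 = 8, so the descending
         window tries cycles 8, 7, 6, 5 (start = 8, end = 4, step = -1).  */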
 
1466
      if (dump_file)
1467
        fprintf (dump_file,
1468
                 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
1469
                 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
1470
 
1471
    }
1472
 
1473
  else if (psp_not_empty && pss_not_empty)
1474
    {
1475
      int early_start = INT_MIN;
1476
      int late_start = INT_MAX;
1477
      int count_preds = 0;
1478
      int count_succs = 0;
1479
 
1480
      start = INT_MIN;
1481
      end = INT_MAX;
1482
      for (e = u_node->in; e != 0; e = e->next_in)
1483
        {
1484
          ddg_node_ptr v_node = e->src;
1485
 
1486
          if (dump_file)
1487
            {
1488
              fprintf (dump_file, "\nProcessing edge:");
1489
              print_ddg_edge (dump_file, e);
1490
              fprintf (dump_file,
1491
                       "\nScheduling %d (%d) in psp_pss_not_empty,"
1492
                       " checking p %d (%d): ", u_node->cuid, INSN_UID
1493
                       (u_node->insn), v_node->cuid, INSN_UID
1494
                       (v_node->insn));
1495
            }
1496
 
1497
          if (TEST_BIT (sched_nodes, v_node->cuid))
1498
            {
1499
              int p_st = SCHED_TIME (v_node);
1500
 
1501
              early_start = MAX (early_start,
1502
                                 p_st + e->latency
1503
                                 - (e->distance * ii));
1504
 
1505
              if (dump_file)
1506
                fprintf (dump_file,
1507
                         "pred st = %d; early_start = %d; latency = %d",
1508
                         p_st, early_start, e->latency);
1509
 
1510
              if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1511
                count_preds++;
1512
 
1513
              if (e->data_type == MEM_DEP)
1514
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1515
            }
1516
          else if (dump_file)
1517
            fprintf (dump_file, "the node is not scheduled\n");
1518
 
1519
        }
1520
      for (e = u_node->out; e != 0; e = e->next_out)
1521
        {
1522
          ddg_node_ptr v_node = e->dest;
1523
 
1524
          if (dump_file)
1525
            {
1526
              fprintf (dump_file, "\nProcessing edge:");
1527
              print_ddg_edge (dump_file, e);
1528
              fprintf (dump_file,
1529
                       "\nScheduling %d (%d) in psp_pss_not_empty,"
1530
                       " checking s %d (%d): ", u_node->cuid, INSN_UID
1531
                       (u_node->insn), v_node->cuid, INSN_UID
1532
                       (v_node->insn));
1533
            }
1534
 
1535
          if (TEST_BIT (sched_nodes, v_node->cuid))
1536
            {
1537
              int s_st = SCHED_TIME (v_node);
1538
 
1539
              late_start = MIN (late_start,
1540
                                s_st - e->latency
1541
                                + (e->distance * ii));
1542
 
1543
              if (dump_file)
1544
                fprintf (dump_file,
1545
                         "succ st = %d; late_start = %d; latency = %d",
1546
                         s_st, late_start, e->latency);
1547
 
1548
               if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1549
                 count_succs++;
1550
 
1551
              if (e->data_type == MEM_DEP)
1552
                start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1553
            }
1554
          else if (dump_file)
1555
            fprintf (dump_file, "the node is not scheduled\n");
1556
 
1557
        }
1558
      start = MAX (start, early_start);
1559
      end = MIN (end, MIN (early_start + ii, late_start + 1));
1560
      step = 1;
1561
      /* If there are more successors than predecessors, schedule the
1562
         node close to its successors.  */
1563
      if (count_succs >= count_preds)
1564
        {
1565
          int old_start = start;
1566
 
1567
          start = end - 1;
1568
          end = old_start - 1;
1569
          step = -1;
1570
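          /* E.g. (illustrative) an ascending window [3, 7) becomes the
             descending one: start = 6, end = 2, step = -1, so cycles
             6, 5, 4, 3 are tried, closest to the successors first.  */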
        }
1571
    }
1572
  else /* psp is empty && pss is empty.  */
1573
    {
1574
      start = SCHED_ASAP (u_node);
1575
      end = start + ii;
1576
      step = 1;
1577
    }
1578
 
1579
  *start_p = start;
1580
  *step_p = step;
1581
  *end_p = end;
1582
  sbitmap_free (psp);
1583
  sbitmap_free (pss);
1584
 
1585
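  /* The window is half-open: cycles start, start + step, ... are tried
     up to (but excluding) END, so start >= end with step == 1 (or
     start <= end with step == -1) means there is no cycle to try.  */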
  if ((start >= end && step == 1) || (start <= end && step == -1))
1586
    {
1587
      if (dump_file)
1588
        fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
1589
                 start, end, step);
1590
    return -1;
1591
    }
1592
 
1593
    return 0;
1594
}
1595
 
1596
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
1597
   node currently being scheduled.  At the end of the calculation
1598
   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
1599
   U_NODE which are (1) already scheduled in the first/last row of
1600
   U_NODE's scheduling window, (2) whose dependence inequality with U
1601
   becomes an equality when U is scheduled in this same row, and (3)
1602
   whose dependence latency is zero.
1603
 
1604
   The first and last rows are calculated using the following parameters:
1605
   START/END rows - The cycles that begin/end the traversal of the window;
1606
   searching for an empty cycle to schedule U_NODE.
1607
   STEP - The direction in which we traverse the window.
1608
   II - The initiation interval.  */
1609
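/* For example (hypothetical values): if U_NODE's window starts at cycle 6
   and a predecessor V is already scheduled at cycle 6 with latency 0 and
   distance 0, then scheduling U in that same row turns the dependence
   inequality into an equality, so V's cuid is set in MUST_PRECEDE.  */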
 
1610
static void
1611
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
1612
                               int step, int ii, sbitmap sched_nodes,
1613
                               sbitmap must_precede, sbitmap must_follow)
1614
{
1615
  ddg_edge_ptr e;
1616
  int first_cycle_in_window, last_cycle_in_window;
1617
 
1618
  gcc_assert (must_precede && must_follow);
1619
 
1620
  /* Consider the following scheduling window:
1621
     {first_cycle_in_window, first_cycle_in_window+1, ...,
1622
     last_cycle_in_window}.  If step is 1 then the following will be
1623
     the order we traverse the window: {start=first_cycle_in_window,
1624
     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
1625
     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
1626
     end=first_cycle_in_window-1} if step is -1.  */
1627
  first_cycle_in_window = (step == 1) ? start : end - step;
1628
  last_cycle_in_window = (step == 1) ? end - step : start;
1629
 
1630
  sbitmap_zero (must_precede);
1631
  sbitmap_zero (must_follow);
1632
 
1633
  if (dump_file)
1634
    fprintf (dump_file, "\nmust_precede: ");
1635
 
1636
  /* Instead of checking if:
1637
      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
1638
      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
1639
             first_cycle_in_window)
1640
      && e->latency == 0
1641
     we use the fact that latency is non-negative:
1642
      SCHED_TIME (e->src) - (e->distance * ii) <=
1643
      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
1644
      first_cycle_in_window
1645
     and check only if
1646
      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
1647
  for (e = u_node->in; e != 0; e = e->next_in)
1648
    if (TEST_BIT (sched_nodes, e->src->cuid)
1649
        && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
1650
             first_cycle_in_window))
1651
      {
1652
        if (dump_file)
1653
          fprintf (dump_file, "%d ", e->src->cuid);
1654
 
1655
        SET_BIT (must_precede, e->src->cuid);
1656
      }
1657
 
1658
  if (dump_file)
1659
    fprintf (dump_file, "\nmust_follow: ");
1660
 
1661
  /* Instead of checking if:
1662
      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
1663
      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
1664
             last_cycle_in_window)
1665
      && e->latency == 0
1666
     we use the fact that latency is non-negative:
1667
      SCHED_TIME (e->dest) + (e->distance * ii) >=
1668
      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
1669
      last_cycle_in_window
1670
     and check only if
1671
      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
1672
  for (e = u_node->out; e != 0; e = e->next_out)
1673
    if (TEST_BIT (sched_nodes, e->dest->cuid)
1674
        && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
1675
             last_cycle_in_window))
1676
      {
1677
        if (dump_file)
1678
          fprintf (dump_file, "%d ", e->dest->cuid);
1679
 
1680
        SET_BIT (must_follow, e->dest->cuid);
1681
      }
1682
 
1683
  if (dump_file)
1684
    fprintf (dump_file, "\n");
1685
}
1686
 
1687
/* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
1688
   parameters to decide if that's possible:
1689
   PS - The partial schedule.
1690
   U - The serial number of U_NODE.
1691
   NUM_SPLITS - The number of row splits made so far.
1692
   MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
1693
   the first row of the scheduling window)
1694
   MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
1695
   last row of the scheduling window)  */
1696
 
1697
static bool
1698
try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
1699
                              int u, int cycle, sbitmap sched_nodes,
1700
                              int *num_splits, sbitmap must_precede,
1701
                              sbitmap must_follow)
1702
{
1703
  ps_insn_ptr psi;
1704
  bool success = 0;
1705
 
1706
  verify_partial_schedule (ps, sched_nodes);
1707
  psi = ps_add_node_check_conflicts (ps, u_node, cycle,
1708
                                     must_precede, must_follow);
1709
  if (psi)
1710
    {
1711
      SCHED_TIME (u_node) = cycle;
1712
      SET_BIT (sched_nodes, u);
1713
      success = 1;
1714
      *num_splits = 0;
1715
      if (dump_file)
1716
        fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
1717
 
1718
    }
1719
 
1720
  return success;
1721
}
1722
 
1723
/* This function implements the SMS scheduling algorithm described
1724
   above.  */
1725
static partial_schedule_ptr
1726
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1727
{
1728
  int ii = mii;
1729
  int i, c, success, num_splits = 0;
1730
  int flush_and_start_over = true;
1731
  int num_nodes = g->num_nodes;
1732
  int start, end, step; /* Place together into one struct?  */
1733
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1734
  sbitmap must_precede = sbitmap_alloc (num_nodes);
1735
  sbitmap must_follow = sbitmap_alloc (num_nodes);
1736
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1737
 
1738
  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1739
 
1740
  sbitmap_ones (tobe_scheduled);
1741
  sbitmap_zero (sched_nodes);
1742
 
1743
  while (flush_and_start_over && (ii < maxii))
1744
    {
1745
 
1746
      if (dump_file)
1747
        fprintf (dump_file, "Starting with ii=%d\n", ii);
1748
      flush_and_start_over = false;
1749
      sbitmap_zero (sched_nodes);
1750
 
1751
      for (i = 0; i < num_nodes; i++)
1752
        {
1753
          int u = nodes_order[i];
1754
          ddg_node_ptr u_node = &ps->g->nodes[u];
1755
          rtx insn = u_node->insn;
1756
 
1757
          if (!NONDEBUG_INSN_P (insn))
1758
            {
1759
              RESET_BIT (tobe_scheduled, u);
1760
              continue;
1761
            }
1762
 
1763
          if (JUMP_P (insn)) /* Closing branch handled later.  */
1764
            {
1765
              RESET_BIT (tobe_scheduled, u);
1766
              continue;
1767
            }
1768
 
1769
          if (TEST_BIT (sched_nodes, u))
1770
            continue;
1771
 
1772
          /* Try to get a non-empty scheduling window.  */
1773
         success = 0;
1774
         if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
1775
                                &step, &end) == 0)
1776
            {
1777
              if (dump_file)
1778
                fprintf (dump_file, "\nTrying to schedule node %d \
1779
                        INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
1780
                        (g->nodes[u].insn)), start, end, step);
1781
 
1782
              gcc_assert ((step > 0 && start < end)
1783
                          || (step < 0 && start > end));
1784
 
1785
              calculate_must_precede_follow (u_node, start, end, step, ii,
1786
                                             sched_nodes, must_precede,
1787
                                             must_follow);
1788
 
1789
              for (c = start; c != end; c += step)
1790
                {
1791
                  sbitmap tmp_precede = NULL;
1792
                  sbitmap tmp_follow = NULL;
1793
 
1794
                  if (c == start)
1795
                    {
1796
                      if (step == 1)
1797
                        tmp_precede = must_precede;
1798
                      else      /* step == -1.  */
1799
                        tmp_follow = must_follow;
1800
                    }
1801
                  if (c == end - step)
1802
                    {
1803
                      if (step == 1)
1804
                        tmp_follow = must_follow;
1805
                      else      /* step == -1.  */
1806
                        tmp_precede = must_precede;
1807
                    }
1808
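                  /* Hence MUST_PRECEDE constrains only the first cycle
                     tried and MUST_FOLLOW only the last; for the cycles
                     in between both bitmaps are passed as NULL.  */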
 
1809
                  success =
1810
                    try_scheduling_node_in_cycle (ps, u_node, u, c,
1811
                                                  sched_nodes,
1812
                                                  &num_splits, tmp_precede,
1813
                                                  tmp_follow);
1814
                  if (success)
1815
                    break;
1816
                }
1817
 
1818
              verify_partial_schedule (ps, sched_nodes);
1819
            }
1820
            if (!success)
1821
            {
1822
              int split_row;
1823
 
1824
              if (ii++ == maxii)
1825
                break;
1826
 
1827
              if (num_splits >= MAX_SPLIT_NUM)
1828
                {
1829
                  num_splits = 0;
1830
                  flush_and_start_over = true;
1831
                  verify_partial_schedule (ps, sched_nodes);
1832
                  reset_partial_schedule (ps, ii);
1833
                  verify_partial_schedule (ps, sched_nodes);
1834
                  break;
1835
                }
1836
 
1837
              num_splits++;
1838
              /* The scheduling window is exclusive of 'end'
1839
                 whereas compute_split_row () expects an inclusive,
1840
                 ordered range.  */
1841
              if (step == 1)
1842
                split_row = compute_split_row (sched_nodes, start, end - 1,
1843
                                               ps->ii, u_node);
1844
              else
1845
                split_row = compute_split_row (sched_nodes, end + 1, start,
1846
                                               ps->ii, u_node);
1847
 
1848
              ps_insert_empty_row (ps, split_row, sched_nodes);
1849
              i--;              /* Go back and retry node i.  */
1850
 
1851
              if (dump_file)
1852
                fprintf (dump_file, "num_splits=%d\n", num_splits);
1853
            }
1854
 
1855
          /* ??? If (success), check register pressure estimates.  */
1856
        }                       /* Continue with next node.  */
1857
    }                           /* While flush_and_start_over.  */
1858
  if (ii >= maxii)
1859
    {
1860
      free_partial_schedule (ps);
1861
      ps = NULL;
1862
    }
1863
  else
1864
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
1865
 
1866
  sbitmap_free (sched_nodes);
1867
  sbitmap_free (must_precede);
1868
  sbitmap_free (must_follow);
1869
  sbitmap_free (tobe_scheduled);
1870
 
1871
  return ps;
1872
}
1873
 
1874
/* This function inserts a new empty row into PS at the position
1875
   according to SPLITROW, keeping all already scheduled instructions
1876
   intact and updating their SCHED_TIME and cycle accordingly.  */
1877
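/* A small worked example (hypothetical values): after normalization, with
   ii = 3 and an (adjusted) split_row = 2, an insn at cycle 7 (row 1) moves
   to cycle 7 + 7/3 = 9, i.e. row 1 of the new ii = 4 schedule, while an
   insn at cycle 5 (row 2) moves to cycle 5 + 5/3 + 1 = 7, i.e. row 3, one
   row later than before.  */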
static void
1878
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
1879
                     sbitmap sched_nodes)
1880
{
1881
  ps_insn_ptr crr_insn;
1882
  ps_insn_ptr *rows_new;
1883
  int ii = ps->ii;
1884
  int new_ii = ii + 1;
1885
  int row;
1886
 
1887
  verify_partial_schedule (ps, sched_nodes);
1888
 
1889
  /* We normalize sched_time and rotate ps to have only non-negative sched
1890
     times, for simplicity of updating cycles after inserting new row.  */
1891
  split_row -= ps->min_cycle;
1892
  split_row = SMODULO (split_row, ii);
1893
  if (dump_file)
1894
    fprintf (dump_file, "split_row=%d\n", split_row);
1895
 
1896
  normalize_sched_times (ps);
1897
  rotate_partial_schedule (ps, ps->min_cycle);
1898
 
1899
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
1900
  for (row = 0; row < split_row; row++)
1901
    {
1902
      rows_new[row] = ps->rows[row];
1903
      ps->rows[row] = NULL;
1904
      for (crr_insn = rows_new[row];
1905
           crr_insn; crr_insn = crr_insn->next_in_row)
1906
        {
1907
          ddg_node_ptr u = crr_insn->node;
1908
          int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
1909
 
1910
          SCHED_TIME (u) = new_time;
1911
          crr_insn->cycle = new_time;
1912
          SCHED_ROW (u) = new_time % new_ii;
1913
          SCHED_STAGE (u) = new_time / new_ii;
1914
        }
1915
 
1916
    }
1917
 
1918
  rows_new[split_row] = NULL;
1919
 
1920
  for (row = split_row; row < ii; row++)
1921
    {
1922
      rows_new[row + 1] = ps->rows[row];
1923
      ps->rows[row] = NULL;
1924
      for (crr_insn = rows_new[row + 1];
1925
           crr_insn; crr_insn = crr_insn->next_in_row)
1926
        {
1927
          ddg_node_ptr u = crr_insn->node;
1928
          int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
1929
 
1930
          SCHED_TIME (u) = new_time;
1931
          crr_insn->cycle = new_time;
1932
          SCHED_ROW (u) = new_time % new_ii;
1933
          SCHED_STAGE (u) = new_time / new_ii;
1934
        }
1935
    }
1936
 
1937
  /* Updating ps.  */
1938
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
1939
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
1940
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
1941
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
1942
  free (ps->rows);
1943
  ps->rows = rows_new;
1944
  ps->ii = new_ii;
1945
  gcc_assert (ps->min_cycle >= 0);
1946
 
1947
  verify_partial_schedule (ps, sched_nodes);
1948
 
1949
  if (dump_file)
1950
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
1951
             ps->max_cycle);
1952
}
1953
 
1954
/* Given U_NODE which is the node that failed to be scheduled; LOW and
1955
   UP which are the boundaries of its scheduling window; compute using
1956
   SCHED_NODES and II a row in the partial schedule that can be split
1957
   which will separate a critical predecessor from a critical successor
1958
   thereby expanding the window, and return it.  */
1959
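/* For instance (illustrative): if LOW is pinned by a critical predecessor
   scheduled at cycle 9, the row returned is SMODULO (10, ii), i.e. the row
   just after that predecessor, so the inserted empty row widens the window
   from below.  */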
static int
1960
compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
1961
                   ddg_node_ptr u_node)
1962
{
1963
  ddg_edge_ptr e;
1964
  int lower = INT_MIN, upper = INT_MAX;
1965
  ddg_node_ptr crit_pred = NULL;
1966
  ddg_node_ptr crit_succ = NULL;
1967
  int crit_cycle;
1968
 
1969
  for (e = u_node->in; e != 0; e = e->next_in)
1970
    {
1971
      ddg_node_ptr v_node = e->src;
1972
 
1973
      if (TEST_BIT (sched_nodes, v_node->cuid)
1974
          && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
1975
        if (SCHED_TIME (v_node) > lower)
1976
          {
1977
            crit_pred = v_node;
1978
            lower = SCHED_TIME (v_node);
1979
          }
1980
    }
1981
 
1982
  if (crit_pred != NULL)
1983
    {
1984
      crit_cycle = SCHED_TIME (crit_pred) + 1;
1985
      return SMODULO (crit_cycle, ii);
1986
    }
1987
 
1988
  for (e = u_node->out; e != 0; e = e->next_out)
1989
    {
1990
      ddg_node_ptr v_node = e->dest;
1991
      if (TEST_BIT (sched_nodes, v_node->cuid)
1992
          && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
1993
        if (SCHED_TIME (v_node) < upper)
1994
          {
1995
            crit_succ = v_node;
1996
            upper = SCHED_TIME (v_node);
1997
          }
1998
    }
1999
 
2000
  if (crit_succ != NULL)
2001
    {
2002
      crit_cycle = SCHED_TIME (crit_succ);
2003
      return SMODULO (crit_cycle, ii);
2004
    }
2005
 
2006
  if (dump_file)
2007
    fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2008
 
2009
  return SMODULO ((low + up + 1) / 2, ii);
2010
}
2011
 
2012
static void
2013
verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2014
{
2015
  int row;
2016
  ps_insn_ptr crr_insn;
2017
 
2018
  for (row = 0; row < ps->ii; row++)
2019
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2020
      {
2021
        ddg_node_ptr u = crr_insn->node;
2022
 
2023
        gcc_assert (TEST_BIT (sched_nodes, u->cuid));
2024
        /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2025
           popcount (sched_nodes) == number of insns in ps.  */
2026
        gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2027
        gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2028
      }
2029
}
2030
 
2031
 
2032
/* This page implements the algorithm for ordering the nodes of a DDG
2033
   for modulo scheduling, activated through the
2034
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2035
 
2036
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2037
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
2038
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
2039
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2040
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
2041
#define DEPTH(x) (ASAP ((x)))
2042
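/* MOB is the scheduling slack of a node; e.g. (illustrative) ASAP = 2 and
   ALAP = 5 give MOB = 3 free cycles in which the node may be placed.  */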
 
2043
typedef struct node_order_params * nopa;
2044
 
2045
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2046
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2047
static nopa  calculate_order_params (ddg_ptr, int, int *);
2048
static int find_max_asap (ddg_ptr, sbitmap);
2049
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2050
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2051
 
2052
enum sms_direction {BOTTOMUP, TOPDOWN};
2053
 
2054
struct node_order_params
2055
{
2056
  int asap;
2057
  int alap;
2058
  int height;
2059
};
2060
 
2061
/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2062
static void
2063
check_nodes_order (int *node_order, int num_nodes)
2064
{
2065
  int i;
2066
  sbitmap tmp = sbitmap_alloc (num_nodes);
2067
 
2068
  sbitmap_zero (tmp);
2069
 
2070
  if (dump_file)
2071
    fprintf (dump_file, "SMS final nodes order: \n");
2072
 
2073
  for (i = 0; i < num_nodes; i++)
2074
    {
2075
      int u = node_order[i];
2076
 
2077
      if (dump_file)
2078
        fprintf (dump_file, "%d ", u);
2079
      gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2080
 
2081
      SET_BIT (tmp, u);
2082
    }
2083
 
2084
  if (dump_file)
2085
    fprintf (dump_file, "\n");
2086
 
2087
  sbitmap_free (tmp);
2088
}
2089
 
2090
/* Order the nodes of G for scheduling and pass the result in
2091
   NODE_ORDER.  Also set aux.count of each node to ASAP.
2092
   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2093
static int
2094
sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2095
{
2096
  int i;
2097
  int rec_mii = 0;
2098
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2099
 
2100
  nopa nops = calculate_order_params (g, mii, pmax_asap);
2101
 
2102
  if (dump_file)
2103
    print_sccs (dump_file, sccs, g);
2104
 
2105
  order_nodes_of_sccs (sccs, node_order);
2106
 
2107
  if (sccs->num_sccs > 0)
2108
    /* First SCC has the largest recurrence_length.  */
2109
    rec_mii = sccs->sccs[0]->recurrence_length;
2110
 
2111
  /* Save ASAP before destroying node_order_params.  */
2112
  for (i = 0; i < g->num_nodes; i++)
2113
    {
2114
      ddg_node_ptr v = &g->nodes[i];
2115
      v->aux.count = ASAP (v);
2116
    }
2117
 
2118
  free (nops);
2119
  free_ddg_all_sccs (sccs);
2120
  check_nodes_order (node_order, g->num_nodes);
2121
 
2122
  return rec_mii;
2123
}
2124
 
2125
static void
2126
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2127
{
2128
  int i, pos = 0;
2129
  ddg_ptr g = all_sccs->ddg;
2130
  int num_nodes = g->num_nodes;
2131
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2132
  sbitmap on_path = sbitmap_alloc (num_nodes);
2133
  sbitmap tmp = sbitmap_alloc (num_nodes);
2134
  sbitmap ones = sbitmap_alloc (num_nodes);
2135
 
2136
  sbitmap_zero (prev_sccs);
2137
  sbitmap_ones (ones);
2138
 
2139
  /* Perform the node ordering starting from the SCC with the highest recMII.
2140
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2141
  for (i = 0; i < all_sccs->num_sccs; i++)
2142
    {
2143
      ddg_scc_ptr scc = all_sccs->sccs[i];
2144
 
2145
      /* Add nodes on paths from previous SCCs to the current SCC.  */
2146
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2147
      sbitmap_a_or_b (tmp, scc->nodes, on_path);
2148
 
2149
      /* Add nodes on paths from the current SCC to previous SCCs.  */
2150
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2151
      sbitmap_a_or_b (tmp, tmp, on_path);
2152
 
2153
      /* Remove nodes of previous SCCs from current extended SCC.  */
2154
      sbitmap_difference (tmp, tmp, prev_sccs);
2155
 
2156
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2157
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2158
    }
2159
 
2160
  /* Handle the remaining nodes that do not belong to any scc.  Each call
2161
     to order_nodes_in_scc handles a single connected component.  */
2162
  while (pos < g->num_nodes)
2163
    {
2164
      sbitmap_difference (tmp, ones, prev_sccs);
2165
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2166
    }
2167
  sbitmap_free (prev_sccs);
2168
  sbitmap_free (on_path);
2169
  sbitmap_free (tmp);
2170
  sbitmap_free (ones);
2171
}
2172
 
2173
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2174
static struct node_order_params *
2175
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2176
{
2177
  int u;
2178
  int max_asap;
2179
  int num_nodes = g->num_nodes;
2180
  ddg_edge_ptr e;
2181
  /* Allocate a place to hold ordering params for each node in the DDG.  */
2182
  nopa node_order_params_arr;
2183
 
2184
  /* Initialize ASAP/ALAP/HEIGHT to zero.  */
2185
  node_order_params_arr = (nopa) xcalloc (num_nodes,
2186
                                          sizeof (struct node_order_params));
2187
 
2188
  /* Set the aux pointer of each node to point to its order_params structure.  */
2189
  for (u = 0; u < num_nodes; u++)
2190
    g->nodes[u].aux.info = &node_order_params_arr[u];
2191
 
2192
  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2193
     calculate ASAP, ALAP, mobility, distance, and height for each node
2194
     in the dependence (directed acyclic) graph.  */
2195
 
2196
  /* We assume that the nodes in the array are in topological order.  */
2197
 
2198
  max_asap = 0;
2199
  for (u = 0; u < num_nodes; u++)
2200
    {
2201
      ddg_node_ptr u_node = &g->nodes[u];
2202
 
2203
      ASAP (u_node) = 0;
2204
      for (e = u_node->in; e; e = e->next_in)
2205
        if (e->distance == 0)
2206
          ASAP (u_node) = MAX (ASAP (u_node),
2207
                               ASAP (e->src) + e->latency);
2208
      max_asap = MAX (max_asap, ASAP (u_node));
2209
    }
2210
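  /* E.g. (illustrative) for a chain a -> b -> c with unit latencies and
     zero distances, ASAP (a) = 0, ASAP (b) = 1, ASAP (c) = 2 and
     max_asap = 2.  */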
 
2211
  for (u = num_nodes - 1; u > -1; u--)
2212
    {
2213
      ddg_node_ptr u_node = &g->nodes[u];
2214
 
2215
      ALAP (u_node) = max_asap;
2216
      HEIGHT (u_node) = 0;
2217
      for (e = u_node->out; e; e = e->next_out)
2218
        if (e->distance == 0)
2219
          {
2220
            ALAP (u_node) = MIN (ALAP (u_node),
2221
                                 ALAP (e->dest) - e->latency);
2222
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
2223
                                   HEIGHT (e->dest) + e->latency);
2224
          }
2225
    }
2226
  if (dump_file)
2227
  {
2228
    fprintf (dump_file, "\nOrder params\n");
2229
    for (u = 0; u < num_nodes; u++)
2230
      {
2231
        ddg_node_ptr u_node = &g->nodes[u];
2232
 
2233
        fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2234
                 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2235
      }
2236
  }
2237
 
2238
  *pmax_asap = max_asap;
2239
  return node_order_params_arr;
2240
}
2241
 
2242
static int
2243
find_max_asap (ddg_ptr g, sbitmap nodes)
2244
{
2245
  unsigned int u = 0;
2246
  int max_asap = -1;
2247
  int result = -1;
2248
  sbitmap_iterator sbi;
2249
 
2250
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2251
    {
2252
      ddg_node_ptr u_node = &g->nodes[u];
2253
 
2254
      if (max_asap < ASAP (u_node))
2255
        {
2256
          max_asap = ASAP (u_node);
2257
          result = u;
2258
        }
2259
    }
2260
  return result;
2261
}
2262
 
2263
static int
2264
find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2265
{
2266
  unsigned int u = 0;
2267
  int max_hv = -1;
2268
  int min_mob = INT_MAX;
2269
  int result = -1;
2270
  sbitmap_iterator sbi;
2271
 
2272
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2273
    {
2274
      ddg_node_ptr u_node = &g->nodes[u];
2275
 
2276
      if (max_hv < HEIGHT (u_node))
2277
        {
2278
          max_hv = HEIGHT (u_node);
2279
          min_mob = MOB (u_node);
2280
          result = u;
2281
        }
2282
      else if ((max_hv == HEIGHT (u_node))
2283
               && (min_mob > MOB (u_node)))
2284
        {
2285
          min_mob = MOB (u_node);
2286
          result = u;
2287
        }
2288
    }
2289
  return result;
2290
}
2291
 
2292
static int
2293
find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2294
{
2295
  unsigned int u = 0;
2296
  int max_dv = -1;
2297
  int min_mob = INT_MAX;
2298
  int result = -1;
2299
  sbitmap_iterator sbi;
2300
 
2301
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2302
    {
2303
      ddg_node_ptr u_node = &g->nodes[u];
2304
 
2305
      if (max_dv < DEPTH (u_node))
2306
        {
2307
          max_dv = DEPTH (u_node);
2308
          min_mob = MOB (u_node);
2309
          result = u;
2310
        }
2311
      else if ((max_dv == DEPTH (u_node))
2312
               && (min_mob > MOB (u_node)))
2313
        {
2314
          min_mob = MOB (u_node);
2315
          result = u;
2316
        }
2317
    }
2318
  return result;
2319
}
2320
 
2321
/* Places the nodes of SCC into the NODE_ORDER array starting
2322
   at position POS, according to the SMS ordering algorithm.
2323
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2324
   the NODE_ORDER array, starting from position zero.  */
2325
static int
2326
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2327
                    int * node_order, int pos)
2328
{
2329
  enum sms_direction dir;
2330
  int num_nodes = g->num_nodes;
2331
  sbitmap workset = sbitmap_alloc (num_nodes);
2332
  sbitmap tmp = sbitmap_alloc (num_nodes);
2333
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2334
  sbitmap predecessors = sbitmap_alloc (num_nodes);
2335
  sbitmap successors = sbitmap_alloc (num_nodes);
2336
 
2337
  sbitmap_zero (predecessors);
2338
  find_predecessors (predecessors, g, nodes_ordered);
2339
 
2340
  sbitmap_zero (successors);
2341
  find_successors (successors, g, nodes_ordered);
2342
 
2343
  sbitmap_zero (tmp);
2344
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2345
    {
2346
      sbitmap_copy (workset, tmp);
2347
      dir = BOTTOMUP;
2348
    }
2349
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2350
    {
2351
      sbitmap_copy (workset, tmp);
2352
      dir = TOPDOWN;
2353
    }
2354
  else
2355
    {
2356
      int u;
2357
 
2358
      sbitmap_zero (workset);
2359
      if ((u = find_max_asap (g, scc)) >= 0)
2360
        SET_BIT (workset, u);
2361
      dir = BOTTOMUP;
2362
    }
2363
 
2364
  sbitmap_zero (zero_bitmap);
2365
  while (!sbitmap_equal (workset, zero_bitmap))
2366
    {
2367
      int v;
2368
      ddg_node_ptr v_node;
2369
      sbitmap v_node_preds;
2370
      sbitmap v_node_succs;
2371
 
2372
      if (dir == TOPDOWN)
2373
        {
2374
          while (!sbitmap_equal (workset, zero_bitmap))
2375
            {
2376
              v = find_max_hv_min_mob (g, workset);
2377
              v_node = &g->nodes[v];
2378
              node_order[pos++] = v;
2379
              v_node_succs = NODE_SUCCESSORS (v_node);
2380
              sbitmap_a_and_b (tmp, v_node_succs, scc);
2381
 
2382
              /* Don't consider the already ordered successors again.  */
2383
              sbitmap_difference (tmp, tmp, nodes_ordered);
2384
              sbitmap_a_or_b (workset, workset, tmp);
2385
              RESET_BIT (workset, v);
2386
              SET_BIT (nodes_ordered, v);
2387
            }
2388
          dir = BOTTOMUP;
2389
          sbitmap_zero (predecessors);
2390
          find_predecessors (predecessors, g, nodes_ordered);
2391
          sbitmap_a_and_b (workset, predecessors, scc);
2392
        }
2393
      else
2394
        {
2395
          while (!sbitmap_equal (workset, zero_bitmap))
2396
            {
2397
              v = find_max_dv_min_mob (g, workset);
2398
              v_node = &g->nodes[v];
2399
              node_order[pos++] = v;
2400
              v_node_preds = NODE_PREDECESSORS (v_node);
2401
              sbitmap_a_and_b (tmp, v_node_preds, scc);
2402
 
2403
              /* Don't consider the already ordered predecessors again.  */
2404
              sbitmap_difference (tmp, tmp, nodes_ordered);
2405
              sbitmap_a_or_b (workset, workset, tmp);
2406
              RESET_BIT (workset, v);
2407
              SET_BIT (nodes_ordered, v);
2408
            }
2409
          dir = TOPDOWN;
2410
          sbitmap_zero (successors);
2411
          find_successors (successors, g, nodes_ordered);
2412
          sbitmap_a_and_b (workset, successors, scc);
2413
        }
2414
    }
2415
  sbitmap_free (tmp);
2416
  sbitmap_free (workset);
2417
  sbitmap_free (zero_bitmap);
2418
  sbitmap_free (predecessors);
2419
  sbitmap_free (successors);
2420
  return pos;
2421
}
2422
 
2423
 
2424
/* This page contains functions for manipulating partial-schedules during
2425
   modulo scheduling.  */
2426
 
2427
/* Create a partial schedule and allocate memory to hold II rows.  */
2428
 
2429
static partial_schedule_ptr
2430
create_partial_schedule (int ii, ddg_ptr g, int history)
2431
{
2432
  partial_schedule_ptr ps = XNEW (struct partial_schedule);
2433
  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2434
  ps->ii = ii;
2435
  ps->history = history;
2436
  ps->min_cycle = INT_MAX;
2437
  ps->max_cycle = INT_MIN;
2438
  ps->g = g;
2439
 
2440
  return ps;
2441
}
2442
 
2443
/* Free the PS_INSNs in the rows array of the given partial schedule.
2444
   ??? Consider caching the PS_INSN's.  */
2445
static void
2446
free_ps_insns (partial_schedule_ptr ps)
2447
{
2448
  int i;
2449
 
2450
  for (i = 0; i < ps->ii; i++)
2451
    {
2452
      while (ps->rows[i])
2453
        {
2454
          ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2455
 
2456
          free (ps->rows[i]);
2457
          ps->rows[i] = ps_insn;
2458
        }
2459
      ps->rows[i] = NULL;
2460
    }
2461
}
2462
 
2463
/* Free all the memory allocated to the partial schedule.  */
2464
 
2465
static void
2466
free_partial_schedule (partial_schedule_ptr ps)
2467
{
2468
  if (!ps)
2469
    return;
2470
  free_ps_insns (ps);
2471
  free (ps->rows);
2472
  free (ps);
2473
}
2474
 
2475
/* Clear the rows array with its PS_INSNs, and create a new one with
2476
   NEW_II rows.  */
2477
 
2478
static void
2479
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2480
{
2481
  if (!ps)
2482
    return;
2483
  free_ps_insns (ps);
2484
  if (new_ii == ps->ii)
2485
    return;
2486
  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2487
                                                 * sizeof (ps_insn_ptr));
2488
  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2489
  ps->ii = new_ii;
2490
  ps->min_cycle = INT_MAX;
2491
  ps->max_cycle = INT_MIN;
2492
}
2493
 
2494
/* Prints the partial schedule as an ii-row array; for each row
2495
   print the ids of the insns in it.  */
2496
void
2497
print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2498
{
2499
  int i;
2500
 
2501
  for (i = 0; i < ps->ii; i++)
2502
    {
2503
      ps_insn_ptr ps_i = ps->rows[i];
2504
 
2505
      fprintf (dump, "\n[ROW %d ]: ", i);
2506
      while (ps_i)
2507
        {
2508
          fprintf (dump, "%d, ",
2509
                   INSN_UID (ps_i->node->insn));
2510
          ps_i = ps_i->next_in_row;
2511
        }
2512
    }
2513
}
2514
 
2515
/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2516
static ps_insn_ptr
2517
create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2518
{
2519
  ps_insn_ptr ps_i = XNEW (struct ps_insn);
2520
 
2521
  ps_i->node = node;
2522
  ps_i->next_in_row = NULL;
2523
  ps_i->prev_in_row = NULL;
2524
  ps_i->row_rest_count = rest_count;
2525
  ps_i->cycle = cycle;
2526
 
2527
  return ps_i;
2528
}
2529
 
2530
 
2531
/* Removes the given PS_INSN from the partial schedule.  Returns false if the
2532
   node is not found in the partial schedule, else returns true.  */
2533
static bool
2534
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2535
{
2536
  int row;
2537
 
2538
  if (!ps || !ps_i)
2539
    return false;
2540
 
2541
  row = SMODULO (ps_i->cycle, ps->ii);
2542
  if (! ps_i->prev_in_row)
2543
    {
2544
      if (ps_i != ps->rows[row])
2545
        return false;
2546
 
2547
      ps->rows[row] = ps_i->next_in_row;
2548
      if (ps->rows[row])
2549
        ps->rows[row]->prev_in_row = NULL;
2550
    }
2551
  else
2552
    {
2553
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2554
      if (ps_i->next_in_row)
2555
        ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2556
    }
2557
  free (ps_i);
2558
  return true;
2559
}
2560
 
2561
/* Unlike what the literature describes for modulo scheduling (which focuses
2562
   on VLIW machines) the order of the instructions inside a cycle is
2563
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2564
   where the current instruction should go relative to the already
2565
   scheduled instructions in the given cycle.  Go over these
2566
   instructions and find the first possible column to put it in.  */
2567
static bool
2568
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2569
                     sbitmap must_precede, sbitmap must_follow)
2570
{
2571
  ps_insn_ptr next_ps_i;
2572
  ps_insn_ptr first_must_follow = NULL;
2573
  ps_insn_ptr last_must_precede = NULL;
2574
  int row;
2575
 
2576
  if (! ps_i)
2577
    return false;
2578
 
2579
  row = SMODULO (ps_i->cycle, ps->ii);
2580
 
2581
  /* Find the first must follow and the last must precede
2582
     and insert the node immediately after the must precede
2583
     but make sure that there is no must follow after it.  */
2584
  for (next_ps_i = ps->rows[row];
2585
       next_ps_i;
2586
       next_ps_i = next_ps_i->next_in_row)
2587
    {
2588
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
2589
          && ! first_must_follow)
2590
        first_must_follow = next_ps_i;
2591
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
2592
        {
2593
          /* If we have already met a node that must follow, then
2594
             there is no possible column.  */
2595
          if (first_must_follow)
2596
            return false;
2597
          else
2598
            last_must_precede = next_ps_i;
2599
        }
2600
    }
2601
 
2602
  /* Now insert the node after LAST_MUST_PRECEDE, or at the head of the
     row if there is none.  */
2603
 
2604
  if (! last_must_precede)
2605
    {
2606
      ps_i->next_in_row = ps->rows[row];
2607
      ps_i->prev_in_row = NULL;
2608
      if (ps_i->next_in_row)
2609
        ps_i->next_in_row->prev_in_row = ps_i;
2610
      ps->rows[row] = ps_i;
2611
    }
2612
  else
2613
    {
2614
      ps_i->next_in_row = last_must_precede->next_in_row;
2615
      last_must_precede->next_in_row = ps_i;
2616
      ps_i->prev_in_row = last_must_precede;
2617
      if (ps_i->next_in_row)
2618
        ps_i->next_in_row->prev_in_row = ps_i;
2619
    }
2620
 
2621
  return true;
2622
}
2623
 
2624
/* Advances the PS_INSN one column in its current row; returns false
2625
   on failure and true on success.  Bit N is set in MUST_FOLLOW if
2626
   the node with cuid N must come after the node pointed to by
2627
   PS_I when scheduled in the same cycle.  */
2628
static int
2629
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2630
                        sbitmap must_follow)
2631
{
2632
  ps_insn_ptr prev, next;
2633
  int row;
2634
  ddg_node_ptr next_node;
2635
 
2636
  if (!ps || !ps_i)
2637
    return false;
2638
 
2639
  row = SMODULO (ps_i->cycle, ps->ii);
2640
 
2641
  if (! ps_i->next_in_row)
2642
    return false;
2643
 
2644
  next_node = ps_i->next_in_row->node;
2645
 
2646
  /* Check if next_in_row is dependent on ps_i, both having same sched
2647
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
2648
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
2649
    return false;
2650
 
2651
  /* Advance PS_I over its next_in_row in the doubly linked list.  */
2652
  prev = ps_i->prev_in_row;
2653
  next = ps_i->next_in_row;
2654
 
2655
  if (ps_i == ps->rows[row])
2656
    ps->rows[row] = next;
2657
 
2658
  ps_i->next_in_row = next->next_in_row;
2659
 
2660
  if (next->next_in_row)
2661
    next->next_in_row->prev_in_row = ps_i;
2662
 
2663
  next->next_in_row = ps_i;
2664
  ps_i->prev_in_row = next;
2665
 
2666
  next->prev_in_row = prev;
2667
  if (prev)
2668
    prev->next_in_row = next;
2669
 
2670
  return true;
2671
}
2672
 
2673
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2674
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
2675
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
2676
   before/after (respectively) the node pointed to by PS_I when scheduled
2677
   in the same cycle.  */
2678
static ps_insn_ptr
2679
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2680
                sbitmap must_precede, sbitmap must_follow)
2681
{
2682
  ps_insn_ptr ps_i;
2683
  int rest_count = 1;
2684
  int row = SMODULO (cycle, ps->ii);
2685
 
2686
  if (ps->rows[row]
2687
      && ps->rows[row]->row_rest_count >= issue_rate)
2688
    return NULL;
2689
 
2690
  if (ps->rows[row])
2691
    rest_count += ps->rows[row]->row_rest_count;
2692
 
2693
  ps_i = create_ps_insn (node, rest_count, cycle);
2694
 
2695
  /* Finds and inserts PS_I according to MUST_FOLLOW and
2696
     MUST_PRECEDE.  */
2697
  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2698
    {
2699
      free (ps_i);
2700
      return NULL;
2701
    }
2702
 
2703
  return ps_i;
2704
}
2705
 
2706
/* Advance time one cycle.  Assumes DFA is being used.  */
2707
static void
2708
advance_one_cycle (void)
2709
{
2710
  if (targetm.sched.dfa_pre_cycle_insn)
2711
    state_transition (curr_state,
2712
                      targetm.sched.dfa_pre_cycle_insn ());
2713
 
2714
  state_transition (curr_state, NULL);
2715
 
2716
  if (targetm.sched.dfa_post_cycle_insn)
2717
    state_transition (curr_state,
2718
                      targetm.sched.dfa_post_cycle_insn ());
2719
}
2720
 
2721
 
2722
 
2723
/* Checks if PS has resource conflicts according to DFA, starting from
2724
   FROM cycle to TO cycle; returns true if there are conflicts and false
2725
   if there are no conflicts.  Assumes DFA is being used.  */
2726
static int
2727
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2728
{
2729
  int cycle;
2730
 
2731
  state_reset (curr_state);
2732
 
2733
  for (cycle = from; cycle <= to; cycle++)
2734
    {
2735
      ps_insn_ptr crr_insn;
2736
      /* Holds the remaining issue slots in the current row.  */
2737
      int can_issue_more = issue_rate;
2738
 
2739
      /* Walk through the DFA for the current row.  */
2740
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2741
           crr_insn;
2742
           crr_insn = crr_insn->next_in_row)
2743
        {
2744
          rtx insn = crr_insn->node->insn;
2745
 
2746
          if (!NONDEBUG_INSN_P (insn))
2747
            continue;
2748
 
2749
          /* Check if there is room for the current insn.  */
2750
          if (!can_issue_more || state_dead_lock_p (curr_state))
2751
            return true;
2752
 
2753
          /* Update the DFA state and return with failure if the DFA found
2754
             resource conflicts.  */
2755
          if (state_transition (curr_state, insn) >= 0)
2756
            return true;
2757
 
2758
          if (targetm.sched.variable_issue)
2759
            can_issue_more =
2760
              targetm.sched.variable_issue (sched_dump, sched_verbose,
2761
                                            insn, can_issue_more);
2762
          /* A naked CLOBBER or USE generates no instruction, so don't
2763
             let them consume issue slots.  */
2764
          else if (GET_CODE (PATTERN (insn)) != USE
2765
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
2766
            can_issue_more--;
2767
        }
2768
 
2769
      /* Advance the DFA to the next cycle.  */
2770
      advance_one_cycle ();
2771
    }
2772
  return false;
2773
}
2774
 
2775
/* Checks if the given node causes resource conflicts when added to PS at
2776
   cycle C.  If not the node is added to PS and returned; otherwise zero
2777
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2778
   cuid N must come before/after (respectively) the node pointed to by
2779
   PS_I when scheduled in the same cycle.  */
2780
ps_insn_ptr
2781
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2782
                             int c, sbitmap must_precede,
2783
                             sbitmap must_follow)
2784
{
2785
  int has_conflicts = 0;
2786
  ps_insn_ptr ps_i;
2787
 
2788
  /* First add the node to the PS, if this succeeds check for
2789
     conflicts, trying different issue slots in the same row.  */
2790
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2791
    return NULL; /* Failed to insert the node at the given cycle.  */
2792
 
2793
  has_conflicts = ps_has_conflicts (ps, c, c)
2794
                  || (ps->history > 0
2795
                      && ps_has_conflicts (ps,
2796
                                           c - ps->history,
2797
                                           c + ps->history));
2798
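  /* When PS->HISTORY is zero only cycle C itself is checked; otherwise
     the surrounding cycles [C - history, C + history] are re-verified
     against the DFA as well.  */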
 
2799
  /* Try different issue slots to find one that the given node can be
2800
     scheduled in without conflicts.  */
2801
  while (has_conflicts)
2802
    {
2803
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
2804
        break;
2805
      has_conflicts = ps_has_conflicts (ps, c, c)
2806
                      || (ps->history > 0
2807
                          && ps_has_conflicts (ps,
2808
                                               c - ps->history,
2809
                                               c + ps->history));
2810
    }
2811
 
2812
  if (has_conflicts)
2813
    {
2814
      remove_node_from_ps (ps, ps_i);
2815
      return NULL;
2816
    }
2817
 
2818
  ps->min_cycle = MIN (ps->min_cycle, c);
2819
  ps->max_cycle = MAX (ps->max_cycle, c);
2820
  return ps_i;
2821
}
2822
 
2823
/* Rotate the rows of PS such that insns scheduled at time
2824
   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
2825
void
2826
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2827
{
2828
  int i, row, backward_rotates;
2829
  int last_row = ps->ii - 1;
2830
 
2831
  if (start_cycle == 0)
2832
    return;
2833
 
2834
  backward_rotates = SMODULO (start_cycle, ps->ii);
2835
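  /* E.g. (illustrative) with ii = 4 and start_cycle = 6,
     backward_rotates = 2, so the insns formerly in row 2 end up in
     row 0.  */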
 
2836
  /* Revisit later and optimize this into a single loop.  */
2837
  for (i = 0; i < backward_rotates; i++)
2838
    {
2839
      ps_insn_ptr first_row = ps->rows[0];
2840
 
2841
      for (row = 0; row < last_row; row++)
2842
        ps->rows[row] = ps->rows[row+1];
2843
 
2844
      ps->rows[last_row] = first_row;
2845
    }
2846
 
2847
  ps->max_cycle -= start_cycle;
2848
  ps->min_cycle -= start_cycle;
2849
}
2850
 
2851
#endif /* INSN_SCHEDULING */
2852
 
2853
static bool
2854
gate_handle_sms (void)
2855
{
2856
  return (optimize > 0 && flag_modulo_sched);
2857
}
2858
 
2859
 
2860
/* Run instruction scheduler.  */
2861
/* Perform SMS modulo scheduling.  */
2862
static unsigned int
2863
rest_of_handle_sms (void)
2864
{
2865
#ifdef INSN_SCHEDULING
2866
  basic_block bb;
2867
 
2868
  /* Collect loop information to be used in SMS.  */
2869
  cfg_layout_initialize (0);
2870
  sms_schedule ();
2871
 
2872
  /* Update the life information, because we add pseudos.  */
2873
  max_regno = max_reg_num ();
2874
 
2875
  /* Finalize layout changes.  */
2876
  FOR_EACH_BB (bb)
2877
    if (bb->next_bb != EXIT_BLOCK_PTR)
2878
      bb->aux = bb->next_bb;
2879
  free_dominance_info (CDI_DOMINATORS);
2880
  cfg_layout_finalize ();
2881
#endif /* INSN_SCHEDULING */
2882
  return 0;
2883
}
2884
 
2885
struct rtl_opt_pass pass_sms =
2886
{
2887
 {
2888
  RTL_PASS,
2889
  "sms",                                /* name */
2890
  gate_handle_sms,                      /* gate */
2891
  rest_of_handle_sms,                   /* execute */
2892
  NULL,                                 /* sub */
2893
  NULL,                                 /* next */
2894
  0,                                    /* static_pass_number */
2895
  TV_SMS,                               /* tv_id */
2896
  0,                                    /* properties_required */
2897
  0,                                    /* properties_provided */
2898
  0,                                    /* properties_destroyed */
2899
  TODO_dump_func,                       /* todo_flags_start */
2900
  TODO_df_finish | TODO_verify_rtl_sharing |
2901
  TODO_dump_func |
2902
  TODO_ggc_collect                      /* todo_flags_finish */
2903
 }
2904
};
2905
 
