OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [modulo-sched.c] - Blame information for rev 833

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Swing Modulo Scheduling implementation.
2
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING3.  If not see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
 
23
#include "config.h"
24
#include "system.h"
25
#include "coretypes.h"
26
#include "tm.h"
27
#include "diagnostic-core.h"
28
#include "rtl.h"
29
#include "tm_p.h"
30
#include "hard-reg-set.h"
31
#include "regs.h"
32
#include "function.h"
33
#include "flags.h"
34
#include "insn-config.h"
35
#include "insn-attr.h"
36
#include "except.h"
37
#include "recog.h"
38
#include "sched-int.h"
39
#include "target.h"
40
#include "cfglayout.h"
41
#include "cfgloop.h"
42
#include "cfghooks.h"
43
#include "expr.h"
44
#include "params.h"
45
#include "gcov-io.h"
46
#include "ddg.h"
47
#include "timevar.h"
48
#include "tree-pass.h"
49
#include "dbgcnt.h"
50
#include "df.h"
51
 
52
#ifdef INSN_SCHEDULING
53
 
54
/* This file contains the implementation of the Swing Modulo Scheduler,
55
   described in the following references:
56
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
57
       Lifetime--sensitive modulo scheduling in a production environment.
58
       IEEE Trans. on Comps., 50(3), March 2001
59
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
60
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
61
       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62
 
63
   The basic structure is:
64
   1. Build a data-dependence graph (DDG) for each loop.
65
   2. Use the DDG to order the insns of a loop (not in topological order
66
      necessarily, but rather) trying to place each insn after all its
67
      predecessors _or_ after all its successors.
68
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
69
   4. Use the ordering to perform list-scheduling of the loop:
70
      1. Set II = MII.  We will try to schedule the loop within II cycles.
71
      2. Try to schedule the insns one by one according to the ordering.
72
         For each insn compute an interval of cycles by considering already-
73
         scheduled preds and succs (and associated latencies); try to place
74
         the insn in the cycles of this window checking for potential
75
         resource conflicts (using the DFA interface).
76
         Note: this is different from the cycle-scheduling of schedule_insns;
77
         here the insns are not scheduled monotonically top-down (nor bottom-
78
         up).
79
      3. If failed in scheduling all insns - bump II++ and try again, unless
80
         II reaches an upper bound MaxII, in which case report failure.
81
   5. If we succeeded in scheduling the loop within II cycles, we now
82
      generate prolog and epilog, decrease the counter of the loop, and
83
      perform modulo variable expansion for live ranges that span more than
84
      II cycles (i.e. use register copies to prevent a def from overwriting
85
      itself before reaching the use).
86
 
87
    SMS works with countable loops (1) whose control part can be easily
88
    decoupled from the rest of the loop and (2) whose loop count can
89
    be easily adjusted.  This is because we peel a constant number of
90
    iterations into a prologue and epilogue for which we want to avoid
91
    emitting the control part, and a kernel which is to iterate that
92
    constant number of iterations less than the original loop.  So the
93
    control part should be a set of insns clearly identified and having
94
    its own iv, not otherwise used in the loop (at-least for now), which
95
    initializes a register before the loop to the number of iterations.
96
    Currently SMS relies on the do-loop pattern to recognize such loops,
97
    where (1) the control part comprises of all insns defining and/or
98
    using a certain 'count' register and (2) the loop count can be
99
    adjusted by modifying this register prior to the loop.
100
    TODO: Rely on cfgloop analysis instead.  */
101
 
102
/* This page defines partial-schedule structures and functions for
103
   modulo scheduling.  */
104
 
105
typedef struct partial_schedule *partial_schedule_ptr;
106
typedef struct ps_insn *ps_insn_ptr;
107
 
108
/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
109
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
110
 
111
/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
112
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
113
 
114
/* Perform signed modulo, always returning a non-negative value.  */
115
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
116
 
117
/* The number of different iterations the nodes in ps span, assuming
118
   the stage boundaries are placed efficiently.  */
119
#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
120
                         + 1 + ii - 1) / ii)
121
/* The stage count of ps.  */
122
#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
123
 
124
/* A single instruction in the partial schedule.  */
125
struct ps_insn
126
{
127
  /* Identifies the instruction to be scheduled.  Values smaller than
128
     the ddg's num_nodes refer directly to ddg nodes.  A value of
129
     X - num_nodes refers to register move X.  */
130
  int id;
131
 
132
  /* The (absolute) cycle in which the PS instruction is scheduled.
133
     Same as SCHED_TIME (node).  */
134
  int cycle;
135
 
136
  /* The next/prev PS_INSN in the same row.  */
137
  ps_insn_ptr next_in_row,
138
              prev_in_row;
139
 
140
};
141
 
142
/* Information about a register move that has been added to a partial
143
   schedule.  */
144
struct ps_reg_move_info
145
{
146
  /* The source of the move is defined by the ps_insn with id DEF.
147
     The destination is used by the ps_insns with the ids in USES.  */
148
  int def;
149
  sbitmap uses;
150
 
151
  /* The original form of USES' instructions used OLD_REG, but they
152
     should now use NEW_REG.  */
153
  rtx old_reg;
154
  rtx new_reg;
155
 
156
  /* The number of consecutive stages that the move occupies.  */
157
  int num_consecutive_stages;
158
 
159
  /* An instruction that sets NEW_REG to the correct value.  The first
160
     move associated with DEF will have an rhs of OLD_REG; later moves
161
     use the result of the previous move.  */
162
  rtx insn;
163
};
164
 
165
typedef struct ps_reg_move_info ps_reg_move_info;
166
DEF_VEC_O (ps_reg_move_info);
167
DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
168
 
169
/* Holds the partial schedule as an array of II rows.  Each entry of the
170
   array points to a linked list of PS_INSNs, which represents the
171
   instructions that are scheduled for that row.  */
172
struct partial_schedule
173
{
174
  int ii;       /* Number of rows in the partial schedule.  */
175
  int history;  /* Threshold for conflict checking using DFA.  */
176
 
177
  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
178
  ps_insn_ptr *rows;
179
 
180
  /* All the moves added for this partial schedule.  Index X has
181
     a ps_insn id of X + g->num_nodes.  */
182
  VEC (ps_reg_move_info, heap) *reg_moves;
183
 
184
  /*  rows_length[i] holds the number of instructions in the row.
185
      It is used only (as an optimization) to back off quickly from
186
      trying to schedule a node in a full row; that is, to avoid running
187
      through futile DFA state transitions.  */
188
  int *rows_length;
189
 
190
  /* The earliest absolute cycle of an insn in the partial schedule.  */
191
  int min_cycle;
192
 
193
  /* The latest absolute cycle of an insn in the partial schedule.  */
194
  int max_cycle;
195
 
196
  ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
197
 
198
  int stage_count;  /* The stage count of the partial schedule.  */
199
};
200
 
201
 
202
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
203
static void free_partial_schedule (partial_schedule_ptr);
204
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
205
void print_partial_schedule (partial_schedule_ptr, FILE *);
206
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
207
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
208
                                                int, int, sbitmap, sbitmap);
209
static void rotate_partial_schedule (partial_schedule_ptr, int);
210
void set_row_column_for_ps (partial_schedule_ptr);
211
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
212
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
213
 
214
 
215
/* This page defines constants and structures for the modulo scheduling
216
   driver.  */
217
 
218
static int sms_order_nodes (ddg_ptr, int, int *, int *);
219
static void set_node_sched_params (ddg_ptr);
220
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
221
static void permute_partial_schedule (partial_schedule_ptr, rtx);
222
static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
223
                                    rtx, rtx);
224
static int calculate_stage_count (partial_schedule_ptr, int);
225
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
226
                                           int, int, sbitmap, sbitmap, sbitmap);
227
static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
228
                             sbitmap, int, int *, int *, int *);
229
static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
230
                                          sbitmap, int *, sbitmap, sbitmap);
231
static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
232
 
233
#define NODE_ASAP(node) ((node)->aux.count)
234
 
235
#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
236
#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
237
#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
238
#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
239
#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
240
 
241
/* The scheduling parameters held for each node.  */
242
typedef struct node_sched_params
243
{
244
  int time;     /* The absolute scheduling cycle.  */
245
 
246
  int row;    /* Holds time % ii.  */
247
  int stage;  /* Holds time / ii.  */
248
 
249
  /* The column of a node inside the ps.  If nodes u, v are on the same row,
250
     u will precede v if column (u) < column (v).  */
251
  int column;
252
} *node_sched_params_ptr;
253
 
254
typedef struct node_sched_params node_sched_params;
255
DEF_VEC_O (node_sched_params);
256
DEF_VEC_ALLOC_O (node_sched_params, heap);
257
 
258
/* The following three functions are copied from the current scheduler
259
   code in order to use sched_analyze() for computing the dependencies.
260
   They are used when initializing the sched_info structure.  */
261
static const char *
262
sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
263
{
264
  static char tmp[80];
265
 
266
  sprintf (tmp, "i%4d", INSN_UID (insn));
267
  return tmp;
268
}
269
 
270
static void
271
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
272
                               regset used ATTRIBUTE_UNUSED)
273
{
274
}
275
 
276
static struct common_sched_info_def sms_common_sched_info;
277
 
278
static struct sched_deps_info_def sms_sched_deps_info =
279
  {
280
    compute_jump_reg_dependencies,
281
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
282
    NULL,
283
    0, 0, 0
284
  };
285
 
286
static struct haifa_sched_info sms_sched_info =
287
{
288
  NULL,
289
  NULL,
290
  NULL,
291
  NULL,
292
  NULL,
293
  sms_print_insn,
294
  NULL,
295
  NULL, /* insn_finishes_block_p */
296
  NULL, NULL,
297
  NULL, NULL,
298
  0, 0,
299
 
300
  NULL, NULL, NULL, NULL,
301
  NULL, NULL,
302
 
303
};
304
 
305
/* Partial schedule instruction ID in PS is a register move.  Return
306
   information about it.  */
307
static struct ps_reg_move_info *
308
ps_reg_move (partial_schedule_ptr ps, int id)
309
{
310
  gcc_checking_assert (id >= ps->g->num_nodes);
311
  return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
312
}
313
 
314
/* Return the rtl instruction that is being scheduled by partial schedule
315
   instruction ID, which belongs to schedule PS.  */
316
static rtx
317
ps_rtl_insn (partial_schedule_ptr ps, int id)
318
{
319
  if (id < ps->g->num_nodes)
320
    return ps->g->nodes[id].insn;
321
  else
322
    return ps_reg_move (ps, id)->insn;
323
}
324
 
325
/* Partial schedule instruction ID, which belongs to PS, occured in
326
   the original (unscheduled) loop.  Return the first instruction
327
   in the loop that was associated with ps_rtl_insn (PS, ID).
328
   If the instruction had some notes before it, this is the first
329
   of those notes.  */
330
static rtx
331
ps_first_note (partial_schedule_ptr ps, int id)
332
{
333
  gcc_assert (id < ps->g->num_nodes);
334
  return ps->g->nodes[id].first_note;
335
}
336
 
337
/* Return the number of consecutive stages that are occupied by
338
   partial schedule instruction ID in PS.  */
339
static int
340
ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
341
{
342
  if (id < ps->g->num_nodes)
343
    return 1;
344
  else
345
    return ps_reg_move (ps, id)->num_consecutive_stages;
346
}
347
 
348
/* Given HEAD and TAIL which are the first and last insns in a loop;
349
   return the register which controls the loop.  Return zero if it has
350
   more than one occurrence in the loop besides the control part or the
351
   do-loop pattern is not of the form we expect.  */
352
static rtx
353
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
354
{
355
#ifdef HAVE_doloop_end
356
  rtx reg, condition, insn, first_insn_not_to_check;
357
 
358
  if (!JUMP_P (tail))
359
    return NULL_RTX;
360
 
361
  /* TODO: Free SMS's dependence on doloop_condition_get.  */
362
  condition = doloop_condition_get (tail);
363
  if (! condition)
364
    return NULL_RTX;
365
 
366
  if (REG_P (XEXP (condition, 0)))
367
    reg = XEXP (condition, 0);
368
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
369
           && REG_P (XEXP (XEXP (condition, 0), 0)))
370
    reg = XEXP (XEXP (condition, 0), 0);
371
  else
372
    gcc_unreachable ();
373
 
374
  /* Check that the COUNT_REG has no other occurrences in the loop
375
     until the decrement.  We assume the control part consists of
376
     either a single (parallel) branch-on-count or a (non-parallel)
377
     branch immediately preceded by a single (decrement) insn.  */
378
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
379
                             : prev_nondebug_insn (tail));
380
 
381
  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
382
    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
383
      {
384
        if (dump_file)
385
        {
386
          fprintf (dump_file, "SMS count_reg found ");
387
          print_rtl_single (dump_file, reg);
388
          fprintf (dump_file, " outside control in insn:\n");
389
          print_rtl_single (dump_file, insn);
390
        }
391
 
392
        return NULL_RTX;
393
      }
394
 
395
  return reg;
396
#else
397
  return NULL_RTX;
398
#endif
399
}
400
 
401
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
402
   that the number of iterations is a compile-time constant.  If so,
403
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
404
   this constant.  Otherwise return 0.  */
405
static rtx
406
const_iteration_count (rtx count_reg, basic_block pre_header,
407
                       HOST_WIDEST_INT * count)
408
{
409
  rtx insn;
410
  rtx head, tail;
411
 
412
  if (! pre_header)
413
    return NULL_RTX;
414
 
415
  get_ebb_head_tail (pre_header, pre_header, &head, &tail);
416
 
417
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
418
    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
419
        rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
420
      {
421
        rtx pat = single_set (insn);
422
 
423
        if (CONST_INT_P (SET_SRC (pat)))
424
          {
425
            *count = INTVAL (SET_SRC (pat));
426
            return insn;
427
          }
428
 
429
        return NULL_RTX;
430
      }
431
 
432
  return NULL_RTX;
433
}
434
 
435
/* A very simple resource-based lower bound on the initiation interval.
436
   ??? Improve the accuracy of this bound by considering the
437
   utilization of various units.  */
438
static int
439
res_MII (ddg_ptr g)
440
{
441
  if (targetm.sched.sms_res_mii)
442
    return targetm.sched.sms_res_mii (g);
443
 
444
  return ((g->num_nodes - g->num_debug) / issue_rate);
445
}
446
 
447
 
448
/* A vector that contains the sched data for each ps_insn.  */
449
static VEC (node_sched_params, heap) *node_sched_param_vec;
450
 
451
/* Allocate sched_params for each node and initialize it.  */
452
static void
453
set_node_sched_params (ddg_ptr g)
454
{
455
  VEC_truncate (node_sched_params, node_sched_param_vec, 0);
456
  VEC_safe_grow_cleared (node_sched_params, heap,
457
                         node_sched_param_vec, g->num_nodes);
458
}
459
 
460
/* Make sure that node_sched_param_vec has an entry for every move in PS.  */
461
static void
462
extend_node_sched_params (partial_schedule_ptr ps)
463
{
464
  VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec,
465
                         ps->g->num_nodes + VEC_length (ps_reg_move_info,
466
                                                        ps->reg_moves));
467
}
468
 
469
/* Update the sched_params (time, row and stage) for node U using the II,
470
   the CYCLE of U and MIN_CYCLE.
471
   We're not simply taking the following
472
   SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
473
   because the stages may not be aligned on cycle 0.  */
474
static void
475
update_node_sched_params (int u, int ii, int cycle, int min_cycle)
476
{
477
  int sc_until_cycle_zero;
478
  int stage;
479
 
480
  SCHED_TIME (u) = cycle;
481
  SCHED_ROW (u) = SMODULO (cycle, ii);
482
 
483
  /* The calculation of stage count is done adding the number
484
     of stages before cycle zero and after cycle zero.  */
485
  sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
486
 
487
  if (SCHED_TIME (u) < 0)
488
    {
489
      stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
490
      SCHED_STAGE (u) = sc_until_cycle_zero - stage;
491
    }
492
  else
493
    {
494
      stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
495
      SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
496
    }
497
}
498
 
499
static void
500
print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
501
{
502
  int i;
503
 
504
  if (! file)
505
    return;
506
  for (i = 0; i < num_nodes; i++)
507
    {
508
      node_sched_params_ptr nsp = SCHED_PARAMS (i);
509
 
510
      fprintf (file, "Node = %d; INSN = %d\n", i,
511
               INSN_UID (ps_rtl_insn (ps, i)));
512
      fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
513
      fprintf (file, " time = %d:\n", nsp->time);
514
      fprintf (file, " stage = %d:\n", nsp->stage);
515
    }
516
}
517
 
518
/* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
519
static void
520
set_columns_for_row (partial_schedule_ptr ps, int row)
521
{
522
  ps_insn_ptr cur_insn;
523
  int column;
524
 
525
  column = 0;
526
  for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
527
    SCHED_COLUMN (cur_insn->id) = column++;
528
}
529
 
530
/* Set SCHED_COLUMN for each instruction in PS.  */
531
static void
532
set_columns_for_ps (partial_schedule_ptr ps)
533
{
534
  int row;
535
 
536
  for (row = 0; row < ps->ii; row++)
537
    set_columns_for_row (ps, row);
538
}
539
 
540
/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
541
   Its single predecessor has already been scheduled, as has its
542
   ddg node successors.  (The move may have also another move as its
543
   successor, in which case that successor will be scheduled later.)
544
 
545
   The move is part of a chain that satisfies register dependencies
546
   between a producing ddg node and various consuming ddg nodes.
547
   If some of these dependencies have a distance of 1 (meaning that
548
   the use is upward-exposed) then DISTANCE1_USES is nonnull and
549
   contains the set of uses with distance-1 dependencies.
550
   DISTANCE1_USES is null otherwise.
551
 
552
   MUST_FOLLOW is a scratch bitmap that is big enough to hold
553
   all current ps_insn ids.
554
 
555
   Return true on success.  */
556
static bool
557
schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
558
                   sbitmap distance1_uses, sbitmap must_follow)
559
{
560
  unsigned int u;
561
  int this_time, this_distance, this_start, this_end, this_latency;
562
  int start, end, c, ii;
563
  sbitmap_iterator sbi;
564
  ps_reg_move_info *move;
565
  rtx this_insn;
566
  ps_insn_ptr psi;
567
 
568
  move = ps_reg_move (ps, i_reg_move);
569
  ii = ps->ii;
570
  if (dump_file)
571
    {
572
      fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
573
               ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
574
               PS_MIN_CYCLE (ps));
575
      print_rtl_single (dump_file, move->insn);
576
      fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
577
      fprintf (dump_file, "=========== =========== =====\n");
578
    }
579
 
580
  start = INT_MIN;
581
  end = INT_MAX;
582
 
583
  /* For dependencies of distance 1 between a producer ddg node A
584
     and consumer ddg node B, we have a chain of dependencies:
585
 
586
        A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
587
 
588
     where Mi is the ith move.  For dependencies of distance 0 between
589
     a producer ddg node A and consumer ddg node C, we have a chain of
590
     dependencies:
591
 
592
        A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
593
 
594
     where Mi' occupies the same position as Mi but occurs a stage later.
595
     We can only schedule each move once, so if we have both types of
596
     chain, we model the second as:
597
 
598
        A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
599
 
600
     First handle the dependencies between the previously-scheduled
601
     predecessor and the move.  */
602
  this_insn = ps_rtl_insn (ps, move->def);
603
  this_latency = insn_latency (this_insn, move->insn);
604
  this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
605
  this_time = SCHED_TIME (move->def) - this_distance * ii;
606
  this_start = this_time + this_latency;
607
  this_end = this_time + ii;
608
  if (dump_file)
609
    fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
610
             this_start, this_end, SCHED_TIME (move->def),
611
             INSN_UID (this_insn), this_latency, this_distance,
612
             INSN_UID (move->insn));
613
 
614
  if (start < this_start)
615
    start = this_start;
616
  if (end > this_end)
617
    end = this_end;
618
 
619
  /* Handle the dependencies between the move and previously-scheduled
620
     successors.  */
621
  EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi)
622
    {
623
      this_insn = ps_rtl_insn (ps, u);
624
      this_latency = insn_latency (move->insn, this_insn);
625
      if (distance1_uses && !TEST_BIT (distance1_uses, u))
626
        this_distance = -1;
627
      else
628
        this_distance = 0;
629
      this_time = SCHED_TIME (u) + this_distance * ii;
630
      this_start = this_time - ii;
631
      this_end = this_time - this_latency;
632
      if (dump_file)
633
        fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
634
                 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
635
                 this_latency, this_distance, INSN_UID (this_insn));
636
 
637
      if (start < this_start)
638
        start = this_start;
639
      if (end > this_end)
640
        end = this_end;
641
    }
642
 
643
  if (dump_file)
644
    {
645
      fprintf (dump_file, "----------- ----------- -----\n");
646
      fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
647
    }
648
 
649
  sbitmap_zero (must_follow);
650
  SET_BIT (must_follow, move->def);
651
 
652
  start = MAX (start, end - (ii - 1));
653
  for (c = end; c >= start; c--)
654
    {
655
      psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
656
                                         move->uses, must_follow);
657
      if (psi)
658
        {
659
          update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
660
          if (dump_file)
661
            fprintf (dump_file, "\nScheduled register move INSN %d at"
662
                     " time %d, row %d\n\n", INSN_UID (move->insn), c,
663
                     SCHED_ROW (i_reg_move));
664
          return true;
665
        }
666
    }
667
 
668
  if (dump_file)
669
    fprintf (dump_file, "\nNo available slot\n\n");
670
 
671
  return false;
672
}
673
 
674
/*
675
   Breaking intra-loop register anti-dependences:
676
   Each intra-loop register anti-dependence implies a cross-iteration true
677
   dependence of distance 1. Therefore, we can remove such false dependencies
678
   and figure out if the partial schedule broke them by checking if (for a
679
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
680
   if so generate a register move.   The number of such moves is equal to:
681
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
682
   nreg_moves = ----------------------------------- + 1 - {   dependence.
683
                            ii                          { 1 if not.
684
*/
685
static bool
686
schedule_reg_moves (partial_schedule_ptr ps)
687
{
688
  ddg_ptr g = ps->g;
689
  int ii = ps->ii;
690
  int i;
691
 
692
  for (i = 0; i < g->num_nodes; i++)
693
    {
694
      ddg_node_ptr u = &g->nodes[i];
695
      ddg_edge_ptr e;
696
      int nreg_moves = 0, i_reg_move;
697
      rtx prev_reg, old_reg;
698
      int first_move;
699
      int distances[2];
700
      sbitmap must_follow;
701
      sbitmap distance1_uses;
702
      rtx set = single_set (u->insn);
703
 
704
      /* Skip instructions that do not set a register.  */
705
      if ((set && !REG_P (SET_DEST (set))))
706
        continue;
707
 
708
      /* Compute the number of reg_moves needed for u, by looking at life
709
         ranges started at u (excluding self-loops).  */
710
      distances[0] = distances[1] = false;
711
      for (e = u->out; e; e = e->next_out)
712
        if (e->type == TRUE_DEP && e->dest != e->src)
713
          {
714
            int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
715
                                - SCHED_TIME (e->src->cuid)) / ii;
716
 
717
            if (e->distance == 1)
718
              nreg_moves4e = (SCHED_TIME (e->dest->cuid)
719
                              - SCHED_TIME (e->src->cuid) + ii) / ii;
720
 
721
            /* If dest precedes src in the schedule of the kernel, then dest
722
               will read before src writes and we can save one reg_copy.  */
723
            if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
724
                && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
725
              nreg_moves4e--;
726
 
727
            if (nreg_moves4e >= 1)
728
              {
729
                /* !single_set instructions are not supported yet and
730
                   thus we do not except to encounter them in the loop
731
                   except from the doloop part.  For the latter case
732
                   we assume no regmoves are generated as the doloop
733
                   instructions are tied to the branch with an edge.  */
734
                gcc_assert (set);
735
                /* If the instruction contains auto-inc register then
736
                   validate that the regmov is being generated for the
737
                   target regsiter rather then the inc'ed register.     */
738
                gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
739
              }
740
 
741
            if (nreg_moves4e)
742
              {
743
                gcc_assert (e->distance < 2);
744
                distances[e->distance] = true;
745
              }
746
            nreg_moves = MAX (nreg_moves, nreg_moves4e);
747
          }
748
 
749
      if (nreg_moves == 0)
750
        continue;
751
 
752
      /* Create NREG_MOVES register moves.  */
753
      first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
754
      VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
755
                             first_move + nreg_moves);
756
      extend_node_sched_params (ps);
757
 
758
      /* Record the moves associated with this node.  */
759
      first_move += ps->g->num_nodes;
760
 
761
      /* Generate each move.  */
762
      old_reg = prev_reg = SET_DEST (single_set (u->insn));
763
      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
764
        {
765
          ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
766
 
767
          move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
768
          move->uses = sbitmap_alloc (first_move + nreg_moves);
769
          move->old_reg = old_reg;
770
          move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
771
          move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
772
          move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
773
          sbitmap_zero (move->uses);
774
 
775
          prev_reg = move->new_reg;
776
        }
777
 
778
      distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
779
 
780
      /* Every use of the register defined by node may require a different
781
         copy of this register, depending on the time the use is scheduled.
782
         Record which uses require which move results.  */
783
      for (e = u->out; e; e = e->next_out)
784
        if (e->type == TRUE_DEP && e->dest != e->src)
785
          {
786
            int dest_copy = (SCHED_TIME (e->dest->cuid)
787
                             - SCHED_TIME (e->src->cuid)) / ii;
788
 
789
            if (e->distance == 1)
790
              dest_copy = (SCHED_TIME (e->dest->cuid)
791
                           - SCHED_TIME (e->src->cuid) + ii) / ii;
792
 
793
            if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
794
                && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
795
              dest_copy--;
796
 
797
            if (dest_copy)
798
              {
799
                ps_reg_move_info *move;
800
 
801
                move = ps_reg_move (ps, first_move + dest_copy - 1);
802
                SET_BIT (move->uses, e->dest->cuid);
803
                if (e->distance == 1)
804
                  SET_BIT (distance1_uses, e->dest->cuid);
805
              }
806
          }
807
 
808
      must_follow = sbitmap_alloc (first_move + nreg_moves);
809
      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
810
        if (!schedule_reg_move (ps, first_move + i_reg_move,
811
                                distance1_uses, must_follow))
812
          break;
813
      sbitmap_free (must_follow);
814
      if (distance1_uses)
815
        sbitmap_free (distance1_uses);
816
      if (i_reg_move < nreg_moves)
817
        return false;
818
    }
819
  return true;
820
}
821
 
822
/* Emit the moves associatied with PS.  Apply the substitutions
823
   associated with them.  */
824
static void
825
apply_reg_moves (partial_schedule_ptr ps)
826
{
827
  ps_reg_move_info *move;
828
  int i;
829
 
830
  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
831
    {
832
      unsigned int i_use;
833
      sbitmap_iterator sbi;
834
 
835
      EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
836
        {
837
          replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
838
          df_insn_rescan (ps->g->nodes[i_use].insn);
839
        }
840
    }
841
}
842
 
843
/* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
844
   SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
845
   will move to cycle zero.  */
846
static void
847
reset_sched_times (partial_schedule_ptr ps, int amount)
848
{
849
  int row;
850
  int ii = ps->ii;
851
  ps_insn_ptr crr_insn;
852
 
853
  for (row = 0; row < ii; row++)
854
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
855
      {
856
        int u = crr_insn->id;
857
        int normalized_time = SCHED_TIME (u) - amount;
858
        int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
859
 
860
        if (dump_file)
861
          {
862
            /* Print the scheduling times after the rotation.  */
863
            rtx insn = ps_rtl_insn (ps, u);
864
 
865
            fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
866
                     "crr_insn->cycle=%d, min_cycle=%d", u,
867
                     INSN_UID (insn), normalized_time, new_min_cycle);
868
            if (JUMP_P (insn))
869
              fprintf (dump_file, " (branch)");
870
            fprintf (dump_file, "\n");
871
          }
872
 
873
        gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
874
        gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
875
 
876
        crr_insn->cycle = normalized_time;
877
        update_node_sched_params (u, ii, normalized_time, new_min_cycle);
878
      }
879
}
880
 
881
/* Permute the insns according to their order in PS, from row 0 to
882
   row ii-1, and position them right before LAST.  This schedules
883
   the insns of the loop kernel.  */
884
static void
885
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
886
{
887
  int ii = ps->ii;
888
  int row;
889
  ps_insn_ptr ps_ij;
890
 
891
  for (row = 0; row < ii ; row++)
892
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
893
      {
894
        rtx insn = ps_rtl_insn (ps, ps_ij->id);
895
 
896
        if (PREV_INSN (last) != insn)
897
          {
898
            if (ps_ij->id < ps->g->num_nodes)
899
              reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
900
                                  PREV_INSN (last));
901
            else
902
              add_insn_before (insn, last, NULL);
903
          }
904
      }
905
}
906
 
907
/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
908
   respectively only if cycle C falls on the border of the scheduling
909
   window boundaries marked by START and END cycles.  STEP is the
910
   direction of the window.  */
911
static inline void
912
set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
913
                         sbitmap *tmp_precede, sbitmap must_precede, int c,
914
                         int start, int end, int step)
915
{
916
  *tmp_precede = NULL;
917
  *tmp_follow = NULL;
918
 
919
  if (c == start)
920
    {
921
      if (step == 1)
922
        *tmp_precede = must_precede;
923
      else                      /* step == -1.  */
924
        *tmp_follow = must_follow;
925
    }
926
  if (c == end - step)
927
    {
928
      if (step == 1)
929
        *tmp_follow = must_follow;
930
      else                      /* step == -1.  */
931
        *tmp_precede = must_precede;
932
    }
933
 
934
}
935
 
936
/* Return True if the branch can be moved to row ii-1 while
937
   normalizing the partial schedule PS to start from cycle zero and thus
938
   optimize the SC.  Otherwise return False.  */
939
static bool
940
optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
941
{
942
  int amount = PS_MIN_CYCLE (ps);
943
  sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
944
  int start, end, step;
945
  int ii = ps->ii;
946
  bool ok = false;
947
  int stage_count, stage_count_curr;
948
 
949
  /* Compare the SC after normalization and SC after bringing the branch
950
     to row ii-1.  If they are equal just bail out.  */
951
  stage_count = calculate_stage_count (ps, amount);
952
  stage_count_curr =
953
    calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
954
 
955
  if (stage_count == stage_count_curr)
956
    {
957
      if (dump_file)
958
        fprintf (dump_file, "SMS SC already optimized.\n");
959
 
960
      ok = false;
961
      goto clear;
962
    }
963
 
964
  if (dump_file)
965
    {
966
      fprintf (dump_file, "SMS Trying to optimize branch location\n");
967
      fprintf (dump_file, "SMS partial schedule before trial:\n");
968
      print_partial_schedule (ps, dump_file);
969
    }
970
 
971
  /* First, normalize the partial scheduling.  */
972
  reset_sched_times (ps, amount);
973
  rotate_partial_schedule (ps, amount);
974
  if (dump_file)
975
    {
976
      fprintf (dump_file,
977
               "SMS partial schedule after normalization (ii, %d, SC %d):\n",
978
               ii, stage_count);
979
      print_partial_schedule (ps, dump_file);
980
    }
981
 
982
  if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
983
    {
984
      ok = true;
985
      goto clear;
986
    }
987
 
988
  sbitmap_ones (sched_nodes);
989
 
990
  /* Calculate the new placement of the branch.  It should be in row
991
     ii-1 and fall into it's scheduling window.  */
992
  if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
993
                        &step, &end) == 0)
994
    {
995
      bool success;
996
      ps_insn_ptr next_ps_i;
997
      int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
998
      int row = SMODULO (branch_cycle, ps->ii);
999
      int num_splits = 0;
1000
      sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1001
      int c;
1002
 
1003
      if (dump_file)
1004
        fprintf (dump_file, "\nTrying to schedule node %d "
1005
                 "INSN = %d  in (%d .. %d) step %d\n",
1006
                 g->closing_branch->cuid,
1007
                 (INSN_UID (g->closing_branch->insn)), start, end, step);
1008
 
1009
      gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1010
      if (step == 1)
1011
        {
1012
          c = start + ii - SMODULO (start, ii) - 1;
1013
          gcc_assert (c >= start);
1014
          if (c >= end)
1015
            {
1016
              ok = false;
1017
              if (dump_file)
1018
                fprintf (dump_file,
1019
                         "SMS failed to schedule branch at cycle: %d\n", c);
1020
              goto clear;
1021
            }
1022
        }
1023
      else
1024
        {
1025
          c = start - SMODULO (start, ii) - 1;
1026
          gcc_assert (c <= start);
1027
 
1028
          if (c <= end)
1029
            {
1030
              if (dump_file)
1031
                fprintf (dump_file,
1032
                         "SMS failed to schedule branch at cycle: %d\n", c);
1033
              ok = false;
1034
              goto clear;
1035
            }
1036
        }
1037
 
1038
      must_precede = sbitmap_alloc (g->num_nodes);
1039
      must_follow = sbitmap_alloc (g->num_nodes);
1040
 
1041
      /* Try to schedule the branch is it's new cycle.  */
1042
      calculate_must_precede_follow (g->closing_branch, start, end,
1043
                                     step, ii, sched_nodes,
1044
                                     must_precede, must_follow);
1045
 
1046
      set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1047
                               must_precede, c, start, end, step);
1048
 
1049
      /* Find the element in the partial schedule related to the closing
1050
         branch so we can remove it from it's current cycle.  */
1051
      for (next_ps_i = ps->rows[row];
1052
           next_ps_i; next_ps_i = next_ps_i->next_in_row)
1053
        if (next_ps_i->id == g->closing_branch->cuid)
1054
          break;
1055
 
1056
      remove_node_from_ps (ps, next_ps_i);
1057
      success =
1058
        try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1059
                                      sched_nodes, &num_splits,
1060
                                      tmp_precede, tmp_follow);
1061
      gcc_assert (num_splits == 0);
1062
      if (!success)
1063
        {
1064
          if (dump_file)
1065
            fprintf (dump_file,
1066
                     "SMS failed to schedule branch at cycle: %d, "
1067
                     "bringing it back to cycle %d\n", c, branch_cycle);
1068
 
1069
          /* The branch was failed to be placed in row ii - 1.
1070
             Put it back in it's original place in the partial
1071
             schedualing.  */
1072
          set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1073
                                   must_precede, branch_cycle, start, end,
1074
                                   step);
1075
          success =
1076
            try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1077
                                          branch_cycle, sched_nodes,
1078
                                          &num_splits, tmp_precede,
1079
                                          tmp_follow);
1080
          gcc_assert (success && (num_splits == 0));
1081
          ok = false;
1082
        }
1083
      else
1084
        {
1085
          /* The branch is placed in row ii - 1.  */
1086
          if (dump_file)
1087
            fprintf (dump_file,
1088
                     "SMS success in moving branch to cycle %d\n", c);
1089
 
1090
          update_node_sched_params (g->closing_branch->cuid, ii, c,
1091
                                    PS_MIN_CYCLE (ps));
1092
          ok = true;
1093
        }
1094
 
1095
      free (must_precede);
1096
      free (must_follow);
1097
    }
1098
 
1099
clear:
1100
  free (sched_nodes);
1101
  return ok;
1102
}
1103
 
1104
static void
1105
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1106
                           int to_stage, rtx count_reg)
1107
{
1108
  int row;
1109
  ps_insn_ptr ps_ij;
1110
 
1111
  for (row = 0; row < ps->ii; row++)
1112
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1113
      {
1114
        int u = ps_ij->id;
1115
        int first_u, last_u;
1116
        rtx u_insn;
1117
 
1118
        /* Do not duplicate any insn which refers to count_reg as it
1119
           belongs to the control part.
1120
           The closing branch is scheduled as well and thus should
1121
           be ignored.
1122
           TODO: This should be done by analyzing the control part of
1123
           the loop.  */
1124
        u_insn = ps_rtl_insn (ps, u);
1125
        if (reg_mentioned_p (count_reg, u_insn)
1126
            || JUMP_P (u_insn))
1127
          continue;
1128
 
1129
        first_u = SCHED_STAGE (u);
1130
        last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1131
        if (from_stage <= last_u && to_stage >= first_u)
1132
          {
1133
            if (u < ps->g->num_nodes)
1134
              duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1135
            else
1136
              emit_insn (copy_rtx (PATTERN (u_insn)));
1137
          }
1138
      }
1139
}
1140
 
1141
 
1142
/* Generate the instructions (including reg_moves) for prolog & epilog.  */
1143
static void
1144
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1145
                        rtx count_reg, rtx count_init)
1146
{
1147
  int i;
1148
  int last_stage = PS_STAGE_COUNT (ps) - 1;
1149
  edge e;
1150
 
1151
  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1152
  start_sequence ();
1153
 
1154
  if (!count_init)
1155
    {
1156
      /* Generate instructions at the beginning of the prolog to
1157
         adjust the loop count by STAGE_COUNT.  If loop count is constant
1158
         (count_init), this constant is adjusted by STAGE_COUNT in
1159
         generate_prolog_epilog function.  */
1160
      rtx sub_reg = NULL_RTX;
1161
 
1162
      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
1163
                                     count_reg, GEN_INT (last_stage),
1164
                                     count_reg, 1, OPTAB_DIRECT);
1165
      gcc_assert (REG_P (sub_reg));
1166
      if (REGNO (sub_reg) != REGNO (count_reg))
1167
        emit_move_insn (count_reg, sub_reg);
1168
    }
1169
 
1170
  for (i = 0; i < last_stage; i++)
1171
    duplicate_insns_of_cycles (ps, 0, i, count_reg);
1172
 
1173
  /* Put the prolog on the entry edge.  */
1174
  e = loop_preheader_edge (loop);
1175
  split_edge_and_insert (e, get_insns ());
1176
  if (!flag_resched_modulo_sched)
1177
    e->dest->flags |= BB_DISABLE_SCHEDULE;
1178
 
1179
  end_sequence ();
1180
 
1181
  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1182
  start_sequence ();
1183
 
1184
  for (i = 0; i < last_stage; i++)
1185
    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1186
 
1187
  /* Put the epilogue on the exit edge.  */
1188
  gcc_assert (single_exit (loop));
1189
  e = single_exit (loop);
1190
  split_edge_and_insert (e, get_insns ());
1191
  if (!flag_resched_modulo_sched)
1192
    e->dest->flags |= BB_DISABLE_SCHEDULE;
1193
 
1194
  end_sequence ();
1195
}
1196
 
1197
/* Mark LOOP as software pipelined so the later
1198
   scheduling passes don't touch it.  */
1199
static void
1200
mark_loop_unsched (struct loop *loop)
1201
{
1202
  unsigned i;
1203
  basic_block *bbs = get_loop_body (loop);
1204
 
1205
  for (i = 0; i < loop->num_nodes; i++)
1206
    bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1207
 
1208
  free (bbs);
1209
}
1210
 
1211
/* Return true if all the BBs of the loop are empty except the
1212
   loop header.  */
1213
static bool
1214
loop_single_full_bb_p (struct loop *loop)
1215
{
1216
  unsigned i;
1217
  basic_block *bbs = get_loop_body (loop);
1218
 
1219
  for (i = 0; i < loop->num_nodes ; i++)
1220
    {
1221
      rtx head, tail;
1222
      bool empty_bb = true;
1223
 
1224
      if (bbs[i] == loop->header)
1225
        continue;
1226
 
1227
      /* Make sure that basic blocks other than the header
1228
         have only notes labels or jumps.  */
1229
      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1230
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1231
        {
1232
          if (NOTE_P (head) || LABEL_P (head)
1233
              || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1234
            continue;
1235
          empty_bb = false;
1236
          break;
1237
        }
1238
 
1239
      if (! empty_bb)
1240
        {
1241
          free (bbs);
1242
          return false;
1243
        }
1244
    }
1245
  free (bbs);
1246
  return true;
1247
}
1248
 
1249
/* Dump file:line from INSN's location info to dump_file.  */
1250
 
1251
static void
1252
dump_insn_locator (rtx insn)
1253
{
1254
  if (dump_file && INSN_LOCATOR (insn))
1255
    {
1256
      const char *file = insn_file (insn);
1257
      if (file)
1258
        fprintf (dump_file, " %s:%i", file, insn_line (insn));
1259
    }
1260
}
1261
 
1262
/* A simple loop from SMS point of view; it is a loop that is composed of
1263
   either a single basic block or two BBs - a header and a latch.  */
1264
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                     \
1265
                                  && (EDGE_COUNT (loop->latch->preds) == 1) \
1266
                                  && (EDGE_COUNT (loop->latch->succs) == 1))
1267
 
1268
/* Return true if the loop is in its canonical form and false if not.
1269
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
1270
static bool
1271
loop_canon_p (struct loop *loop)
1272
{
1273
 
1274
  if (loop->inner || !loop_outer (loop))
1275
  {
1276
    if (dump_file)
1277
      fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1278
    return false;
1279
  }
1280
 
1281
  if (!single_exit (loop))
1282
    {
1283
      if (dump_file)
1284
        {
1285
          rtx insn = BB_END (loop->header);
1286
 
1287
          fprintf (dump_file, "SMS loop many exits");
1288
          dump_insn_locator (insn);
1289
          fprintf (dump_file, "\n");
1290
        }
1291
      return false;
1292
    }
1293
 
1294
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1295
    {
1296
      if (dump_file)
1297
        {
1298
          rtx insn = BB_END (loop->header);
1299
 
1300
          fprintf (dump_file, "SMS loop many BBs.");
1301
          dump_insn_locator (insn);
1302
          fprintf (dump_file, "\n");
1303
        }
1304
      return false;
1305
    }
1306
 
1307
    return true;
1308
}
1309
 
1310
/* If there are more than one entry for the loop,
1311
   make it one by splitting the first entry edge and
1312
   redirecting the others to the new BB.  */
1313
static void
1314
canon_loop (struct loop *loop)
1315
{
1316
  edge e;
1317
  edge_iterator i;
1318
 
1319
  /* Avoid annoying special cases of edges going to exit
1320
     block.  */
1321
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
1322
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1323
      split_edge (e);
1324
 
1325
  if (loop->latch == loop->header
1326
      || EDGE_COUNT (loop->latch->succs) > 1)
1327
    {
1328
      FOR_EACH_EDGE (e, i, loop->header->preds)
1329
        if (e->src == loop->latch)
1330
          break;
1331
      split_edge (e);
1332
    }
1333
}
1334
 
1335
/* Setup infos.  */
1336
static void
1337
setup_sched_infos (void)
1338
{
1339
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1340
          sizeof (sms_common_sched_info));
1341
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1342
  common_sched_info = &sms_common_sched_info;
1343
 
1344
  sched_deps_info = &sms_sched_deps_info;
1345
  current_sched_info = &sms_sched_info;
1346
}
1347
 
1348
/* Probability in % that the sms-ed loop rolls enough so that optimized
1349
   version may be entered.  Just a guess.  */
1350
#define PROB_SMS_ENOUGH_ITERATIONS 80
1351
 
1352
/* Used to calculate the upper bound of ii.  */
1353
#define MAXII_FACTOR 2
1354
 
1355
/* Main entry point, perform SMS scheduling on the loops of the function
1356
   that consist of single basic blocks.  */
1357
static void
1358
sms_schedule (void)
1359
{
1360
  rtx insn;
1361
  ddg_ptr *g_arr, g;
1362
  int * node_order;
1363
  int maxii, max_asap;
1364
  loop_iterator li;
1365
  partial_schedule_ptr ps;
1366
  basic_block bb = NULL;
1367
  struct loop *loop;
1368
  basic_block condition_bb = NULL;
1369
  edge latch_edge;
1370
  gcov_type trip_count = 0;
1371
 
1372
  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1373
                       | LOOPS_HAVE_RECORDED_EXITS);
1374
  if (number_of_loops () <= 1)
1375
    {
1376
      loop_optimizer_finalize ();
1377
      return;  /* There are no loops to schedule.  */
1378
    }
1379
 
1380
  /* Initialize issue_rate.  */
1381
  if (targetm.sched.issue_rate)
1382
    {
1383
      int temp = reload_completed;
1384
 
1385
      reload_completed = 1;
1386
      issue_rate = targetm.sched.issue_rate ();
1387
      reload_completed = temp;
1388
    }
1389
  else
1390
    issue_rate = 1;
1391
 
1392
  /* Initialize the scheduler.  */
1393
  setup_sched_infos ();
1394
  haifa_sched_init ();
1395
 
1396
  /* Allocate memory to hold the DDG array one entry for each loop.
1397
     We use loop->num as index into this array.  */
1398
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());
1399
 
1400
  if (dump_file)
1401
  {
1402
    fprintf (dump_file, "\n\nSMS analysis phase\n");
1403
    fprintf (dump_file, "===================\n\n");
1404
  }
1405
 
1406
  /* Build DDGs for all the relevant loops and hold them in G_ARR
1407
     indexed by the loop index.  */
1408
  FOR_EACH_LOOP (li, loop, 0)
1409
    {
1410
      rtx head, tail;
1411
      rtx count_reg;
1412
 
1413
      /* For debugging.  */
1414
      if (dbg_cnt (sms_sched_loop) == false)
1415
        {
1416
          if (dump_file)
1417
            fprintf (dump_file, "SMS reached max limit... \n");
1418
 
1419
          break;
1420
        }
1421
 
1422
      if (dump_file)
1423
        {
1424
          rtx insn = BB_END (loop->header);
1425
 
1426
          fprintf (dump_file, "SMS loop num: %d", loop->num);
1427
          dump_insn_locator (insn);
1428
          fprintf (dump_file, "\n");
1429
        }
1430
 
1431
      if (! loop_canon_p (loop))
1432
        continue;
1433
 
1434
      if (! loop_single_full_bb_p (loop))
1435
      {
1436
        if (dump_file)
1437
          fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1438
        continue;
1439
      }
1440
 
1441
      bb = loop->header;
1442
 
1443
      get_ebb_head_tail (bb, bb, &head, &tail);
1444
      latch_edge = loop_latch_edge (loop);
1445
      gcc_assert (single_exit (loop));
1446
      if (single_exit (loop)->count)
1447
        trip_count = latch_edge->count / single_exit (loop)->count;
1448
 
1449
      /* Perform SMS only on loops that their average count is above threshold.  */
1450
 
1451
      if ( latch_edge->count
1452
          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1453
        {
1454
          if (dump_file)
1455
            {
1456
              dump_insn_locator (tail);
1457
              fprintf (dump_file, "\nSMS single-bb-loop\n");
1458
              if (profile_info && flag_branch_probabilities)
1459
                {
1460
                  fprintf (dump_file, "SMS loop-count ");
1461
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1462
                           (HOST_WIDEST_INT) bb->count);
1463
                  fprintf (dump_file, "\n");
1464
                  fprintf (dump_file, "SMS trip-count ");
1465
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1466
                           (HOST_WIDEST_INT) trip_count);
1467
                  fprintf (dump_file, "\n");
1468
                  fprintf (dump_file, "SMS profile-sum-max ");
1469
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1470
                           (HOST_WIDEST_INT) profile_info->sum_max);
1471
                  fprintf (dump_file, "\n");
1472
                }
1473
            }
1474
          continue;
1475
        }
1476
 
1477
      /* Make sure this is a doloop.  */
1478
      if ( !(count_reg = doloop_register_get (head, tail)))
1479
      {
1480
        if (dump_file)
1481
          fprintf (dump_file, "SMS doloop_register_get failed\n");
1482
        continue;
1483
      }
1484
 
1485
      /* Don't handle BBs with calls or barriers
1486
         or !single_set with the exception of instructions that include
1487
         count_reg---these instructions are part of the control part
1488
         that do-loop recognizes.
1489
         ??? Should handle insns defining subregs.  */
1490
     for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1491
      {
1492
         rtx set;
1493
 
1494
        if (CALL_P (insn)
1495
            || BARRIER_P (insn)
1496
            || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1497
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1498
                && !reg_mentioned_p (count_reg, insn))
1499
            || (INSN_P (insn) && (set = single_set (insn))
1500
                && GET_CODE (SET_DEST (set)) == SUBREG))
1501
        break;
1502
      }
1503
 
1504
      if (insn != NEXT_INSN (tail))
1505
        {
1506
          if (dump_file)
1507
            {
1508
              if (CALL_P (insn))
1509
                fprintf (dump_file, "SMS loop-with-call\n");
1510
              else if (BARRIER_P (insn))
1511
                fprintf (dump_file, "SMS loop-with-barrier\n");
1512
              else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1513
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1514
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
1515
              else
1516
               fprintf (dump_file, "SMS loop with subreg in lhs\n");
1517
              print_rtl_single (dump_file, insn);
1518
            }
1519
 
1520
          continue;
1521
        }
1522
 
1523
      /* Always schedule the closing branch with the rest of the
1524
         instructions. The branch is rotated to be in row ii-1 at the
1525
         end of the scheduling procedure to make sure it's the last
1526
         instruction in the iteration.  */
1527
      if (! (g = create_ddg (bb, 1)))
1528
        {
1529
          if (dump_file)
1530
            fprintf (dump_file, "SMS create_ddg failed\n");
1531
          continue;
1532
        }
1533
 
1534
      g_arr[loop->num] = g;
1535
      if (dump_file)
1536
        fprintf (dump_file, "...OK\n");
1537
 
1538
    }
1539
  if (dump_file)
1540
  {
1541
    fprintf (dump_file, "\nSMS transformation phase\n");
1542
    fprintf (dump_file, "=========================\n\n");
1543
  }
1544
 
1545
  /* We don't want to perform SMS on new loops - created by versioning.  */
1546
  FOR_EACH_LOOP (li, loop, 0)
1547
    {
1548
      rtx head, tail;
1549
      rtx count_reg, count_init;
1550
      int mii, rec_mii, stage_count, min_cycle;
1551
      HOST_WIDEST_INT loop_count = 0;
1552
      bool opt_sc_p;
1553
 
1554
      if (! (g = g_arr[loop->num]))
1555
        continue;
1556
 
1557
      if (dump_file)
1558
        {
1559
          rtx insn = BB_END (loop->header);
1560
 
1561
          fprintf (dump_file, "SMS loop num: %d", loop->num);
1562
          dump_insn_locator (insn);
1563
          fprintf (dump_file, "\n");
1564
 
1565
          print_ddg (dump_file, g);
1566
        }
1567
 
1568
      get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1569
 
1570
      latch_edge = loop_latch_edge (loop);
1571
      gcc_assert (single_exit (loop));
1572
      if (single_exit (loop)->count)
1573
        trip_count = latch_edge->count / single_exit (loop)->count;
1574
 
1575
      if (dump_file)
1576
        {
1577
          dump_insn_locator (tail);
1578
          fprintf (dump_file, "\nSMS single-bb-loop\n");
1579
          if (profile_info && flag_branch_probabilities)
1580
            {
1581
              fprintf (dump_file, "SMS loop-count ");
1582
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1583
                       (HOST_WIDEST_INT) bb->count);
1584
              fprintf (dump_file, "\n");
1585
              fprintf (dump_file, "SMS profile-sum-max ");
1586
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1587
                       (HOST_WIDEST_INT) profile_info->sum_max);
1588
              fprintf (dump_file, "\n");
1589
            }
1590
          fprintf (dump_file, "SMS doloop\n");
1591
          fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1592
          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1593
          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1594
        }
1595
 
1596
 
1597
      /* In case of th loop have doloop register it gets special
1598
         handling.  */
1599
      count_init = NULL_RTX;
1600
      if ((count_reg = doloop_register_get (head, tail)))
1601
        {
1602
          basic_block pre_header;
1603
 
1604
          pre_header = loop_preheader_edge (loop)->src;
1605
          count_init = const_iteration_count (count_reg, pre_header,
1606
                                              &loop_count);
1607
        }
1608
      gcc_assert (count_reg);
1609
 
1610
      if (dump_file && count_init)
1611
        {
1612
          fprintf (dump_file, "SMS const-doloop ");
1613
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1614
                     loop_count);
1615
          fprintf (dump_file, "\n");
1616
        }
1617
 
1618
      node_order = XNEWVEC (int, g->num_nodes);
1619
 
1620
      mii = 1; /* Need to pass some estimate of mii.  */
1621
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1622
      mii = MAX (res_MII (g), rec_mii);
1623
      maxii = MAX (max_asap, MAXII_FACTOR * mii);
1624
 
1625
      if (dump_file)
1626
        fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1627
                 rec_mii, mii, maxii);
1628
 
1629
      for (;;)
1630
        {
1631
          set_node_sched_params (g);
1632
 
1633
          stage_count = 0;
1634
          opt_sc_p = false;
1635
          ps = sms_schedule_by_order (g, mii, maxii, node_order);
1636
 
1637
          if (ps)
1638
            {
1639
              /* Try to achieve optimized SC by normalizing the partial
1640
                 schedule (having the cycles start from cycle zero).
1641
                 The branch location must be placed in row ii-1 in the
1642
                 final scheduling.      If failed, shift all instructions to
1643
                 position the branch in row ii-1.  */
1644
              opt_sc_p = optimize_sc (ps, g);
1645
              if (opt_sc_p)
1646
                stage_count = calculate_stage_count (ps, 0);
1647
              else
1648
                {
1649
                  /* Bring the branch to cycle ii-1.  */
1650
                  int amount = (SCHED_TIME (g->closing_branch->cuid)
1651
                                - (ps->ii - 1));
1652
 
1653
                  if (dump_file)
1654
                    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1655
 
1656
                  stage_count = calculate_stage_count (ps, amount);
1657
                }
1658
 
1659
              gcc_assert (stage_count >= 1);
1660
            }
1661
 
1662
          /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1663
             1 means that there is no interleaving between iterations thus
1664
             we let the scheduling passes do the job in this case.  */
1665
          if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1666
              || (count_init && (loop_count <= stage_count))
1667
              || (flag_branch_probabilities && (trip_count <= stage_count)))
1668
            {
1669
              if (dump_file)
1670
                {
1671
                  fprintf (dump_file, "SMS failed... \n");
1672
                  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1673
                           " loop-count=", stage_count);
1674
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1675
                  fprintf (dump_file, ", trip-count=");
1676
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1677
                  fprintf (dump_file, ")\n");
1678
                }
1679
              break;
1680
            }
1681
 
1682
          if (!opt_sc_p)
1683
            {
1684
              /* Rotate the partial schedule to have the branch in row ii-1.  */
1685
              int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1686
 
1687
              reset_sched_times (ps, amount);
1688
              rotate_partial_schedule (ps, amount);
1689
            }
1690
 
1691
          set_columns_for_ps (ps);
1692
 
1693
          min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1694
          if (!schedule_reg_moves (ps))
1695
            {
1696
              mii = ps->ii + 1;
1697
              free_partial_schedule (ps);
1698
              continue;
1699
            }
1700
 
1701
          /* Moves that handle incoming values might have been added
1702
             to a new first stage.  Bump the stage count if so.
1703
 
1704
             ??? Perhaps we could consider rotating the schedule here
1705
             instead?  */
1706
          if (PS_MIN_CYCLE (ps) < min_cycle)
1707
            {
1708
              reset_sched_times (ps, 0);
1709
              stage_count++;
1710
            }
1711
 
1712
          /* The stage count should now be correct without rotation.  */
1713
          gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1714
          PS_STAGE_COUNT (ps) = stage_count;
1715
 
1716
          canon_loop (loop);
1717
 
1718
          if (dump_file)
1719
            {
1720
              dump_insn_locator (tail);
1721
              fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1722
                       ps->ii, stage_count);
1723
              print_partial_schedule (ps, dump_file);
1724
            }
1725
 
1726
          /* case the BCT count is not known , Do loop-versioning */
1727
          if (count_reg && ! count_init)
1728
            {
1729
              rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1730
                                             GEN_INT(stage_count));
1731
              unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1732
                               * REG_BR_PROB_BASE) / 100;
1733
 
1734
              loop_version (loop, comp_rtx, &condition_bb,
1735
                            prob, prob, REG_BR_PROB_BASE - prob,
1736
                            true);
1737
             }
1738
 
1739
          /* Set new iteration count of loop kernel.  */
1740
          if (count_reg && count_init)
1741
            SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1742
                                                     - stage_count + 1);
1743
 
1744
          /* Now apply the scheduled kernel to the RTL of the loop.  */
1745
          permute_partial_schedule (ps, g->closing_branch->first_note);
1746
 
1747
          /* Mark this loop as software pipelined so the later
1748
             scheduling passes don't touch it.  */
1749
          if (! flag_resched_modulo_sched)
1750
            mark_loop_unsched (loop);
1751
 
1752
          /* The life-info is not valid any more.  */
1753
          df_set_bb_dirty (g->bb);
1754
 
1755
          apply_reg_moves (ps);
1756
          if (dump_file)
1757
            print_node_sched_params (dump_file, g->num_nodes, ps);
1758
          /* Generate prolog and epilog.  */
1759
          generate_prolog_epilog (ps, loop, count_reg, count_init);
1760
          break;
1761
        }
1762
 
1763
      free_partial_schedule (ps);
1764
      VEC_free (node_sched_params, heap, node_sched_param_vec);
1765
      free (node_order);
1766
      free_ddg (g);
1767
    }
1768
 
1769
  free (g_arr);
1770
 
1771
  /* Release scheduler data, needed until now because of DFA.  */
1772
  haifa_sched_finish ();
1773
  loop_optimizer_finalize ();
1774
}
1775
 
1776
/* The SMS scheduling algorithm itself
1777
   -----------------------------------
1778
   Input: 'O' an ordered list of insns of a loop.
1779
   Output: A scheduling of the loop - kernel, prolog, and epilogue.
1780
 
1781
   'Q' is the empty Set
1782
   'PS' is the partial schedule; it holds the currently scheduled nodes with
1783
        their cycle/slot.
1784
   'PSP' previously scheduled predecessors.
1785
   'PSS' previously scheduled successors.
1786
   't(u)' the cycle where u is scheduled.
1787
   'l(u)' is the latency of u.
1788
   'd(v,u)' is the dependence distance from v to u.
1789
   'ASAP(u)' the earliest time at which u could be scheduled as computed in
1790
             the node ordering phase.
1791
   'check_hardware_resources_conflicts(u, PS, c)'
1792
                             run a trace around cycle/slot through DFA model
1793
                             to check resource conflicts involving instruction u
1794
                             at cycle c given the partial schedule PS.
1795
   'add_to_partial_schedule_at_time(u, PS, c)'
1796
                             Add the node/instruction u to the partial schedule
1797
                             PS at time c.
1798
   'calculate_register_pressure(PS)'
1799
                             Given a schedule of instructions, calculate the register
1800
                             pressure it implies.  One implementation could be the
1801
                             maximum number of overlapping live ranges.
1802
   'maxRP' The maximum allowed register pressure, it is usually derived from the number
1803
           registers available in the hardware.
1804
 
1805
   1. II = MII.
1806
   2. PS = empty list
1807
   3. for each node u in O in pre-computed order
1808
   4.   if (PSP(u) != Q && PSS(u) == Q) then
1809
   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1810
   6.     start = Early_start; end = Early_start + II - 1; step = 1
1811
   11.  else if (PSP(u) == Q && PSS(u) != Q) then
1812
   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1813
   13.     start = Late_start; end = Late_start - II + 1; step = -1
1814
   14.  else if (PSP(u) != Q && PSS(u) != Q) then
1815
   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1816
   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1817
   17.     start = Early_start;
1818
   18.     end = min(Early_start + II - 1 , Late_start);
1819
   19.     step = 1
1820
   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1821
   21.    start = ASAP(u); end = start + II - 1; step = 1
1822
   22.  endif
1823
 
1824
   23.  success = false
1825
   24.  for (c = start ; c != end ; c += step)
1826
   25.     if check_hardware_resources_conflicts(u, PS, c) then
1827
   26.       add_to_partial_schedule_at_time(u, PS, c)
1828
   27.       success = true
1829
   28.       break
1830
   29.     endif
1831
   30.  endfor
1832
   31.  if (success == false) then
1833
   32.    II = II + 1
1834
   33.    if (II > maxII) then
1835
   34.       finish - failed to schedule
1836
   35.   endif
1837
   36.    goto 2.
1838
   37.  endif
1839
   38. endfor
1840
   39. if (calculate_register_pressure(PS) > maxRP) then
1841
   40.    goto 32.
1842
   41. endif
1843
   42. compute epilogue & prologue
1844
   43. finish - succeeded to schedule
1845
 
1846
   ??? The algorithm restricts the scheduling window to II cycles.
1847
   In rare cases, it may be better to allow windows of II+1 cycles.
1848
   The window would then start and end on the same row, but with
1849
   different "must precede" and "must follow" requirements.  */
1850
 
1851
/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1852
   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1853
   set to 0 to save compile time.  */
1854
#define DFA_HISTORY SMS_DFA_HISTORY
1855
 
1856
/* A threshold for the number of repeated unsuccessful attempts to insert
1857
   an empty row, before we flush the partial schedule and start over.  */
1858
#define MAX_SPLIT_NUM 10
1859
/* Given the partial schedule PS, this function calculates and returns the
1860
   cycles in which we can schedule the node with the given index I.
1861
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1862
   noticed that there are several cases in which we fail    to SMS the loop
1863
   because the sched window of a node is empty    due to tight data-deps. In
1864
   such cases we want to unschedule    some of the predecessors/successors
1865
   until we get non-empty    scheduling window.  It returns -1 if the
1866
   scheduling window is empty and zero otherwise.  */
1867
 
1868
static int
1869
get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1870
                  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1871
                  int *end_p)
1872
{
1873
  int start, step, end;
1874
  int early_start, late_start;
1875
  ddg_edge_ptr e;
1876
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1877
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1878
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1879
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1880
  int psp_not_empty;
1881
  int pss_not_empty;
1882
  int count_preds;
1883
  int count_succs;
1884
 
1885
  /* 1. compute sched window for u (start, end, step).  */
1886
  sbitmap_zero (psp);
1887
  sbitmap_zero (pss);
1888
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1889
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1890
 
1891
  /* We first compute a forward range (start <= end), then decide whether
1892
     to reverse it.  */
1893
  early_start = INT_MIN;
1894
  late_start = INT_MAX;
1895
  start = INT_MIN;
1896
  end = INT_MAX;
1897
  step = 1;
1898
 
1899
  count_preds = 0;
1900
  count_succs = 0;
1901
 
1902
  if (dump_file && (psp_not_empty || pss_not_empty))
1903
    {
1904
      fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1905
               "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1906
      fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1907
               "start", "early start", "late start", "end", "time");
1908
      fprintf (dump_file, "=========== =========== =========== ==========="
1909
               " =====\n");
1910
    }
1911
  /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1912
  if (psp_not_empty)
1913
    for (e = u_node->in; e != 0; e = e->next_in)
1914
      {
1915
        int v = e->src->cuid;
1916
 
1917
        if (TEST_BIT (sched_nodes, v))
1918
          {
1919
            int p_st = SCHED_TIME (v);
1920
            int earliest = p_st + e->latency - (e->distance * ii);
1921
            int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1922
 
1923
            if (dump_file)
1924
              {
1925
                fprintf (dump_file, "%11s %11d %11s %11d %5d",
1926
                         "", earliest, "", latest, p_st);
1927
                print_ddg_edge (dump_file, e);
1928
                fprintf (dump_file, "\n");
1929
              }
1930
 
1931
            early_start = MAX (early_start, earliest);
1932
            end = MIN (end, latest);
1933
 
1934
            if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1935
              count_preds++;
1936
          }
1937
      }
1938
 
1939
  /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1940
  if (pss_not_empty)
1941
    for (e = u_node->out; e != 0; e = e->next_out)
1942
      {
1943
        int v = e->dest->cuid;
1944
 
1945
        if (TEST_BIT (sched_nodes, v))
1946
          {
1947
            int s_st = SCHED_TIME (v);
1948
            int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1949
            int latest = s_st - e->latency + (e->distance * ii);
1950
 
1951
            if (dump_file)
1952
              {
1953
                fprintf (dump_file, "%11d %11s %11d %11s %5d",
1954
                         earliest, "", latest, "", s_st);
1955
                print_ddg_edge (dump_file, e);
1956
                fprintf (dump_file, "\n");
1957
              }
1958
 
1959
            start = MAX (start, earliest);
1960
            late_start = MIN (late_start, latest);
1961
 
1962
            if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1963
              count_succs++;
1964
          }
1965
      }
1966
 
1967
  if (dump_file && (psp_not_empty || pss_not_empty))
1968
    {
1969
      fprintf (dump_file, "----------- ----------- ----------- -----------"
1970
               " -----\n");
1971
      fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
1972
               start, early_start, late_start, end, "",
1973
               "(max, max, min, min)");
1974
    }
1975
 
1976
  /* Get a target scheduling window no bigger than ii.  */
1977
  if (early_start == INT_MIN && late_start == INT_MAX)
1978
    early_start = NODE_ASAP (u_node);
1979
  else if (early_start == INT_MIN)
1980
    early_start = late_start - (ii - 1);
1981
  late_start = MIN (late_start, early_start + (ii - 1));
1982
 
1983
  /* Apply memory dependence limits.  */
1984
  start = MAX (start, early_start);
1985
  end = MIN (end, late_start);
1986
 
1987
  if (dump_file && (psp_not_empty || pss_not_empty))
1988
    fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
1989
             "", start, end, "", "");
1990
 
1991
  /* If there are at least as many successors as predecessors, schedule the
1992
     node close to its successors.  */
1993
  if (pss_not_empty && count_succs >= count_preds)
1994
    {
1995
      int tmp = end;
1996
      end = start;
1997
      start = tmp;
1998
      step = -1;
1999
    }
2000
 
2001
  /* Now that we've finalized the window, make END an exclusive rather
2002
     than an inclusive bound.  */
2003
  end += step;
2004
 
2005
  *start_p = start;
2006
  *step_p = step;
2007
  *end_p = end;
2008
  sbitmap_free (psp);
2009
  sbitmap_free (pss);
2010
 
2011
  if ((start >= end && step == 1) || (start <= end && step == -1))
2012
    {
2013
      if (dump_file)
2014
        fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2015
                 start, end, step);
2016
      return -1;
2017
    }
2018
 
2019
  return 0;
2020
}
2021
 
2022
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2023
   node currently been scheduled.  At the end of the calculation
2024
   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2025
   U_NODE which are (1) already scheduled in the first/last row of
2026
   U_NODE's scheduling window, (2) whose dependence inequality with U
2027
   becomes an equality when U is scheduled in this same row, and (3)
2028
   whose dependence latency is zero.
2029
 
2030
   The first and last rows are calculated using the following parameters:
2031
   START/END rows - The cycles that begins/ends the traversal on the window;
2032
   searching for an empty cycle to schedule U_NODE.
2033
   STEP - The direction in which we traverse the window.
2034
   II - The initiation interval.  */
2035
 
2036
static void
2037
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2038
                               int step, int ii, sbitmap sched_nodes,
2039
                               sbitmap must_precede, sbitmap must_follow)
2040
{
2041
  ddg_edge_ptr e;
2042
  int first_cycle_in_window, last_cycle_in_window;
2043
 
2044
  gcc_assert (must_precede && must_follow);
2045
 
2046
  /* Consider the following scheduling window:
2047
     {first_cycle_in_window, first_cycle_in_window+1, ...,
2048
     last_cycle_in_window}.  If step is 1 then the following will be
2049
     the order we traverse the window: {start=first_cycle_in_window,
2050
     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2051
     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2052
     end=first_cycle_in_window-1} if step is -1.  */
2053
  first_cycle_in_window = (step == 1) ? start : end - step;
2054
  last_cycle_in_window = (step == 1) ? end - step : start;
2055
 
2056
  sbitmap_zero (must_precede);
2057
  sbitmap_zero (must_follow);
2058
 
2059
  if (dump_file)
2060
    fprintf (dump_file, "\nmust_precede: ");
2061
 
2062
  /* Instead of checking if:
2063
      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2064
      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2065
             first_cycle_in_window)
2066
      && e->latency == 0
2067
     we use the fact that latency is non-negative:
2068
      SCHED_TIME (e->src) - (e->distance * ii) <=
2069
      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2070
      first_cycle_in_window
2071
     and check only if
2072
      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2073
  for (e = u_node->in; e != 0; e = e->next_in)
2074
    if (TEST_BIT (sched_nodes, e->src->cuid)
2075
        && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2076
             first_cycle_in_window))
2077
      {
2078
        if (dump_file)
2079
          fprintf (dump_file, "%d ", e->src->cuid);
2080
 
2081
        SET_BIT (must_precede, e->src->cuid);
2082
      }
2083
 
2084
  if (dump_file)
2085
    fprintf (dump_file, "\nmust_follow: ");
2086
 
2087
  /* Instead of checking if:
2088
      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2089
      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2090
             last_cycle_in_window)
2091
      && e->latency == 0
2092
     we use the fact that latency is non-negative:
2093
      SCHED_TIME (e->dest) + (e->distance * ii) >=
2094
      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2095
      last_cycle_in_window
2096
     and check only if
2097
      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2098
  for (e = u_node->out; e != 0; e = e->next_out)
2099
    if (TEST_BIT (sched_nodes, e->dest->cuid)
2100
        && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2101
             last_cycle_in_window))
2102
      {
2103
        if (dump_file)
2104
          fprintf (dump_file, "%d ", e->dest->cuid);
2105
 
2106
        SET_BIT (must_follow, e->dest->cuid);
2107
      }
2108
 
2109
  if (dump_file)
2110
    fprintf (dump_file, "\n");
2111
}
2112
 
2113
/* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2114
   parameters to decide if that's possible:
2115
   PS - The partial schedule.
2116
   U - The serial number of U_NODE.
2117
   NUM_SPLITS - The number of row splits made so far.
2118
   MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2119
   the first row of the scheduling window)
2120
   MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2121
   last row of the scheduling window)  */
2122
 
2123
static bool
2124
try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2125
                              int u, int cycle, sbitmap sched_nodes,
2126
                              int *num_splits, sbitmap must_precede,
2127
                              sbitmap must_follow)
2128
{
2129
  ps_insn_ptr psi;
2130
  bool success = 0;
2131
 
2132
  verify_partial_schedule (ps, sched_nodes);
2133
  psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2134
  if (psi)
2135
    {
2136
      SCHED_TIME (u) = cycle;
2137
      SET_BIT (sched_nodes, u);
2138
      success = 1;
2139
      *num_splits = 0;
2140
      if (dump_file)
2141
        fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2142
 
2143
    }
2144
 
2145
  return success;
2146
}
2147
 
2148
/* This function implements the scheduling algorithm for SMS according to the
2149
   above algorithm.  */
2150
static partial_schedule_ptr
2151
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2152
{
2153
  int ii = mii;
2154
  int i, c, success, num_splits = 0;
2155
  int flush_and_start_over = true;
2156
  int num_nodes = g->num_nodes;
2157
  int start, end, step; /* Place together into one struct?  */
2158
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2159
  sbitmap must_precede = sbitmap_alloc (num_nodes);
2160
  sbitmap must_follow = sbitmap_alloc (num_nodes);
2161
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2162
 
2163
  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2164
 
2165
  sbitmap_ones (tobe_scheduled);
2166
  sbitmap_zero (sched_nodes);
2167
 
2168
  while (flush_and_start_over && (ii < maxii))
2169
    {
2170
 
2171
      if (dump_file)
2172
        fprintf (dump_file, "Starting with ii=%d\n", ii);
2173
      flush_and_start_over = false;
2174
      sbitmap_zero (sched_nodes);
2175
 
2176
      for (i = 0; i < num_nodes; i++)
2177
        {
2178
          int u = nodes_order[i];
2179
          ddg_node_ptr u_node = &ps->g->nodes[u];
2180
          rtx insn = u_node->insn;
2181
 
2182
          if (!NONDEBUG_INSN_P (insn))
2183
            {
2184
              RESET_BIT (tobe_scheduled, u);
2185
              continue;
2186
            }
2187
 
2188
          if (TEST_BIT (sched_nodes, u))
2189
            continue;
2190
 
2191
          /* Try to get non-empty scheduling window.  */
2192
         success = 0;
2193
         if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2194
                                &step, &end) == 0)
2195
            {
2196
              if (dump_file)
2197
                fprintf (dump_file, "\nTrying to schedule node %d "
2198
                         "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2199
                        (g->nodes[u].insn)), start, end, step);
2200
 
2201
              gcc_assert ((step > 0 && start < end)
2202
                          || (step < 0 && start > end));
2203
 
2204
              calculate_must_precede_follow (u_node, start, end, step, ii,
2205
                                             sched_nodes, must_precede,
2206
                                             must_follow);
2207
 
2208
              for (c = start; c != end; c += step)
2209
                {
2210
                  sbitmap tmp_precede, tmp_follow;
2211
 
2212
                  set_must_precede_follow (&tmp_follow, must_follow,
2213
                                           &tmp_precede, must_precede,
2214
                                           c, start, end, step);
2215
                  success =
2216
                    try_scheduling_node_in_cycle (ps, u, c,
2217
                                                  sched_nodes,
2218
                                                  &num_splits, tmp_precede,
2219
                                                  tmp_follow);
2220
                  if (success)
2221
                    break;
2222
                }
2223
 
2224
              verify_partial_schedule (ps, sched_nodes);
2225
            }
2226
            if (!success)
2227
            {
2228
              int split_row;
2229
 
2230
              if (ii++ == maxii)
2231
                break;
2232
 
2233
              if (num_splits >= MAX_SPLIT_NUM)
2234
                {
2235
                  num_splits = 0;
2236
                  flush_and_start_over = true;
2237
                  verify_partial_schedule (ps, sched_nodes);
2238
                  reset_partial_schedule (ps, ii);
2239
                  verify_partial_schedule (ps, sched_nodes);
2240
                  break;
2241
                }
2242
 
2243
              num_splits++;
2244
              /* The scheduling window is exclusive of 'end'
2245
                 whereas compute_split_window() expects an inclusive,
2246
                 ordered range.  */
2247
              if (step == 1)
2248
                split_row = compute_split_row (sched_nodes, start, end - 1,
2249
                                               ps->ii, u_node);
2250
              else
2251
                split_row = compute_split_row (sched_nodes, end + 1, start,
2252
                                               ps->ii, u_node);
2253
 
2254
              ps_insert_empty_row (ps, split_row, sched_nodes);
2255
              i--;              /* Go back and retry node i.  */
2256
 
2257
              if (dump_file)
2258
                fprintf (dump_file, "num_splits=%d\n", num_splits);
2259
            }
2260
 
2261
          /* ??? If (success), check register pressure estimates.  */
2262
        }                       /* Continue with next node.  */
2263
    }                           /* While flush_and_start_over.  */
2264
  if (ii >= maxii)
2265
    {
2266
      free_partial_schedule (ps);
2267
      ps = NULL;
2268
    }
2269
  else
2270
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));
2271
 
2272
  sbitmap_free (sched_nodes);
2273
  sbitmap_free (must_precede);
2274
  sbitmap_free (must_follow);
2275
  sbitmap_free (tobe_scheduled);
2276
 
2277
  return ps;
2278
}
2279
 
2280
/* This function inserts a new empty row into PS at the position
2281
   according to SPLITROW, keeping all already scheduled instructions
2282
   intact and updating their SCHED_TIME and cycle accordingly.  */
2283
static void
2284
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2285
                     sbitmap sched_nodes)
2286
{
2287
  ps_insn_ptr crr_insn;
2288
  ps_insn_ptr *rows_new;
2289
  int ii = ps->ii;
2290
  int new_ii = ii + 1;
2291
  int row;
2292
  int *rows_length_new;
2293
 
2294
  verify_partial_schedule (ps, sched_nodes);
2295
 
2296
  /* We normalize sched_time and rotate ps to have only non-negative sched
2297
     times, for simplicity of updating cycles after inserting new row.  */
2298
  split_row -= ps->min_cycle;
2299
  split_row = SMODULO (split_row, ii);
2300
  if (dump_file)
2301
    fprintf (dump_file, "split_row=%d\n", split_row);
2302
 
2303
  reset_sched_times (ps, PS_MIN_CYCLE (ps));
2304
  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2305
 
2306
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2307
  rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2308
  for (row = 0; row < split_row; row++)
2309
    {
2310
      rows_new[row] = ps->rows[row];
2311
      rows_length_new[row] = ps->rows_length[row];
2312
      ps->rows[row] = NULL;
2313
      for (crr_insn = rows_new[row];
2314
           crr_insn; crr_insn = crr_insn->next_in_row)
2315
        {
2316
          int u = crr_insn->id;
2317
          int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2318
 
2319
          SCHED_TIME (u) = new_time;
2320
          crr_insn->cycle = new_time;
2321
          SCHED_ROW (u) = new_time % new_ii;
2322
          SCHED_STAGE (u) = new_time / new_ii;
2323
        }
2324
 
2325
    }
2326
 
2327
  rows_new[split_row] = NULL;
2328
 
2329
  for (row = split_row; row < ii; row++)
2330
    {
2331
      rows_new[row + 1] = ps->rows[row];
2332
      rows_length_new[row + 1] = ps->rows_length[row];
2333
      ps->rows[row] = NULL;
2334
      for (crr_insn = rows_new[row + 1];
2335
           crr_insn; crr_insn = crr_insn->next_in_row)
2336
        {
2337
          int u = crr_insn->id;
2338
          int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2339
 
2340
          SCHED_TIME (u) = new_time;
2341
          crr_insn->cycle = new_time;
2342
          SCHED_ROW (u) = new_time % new_ii;
2343
          SCHED_STAGE (u) = new_time / new_ii;
2344
        }
2345
    }
2346
 
2347
  /* Updating ps.  */
2348
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2349
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2350
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2351
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2352
  free (ps->rows);
2353
  ps->rows = rows_new;
2354
  free (ps->rows_length);
2355
  ps->rows_length = rows_length_new;
2356
  ps->ii = new_ii;
2357
  gcc_assert (ps->min_cycle >= 0);
2358
 
2359
  verify_partial_schedule (ps, sched_nodes);
2360
 
2361
  if (dump_file)
2362
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2363
             ps->max_cycle);
2364
}
2365
 
2366
/* Given U_NODE which is the node that failed to be scheduled; LOW and
2367
   UP which are the boundaries of it's scheduling window; compute using
2368
   SCHED_NODES and II a row in the partial schedule that can be split
2369
   which will separate a critical predecessor from a critical successor
2370
   thereby expanding the window, and return it.  */
2371
static int
2372
compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2373
                   ddg_node_ptr u_node)
2374
{
2375
  ddg_edge_ptr e;
2376
  int lower = INT_MIN, upper = INT_MAX;
2377
  int crit_pred = -1;
2378
  int crit_succ = -1;
2379
  int crit_cycle;
2380
 
2381
  for (e = u_node->in; e != 0; e = e->next_in)
2382
    {
2383
      int v = e->src->cuid;
2384
 
2385
      if (TEST_BIT (sched_nodes, v)
2386
          && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2387
        if (SCHED_TIME (v) > lower)
2388
          {
2389
            crit_pred = v;
2390
            lower = SCHED_TIME (v);
2391
          }
2392
    }
2393
 
2394
  if (crit_pred >= 0)
2395
    {
2396
      crit_cycle = SCHED_TIME (crit_pred) + 1;
2397
      return SMODULO (crit_cycle, ii);
2398
    }
2399
 
2400
  for (e = u_node->out; e != 0; e = e->next_out)
2401
    {
2402
      int v = e->dest->cuid;
2403
 
2404
      if (TEST_BIT (sched_nodes, v)
2405
          && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2406
        if (SCHED_TIME (v) < upper)
2407
          {
2408
            crit_succ = v;
2409
            upper = SCHED_TIME (v);
2410
          }
2411
    }
2412
 
2413
  if (crit_succ >= 0)
2414
    {
2415
      crit_cycle = SCHED_TIME (crit_succ);
2416
      return SMODULO (crit_cycle, ii);
2417
    }
2418
 
2419
  if (dump_file)
2420
    fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2421
 
2422
  return SMODULO ((low + up + 1) / 2, ii);
2423
}
2424
 
2425
static void
2426
verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2427
{
2428
  int row;
2429
  ps_insn_ptr crr_insn;
2430
 
2431
  for (row = 0; row < ps->ii; row++)
2432
    {
2433
      int length = 0;
2434
 
2435
      for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2436
        {
2437
          int u = crr_insn->id;
2438
 
2439
          length++;
2440
          gcc_assert (TEST_BIT (sched_nodes, u));
2441
          /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2442
             popcount (sched_nodes) == number of insns in ps.  */
2443
          gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2444
          gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2445
        }
2446
 
2447
      gcc_assert (ps->rows_length[row] == length);
2448
    }
2449
}
2450
 
2451
 
2452
/* This page implements the algorithm for ordering the nodes of a DDG
2453
   for modulo scheduling, activated through the
2454
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2455
 
2456
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2457
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
2458
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
2459
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2460
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
2461
#define DEPTH(x) (ASAP ((x)))
2462
 
2463
typedef struct node_order_params * nopa;
2464
 
2465
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2466
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2467
static nopa  calculate_order_params (ddg_ptr, int, int *);
2468
static int find_max_asap (ddg_ptr, sbitmap);
2469
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2470
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2471
 
2472
enum sms_direction {BOTTOMUP, TOPDOWN};
2473
 
2474
struct node_order_params
2475
{
2476
  int asap;
2477
  int alap;
2478
  int height;
2479
};
2480
 
2481
/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2482
static void
2483
check_nodes_order (int *node_order, int num_nodes)
2484
{
2485
  int i;
2486
  sbitmap tmp = sbitmap_alloc (num_nodes);
2487
 
2488
  sbitmap_zero (tmp);
2489
 
2490
  if (dump_file)
2491
    fprintf (dump_file, "SMS final nodes order: \n");
2492
 
2493
  for (i = 0; i < num_nodes; i++)
2494
    {
2495
      int u = node_order[i];
2496
 
2497
      if (dump_file)
2498
        fprintf (dump_file, "%d ", u);
2499
      gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
2500
 
2501
      SET_BIT (tmp, u);
2502
    }
2503
 
2504
  if (dump_file)
2505
    fprintf (dump_file, "\n");
2506
 
2507
  sbitmap_free (tmp);
2508
}
2509
 
2510
/* Order the nodes of G for scheduling and pass the result in
2511
   NODE_ORDER.  Also set aux.count of each node to ASAP.
2512
   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2513
static int
2514
sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2515
{
2516
  int i;
2517
  int rec_mii = 0;
2518
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2519
 
2520
  nopa nops = calculate_order_params (g, mii, pmax_asap);
2521
 
2522
  if (dump_file)
2523
    print_sccs (dump_file, sccs, g);
2524
 
2525
  order_nodes_of_sccs (sccs, node_order);
2526
 
2527
  if (sccs->num_sccs > 0)
2528
    /* First SCC has the largest recurrence_length.  */
2529
    rec_mii = sccs->sccs[0]->recurrence_length;
2530
 
2531
  /* Save ASAP before destroying node_order_params.  */
2532
  for (i = 0; i < g->num_nodes; i++)
2533
    {
2534
      ddg_node_ptr v = &g->nodes[i];
2535
      v->aux.count = ASAP (v);
2536
    }
2537
 
2538
  free (nops);
2539
  free_ddg_all_sccs (sccs);
2540
  check_nodes_order (node_order, g->num_nodes);
2541
 
2542
  return rec_mii;
2543
}
2544
 
2545
static void
2546
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2547
{
2548
  int i, pos = 0;
2549
  ddg_ptr g = all_sccs->ddg;
2550
  int num_nodes = g->num_nodes;
2551
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2552
  sbitmap on_path = sbitmap_alloc (num_nodes);
2553
  sbitmap tmp = sbitmap_alloc (num_nodes);
2554
  sbitmap ones = sbitmap_alloc (num_nodes);
2555
 
2556
  sbitmap_zero (prev_sccs);
2557
  sbitmap_ones (ones);
2558
 
2559
  /* Perform the node ordering starting from the SCC with the highest recMII.
2560
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2561
  for (i = 0; i < all_sccs->num_sccs; i++)
2562
    {
2563
      ddg_scc_ptr scc = all_sccs->sccs[i];
2564
 
2565
      /* Add nodes on paths from previous SCCs to the current SCC.  */
2566
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2567
      sbitmap_a_or_b (tmp, scc->nodes, on_path);
2568
 
2569
      /* Add nodes on paths from the current SCC to previous SCCs.  */
2570
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2571
      sbitmap_a_or_b (tmp, tmp, on_path);
2572
 
2573
      /* Remove nodes of previous SCCs from current extended SCC.  */
2574
      sbitmap_difference (tmp, tmp, prev_sccs);
2575
 
2576
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2577
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2578
    }
2579
 
2580
  /* Handle the remaining nodes that do not belong to any scc.  Each call
2581
     to order_nodes_in_scc handles a single connected component.  */
2582
  while (pos < g->num_nodes)
2583
    {
2584
      sbitmap_difference (tmp, ones, prev_sccs);
2585
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2586
    }
2587
  sbitmap_free (prev_sccs);
2588
  sbitmap_free (on_path);
2589
  sbitmap_free (tmp);
2590
  sbitmap_free (ones);
2591
}
2592
 
2593
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2594
static struct node_order_params *
2595
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2596
{
2597
  int u;
2598
  int max_asap;
2599
  int num_nodes = g->num_nodes;
2600
  ddg_edge_ptr e;
2601
  /* Allocate a place to hold ordering params for each node in the DDG.  */
2602
  nopa node_order_params_arr;
2603
 
2604
  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
2605
  node_order_params_arr = (nopa) xcalloc (num_nodes,
2606
                                          sizeof (struct node_order_params));
2607
 
2608
  /* Set the aux pointer of each node to point to its order_params structure.  */
2609
  for (u = 0; u < num_nodes; u++)
2610
    g->nodes[u].aux.info = &node_order_params_arr[u];
2611
 
2612
  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2613
     calculate ASAP, ALAP, mobility, distance, and height for each node
2614
     in the dependence (direct acyclic) graph.  */
2615
 
2616
  /* We assume that the nodes in the array are in topological order.  */
2617
 
2618
  max_asap = 0;
2619
  for (u = 0; u < num_nodes; u++)
2620
    {
2621
      ddg_node_ptr u_node = &g->nodes[u];
2622
 
2623
      ASAP (u_node) = 0;
2624
      for (e = u_node->in; e; e = e->next_in)
2625
        if (e->distance == 0)
2626
          ASAP (u_node) = MAX (ASAP (u_node),
2627
                               ASAP (e->src) + e->latency);
2628
      max_asap = MAX (max_asap, ASAP (u_node));
2629
    }
2630
 
2631
  for (u = num_nodes - 1; u > -1; u--)
2632
    {
2633
      ddg_node_ptr u_node = &g->nodes[u];
2634
 
2635
      ALAP (u_node) = max_asap;
2636
      HEIGHT (u_node) = 0;
2637
      for (e = u_node->out; e; e = e->next_out)
2638
        if (e->distance == 0)
2639
          {
2640
            ALAP (u_node) = MIN (ALAP (u_node),
2641
                                 ALAP (e->dest) - e->latency);
2642
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
2643
                                   HEIGHT (e->dest) + e->latency);
2644
          }
2645
    }
2646
  if (dump_file)
2647
  {
2648
    fprintf (dump_file, "\nOrder params\n");
2649
    for (u = 0; u < num_nodes; u++)
2650
      {
2651
        ddg_node_ptr u_node = &g->nodes[u];
2652
 
2653
        fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2654
                 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2655
      }
2656
  }
2657
 
2658
  *pmax_asap = max_asap;
2659
  return node_order_params_arr;
2660
}
2661
 
2662
static int
2663
find_max_asap (ddg_ptr g, sbitmap nodes)
2664
{
2665
  unsigned int u = 0;
2666
  int max_asap = -1;
2667
  int result = -1;
2668
  sbitmap_iterator sbi;
2669
 
2670
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2671
    {
2672
      ddg_node_ptr u_node = &g->nodes[u];
2673
 
2674
      if (max_asap < ASAP (u_node))
2675
        {
2676
          max_asap = ASAP (u_node);
2677
          result = u;
2678
        }
2679
    }
2680
  return result;
2681
}
2682
 
2683
static int
2684
find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2685
{
2686
  unsigned int u = 0;
2687
  int max_hv = -1;
2688
  int min_mob = INT_MAX;
2689
  int result = -1;
2690
  sbitmap_iterator sbi;
2691
 
2692
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2693
    {
2694
      ddg_node_ptr u_node = &g->nodes[u];
2695
 
2696
      if (max_hv < HEIGHT (u_node))
2697
        {
2698
          max_hv = HEIGHT (u_node);
2699
          min_mob = MOB (u_node);
2700
          result = u;
2701
        }
2702
      else if ((max_hv == HEIGHT (u_node))
2703
               && (min_mob > MOB (u_node)))
2704
        {
2705
          min_mob = MOB (u_node);
2706
          result = u;
2707
        }
2708
    }
2709
  return result;
2710
}
2711
 
2712
static int
2713
find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2714
{
2715
  unsigned int u = 0;
2716
  int max_dv = -1;
2717
  int min_mob = INT_MAX;
2718
  int result = -1;
2719
  sbitmap_iterator sbi;
2720
 
2721
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
2722
    {
2723
      ddg_node_ptr u_node = &g->nodes[u];
2724
 
2725
      if (max_dv < DEPTH (u_node))
2726
        {
2727
          max_dv = DEPTH (u_node);
2728
          min_mob = MOB (u_node);
2729
          result = u;
2730
        }
2731
      else if ((max_dv == DEPTH (u_node))
2732
               && (min_mob > MOB (u_node)))
2733
        {
2734
          min_mob = MOB (u_node);
2735
          result = u;
2736
        }
2737
    }
2738
  return result;
2739
}
2740
 
2741
/* Places the nodes of SCC into the NODE_ORDER array starting
2742
   at position POS, according to the SMS ordering algorithm.
2743
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2744
   the NODE_ORDER array, starting from position zero.  */
2745
static int
2746
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2747
                    int * node_order, int pos)
2748
{
2749
  enum sms_direction dir;
2750
  int num_nodes = g->num_nodes;
2751
  sbitmap workset = sbitmap_alloc (num_nodes);
2752
  sbitmap tmp = sbitmap_alloc (num_nodes);
2753
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2754
  sbitmap predecessors = sbitmap_alloc (num_nodes);
2755
  sbitmap successors = sbitmap_alloc (num_nodes);
2756
 
2757
  sbitmap_zero (predecessors);
2758
  find_predecessors (predecessors, g, nodes_ordered);
2759
 
2760
  sbitmap_zero (successors);
2761
  find_successors (successors, g, nodes_ordered);
2762
 
2763
  sbitmap_zero (tmp);
2764
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
2765
    {
2766
      sbitmap_copy (workset, tmp);
2767
      dir = BOTTOMUP;
2768
    }
2769
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
2770
    {
2771
      sbitmap_copy (workset, tmp);
2772
      dir = TOPDOWN;
2773
    }
2774
  else
2775
    {
2776
      int u;
2777
 
2778
      sbitmap_zero (workset);
2779
      if ((u = find_max_asap (g, scc)) >= 0)
2780
        SET_BIT (workset, u);
2781
      dir = BOTTOMUP;
2782
    }
2783
 
2784
  sbitmap_zero (zero_bitmap);
2785
  while (!sbitmap_equal (workset, zero_bitmap))
2786
    {
2787
      int v;
2788
      ddg_node_ptr v_node;
2789
      sbitmap v_node_preds;
2790
      sbitmap v_node_succs;
2791
 
2792
      if (dir == TOPDOWN)
2793
        {
2794
          while (!sbitmap_equal (workset, zero_bitmap))
2795
            {
2796
              v = find_max_hv_min_mob (g, workset);
2797
              v_node = &g->nodes[v];
2798
              node_order[pos++] = v;
2799
              v_node_succs = NODE_SUCCESSORS (v_node);
2800
              sbitmap_a_and_b (tmp, v_node_succs, scc);
2801
 
2802
              /* Don't consider the already ordered successors again.  */
2803
              sbitmap_difference (tmp, tmp, nodes_ordered);
2804
              sbitmap_a_or_b (workset, workset, tmp);
2805
              RESET_BIT (workset, v);
2806
              SET_BIT (nodes_ordered, v);
2807
            }
2808
          dir = BOTTOMUP;
2809
          sbitmap_zero (predecessors);
2810
          find_predecessors (predecessors, g, nodes_ordered);
2811
          sbitmap_a_and_b (workset, predecessors, scc);
2812
        }
2813
      else
2814
        {
2815
          while (!sbitmap_equal (workset, zero_bitmap))
2816
            {
2817
              v = find_max_dv_min_mob (g, workset);
2818
              v_node = &g->nodes[v];
2819
              node_order[pos++] = v;
2820
              v_node_preds = NODE_PREDECESSORS (v_node);
2821
              sbitmap_a_and_b (tmp, v_node_preds, scc);
2822
 
2823
              /* Don't consider the already ordered predecessors again.  */
2824
              sbitmap_difference (tmp, tmp, nodes_ordered);
2825
              sbitmap_a_or_b (workset, workset, tmp);
2826
              RESET_BIT (workset, v);
2827
              SET_BIT (nodes_ordered, v);
2828
            }
2829
          dir = TOPDOWN;
2830
          sbitmap_zero (successors);
2831
          find_successors (successors, g, nodes_ordered);
2832
          sbitmap_a_and_b (workset, successors, scc);
2833
        }
2834
    }
2835
  sbitmap_free (tmp);
2836
  sbitmap_free (workset);
2837
  sbitmap_free (zero_bitmap);
2838
  sbitmap_free (predecessors);
2839
  sbitmap_free (successors);
2840
  return pos;
2841
}
2842
 
2843
 
2844
/* This page contains functions for manipulating partial-schedules during
2845
   modulo scheduling.  */
2846
 
2847
/* Create a partial schedule and allocate a memory to hold II rows.  */
2848
 
2849
static partial_schedule_ptr
2850
create_partial_schedule (int ii, ddg_ptr g, int history)
2851
{
2852
  partial_schedule_ptr ps = XNEW (struct partial_schedule);
2853
  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2854
  ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2855
  ps->reg_moves = NULL;
2856
  ps->ii = ii;
2857
  ps->history = history;
2858
  ps->min_cycle = INT_MAX;
2859
  ps->max_cycle = INT_MIN;
2860
  ps->g = g;
2861
 
2862
  return ps;
2863
}
2864
 
2865
/* Free the PS_INSNs in rows array of the given partial schedule.
2866
   ??? Consider caching the PS_INSN's.  */
2867
static void
2868
free_ps_insns (partial_schedule_ptr ps)
2869
{
2870
  int i;
2871
 
2872
  for (i = 0; i < ps->ii; i++)
2873
    {
2874
      while (ps->rows[i])
2875
        {
2876
          ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2877
 
2878
          free (ps->rows[i]);
2879
          ps->rows[i] = ps_insn;
2880
        }
2881
      ps->rows[i] = NULL;
2882
    }
2883
}
2884
 
2885
/* Free all the memory allocated to the partial schedule.  */
2886
 
2887
static void
2888
free_partial_schedule (partial_schedule_ptr ps)
2889
{
2890
  ps_reg_move_info *move;
2891
  unsigned int i;
2892
 
2893
  if (!ps)
2894
    return;
2895
 
2896
  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
2897
    sbitmap_free (move->uses);
2898
  VEC_free (ps_reg_move_info, heap, ps->reg_moves);
2899
 
2900
  free_ps_insns (ps);
2901
  free (ps->rows);
2902
  free (ps->rows_length);
2903
  free (ps);
2904
}
2905
 
2906
/* Clear the rows array with its PS_INSNs, and create a new one with
2907
   NEW_II rows.  */
2908
 
2909
static void
2910
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2911
{
2912
  if (!ps)
2913
    return;
2914
  free_ps_insns (ps);
2915
  if (new_ii == ps->ii)
2916
    return;
2917
  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2918
                                                 * sizeof (ps_insn_ptr));
2919
  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2920
  ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2921
  memset (ps->rows_length, 0, new_ii * sizeof (int));
2922
  ps->ii = new_ii;
2923
  ps->min_cycle = INT_MAX;
2924
  ps->max_cycle = INT_MIN;
2925
}
2926
 
2927
/* Prints the partial schedule as an ii rows array, for each rows
2928
   print the ids of the insns in it.  */
2929
void
2930
print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2931
{
2932
  int i;
2933
 
2934
  for (i = 0; i < ps->ii; i++)
2935
    {
2936
      ps_insn_ptr ps_i = ps->rows[i];
2937
 
2938
      fprintf (dump, "\n[ROW %d ]: ", i);
2939
      while (ps_i)
2940
        {
2941
          rtx insn = ps_rtl_insn (ps, ps_i->id);
2942
 
2943
          if (JUMP_P (insn))
2944
            fprintf (dump, "%d (branch), ", INSN_UID (insn));
2945
          else
2946
            fprintf (dump, "%d, ", INSN_UID (insn));
2947
 
2948
          ps_i = ps_i->next_in_row;
2949
        }
2950
    }
2951
}
2952
 
2953
/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2954
static ps_insn_ptr
2955
create_ps_insn (int id, int cycle)
2956
{
2957
  ps_insn_ptr ps_i = XNEW (struct ps_insn);
2958
 
2959
  ps_i->id = id;
2960
  ps_i->next_in_row = NULL;
2961
  ps_i->prev_in_row = NULL;
2962
  ps_i->cycle = cycle;
2963
 
2964
  return ps_i;
2965
}
2966
 
2967
 
2968
/* Removes the given PS_INSN from the partial schedule.  */
2969
static void
2970
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2971
{
2972
  int row;
2973
 
2974
  gcc_assert (ps && ps_i);
2975
 
2976
  row = SMODULO (ps_i->cycle, ps->ii);
2977
  if (! ps_i->prev_in_row)
2978
    {
2979
      gcc_assert (ps_i == ps->rows[row]);
2980
      ps->rows[row] = ps_i->next_in_row;
2981
      if (ps->rows[row])
2982
        ps->rows[row]->prev_in_row = NULL;
2983
    }
2984
  else
2985
    {
2986
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2987
      if (ps_i->next_in_row)
2988
        ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2989
    }
2990
 
2991
  ps->rows_length[row] -= 1;
2992
  free (ps_i);
2993
  return;
2994
}
2995
 
2996
/* Unlike what literature describes for modulo scheduling (which focuses
2997
   on VLIW machines) the order of the instructions inside a cycle is
2998
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2999
   where the current instruction should go relative to the already
3000
   scheduled instructions in the given cycle.  Go over these
3001
   instructions and find the first possible column to put it in.  */
3002
static bool
3003
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3004
                     sbitmap must_precede, sbitmap must_follow)
3005
{
3006
  ps_insn_ptr next_ps_i;
3007
  ps_insn_ptr first_must_follow = NULL;
3008
  ps_insn_ptr last_must_precede = NULL;
3009
  ps_insn_ptr last_in_row = NULL;
3010
  int row;
3011
 
3012
  if (! ps_i)
3013
    return false;
3014
 
3015
  row = SMODULO (ps_i->cycle, ps->ii);
3016
 
3017
  /* Find the first must follow and the last must precede
3018
     and insert the node immediately after the must precede
3019
     but make sure that it there is no must follow after it.  */
3020
  for (next_ps_i = ps->rows[row];
3021
       next_ps_i;
3022
       next_ps_i = next_ps_i->next_in_row)
3023
    {
3024
      if (must_follow
3025
          && TEST_BIT (must_follow, next_ps_i->id)
3026
          && ! first_must_follow)
3027
        first_must_follow = next_ps_i;
3028
      if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
3029
        {
3030
          /* If we have already met a node that must follow, then
3031
             there is no possible column.  */
3032
          if (first_must_follow)
3033
            return false;
3034
          else
3035
            last_must_precede = next_ps_i;
3036
        }
3037
      /* The closing branch must be the last in the row.  */
3038
      if (must_precede
3039
          && TEST_BIT (must_precede, next_ps_i->id)
3040
          && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3041
        return false;
3042
 
3043
       last_in_row = next_ps_i;
3044
    }
3045
 
3046
  /* The closing branch is scheduled as well.  Make sure there is no
3047
     dependent instruction after it as the branch should be the last
3048
     instruction in the row.  */
3049
  if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3050
    {
3051
      if (first_must_follow)
3052
        return false;
3053
      if (last_in_row)
3054
        {
3055
          /* Make the branch the last in the row.  New instructions
3056
             will be inserted at the beginning of the row or after the
3057
             last must_precede instruction thus the branch is guaranteed
3058
             to remain the last instruction in the row.  */
3059
          last_in_row->next_in_row = ps_i;
3060
          ps_i->prev_in_row = last_in_row;
3061
          ps_i->next_in_row = NULL;
3062
        }
3063
      else
3064
        ps->rows[row] = ps_i;
3065
      return true;
3066
    }
3067
 
3068
  /* Now insert the node after INSERT_AFTER_PSI.  */
3069
 
3070
  if (! last_must_precede)
3071
    {
3072
      ps_i->next_in_row = ps->rows[row];
3073
      ps_i->prev_in_row = NULL;
3074
      if (ps_i->next_in_row)
3075
        ps_i->next_in_row->prev_in_row = ps_i;
3076
      ps->rows[row] = ps_i;
3077
    }
3078
  else
3079
    {
3080
      ps_i->next_in_row = last_must_precede->next_in_row;
3081
      last_must_precede->next_in_row = ps_i;
3082
      ps_i->prev_in_row = last_must_precede;
3083
      if (ps_i->next_in_row)
3084
        ps_i->next_in_row->prev_in_row = ps_i;
3085
    }
3086
 
3087
  return true;
3088
}
3089
 
3090
/* Advances the PS_INSN one column in its current row; returns false
3091
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
3092
   the node with cuid N must be come after the node pointed to by
3093
   PS_I when scheduled in the same cycle.  */
3094
static int
3095
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3096
                        sbitmap must_follow)
3097
{
3098
  ps_insn_ptr prev, next;
3099
  int row;
3100
 
3101
  if (!ps || !ps_i)
3102
    return false;
3103
 
3104
  row = SMODULO (ps_i->cycle, ps->ii);
3105
 
3106
  if (! ps_i->next_in_row)
3107
    return false;
3108
 
3109
  /* Check if next_in_row is dependent on ps_i, both having same sched
3110
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3111
  if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
3112
    return false;
3113
 
3114
  /* Advance PS_I over its next_in_row in the doubly linked list.  */
3115
  prev = ps_i->prev_in_row;
3116
  next = ps_i->next_in_row;
3117
 
3118
  if (ps_i == ps->rows[row])
3119
    ps->rows[row] = next;
3120
 
3121
  ps_i->next_in_row = next->next_in_row;
3122
 
3123
  if (next->next_in_row)
3124
    next->next_in_row->prev_in_row = ps_i;
3125
 
3126
  next->next_in_row = ps_i;
3127
  ps_i->prev_in_row = next;
3128
 
3129
  next->prev_in_row = prev;
3130
  if (prev)
3131
    prev->next_in_row = next;
3132
 
3133
  return true;
3134
}
3135
 
3136
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3137
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3138
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3139
   before/after (respectively) the node pointed to by PS_I when scheduled
3140
   in the same cycle.  */
3141
static ps_insn_ptr
3142
add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3143
                sbitmap must_precede, sbitmap must_follow)
3144
{
3145
  ps_insn_ptr ps_i;
3146
  int row = SMODULO (cycle, ps->ii);
3147
 
3148
  if (ps->rows_length[row] >= issue_rate)
3149
    return NULL;
3150
 
3151
  ps_i = create_ps_insn (id, cycle);
3152
 
3153
  /* Finds and inserts PS_I according to MUST_FOLLOW and
3154
     MUST_PRECEDE.  */
3155
  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3156
    {
3157
      free (ps_i);
3158
      return NULL;
3159
    }
3160
 
3161
  ps->rows_length[row] += 1;
3162
  return ps_i;
3163
}
3164
 
3165
/* Advance time one cycle.  Assumes DFA is being used.  */
3166
static void
3167
advance_one_cycle (void)
3168
{
3169
  if (targetm.sched.dfa_pre_cycle_insn)
3170
    state_transition (curr_state,
3171
                      targetm.sched.dfa_pre_cycle_insn ());
3172
 
3173
  state_transition (curr_state, NULL);
3174
 
3175
  if (targetm.sched.dfa_post_cycle_insn)
3176
    state_transition (curr_state,
3177
                      targetm.sched.dfa_post_cycle_insn ());
3178
}
3179
 
3180
 
3181
 
3182
/* Checks if PS has resource conflicts according to DFA, starting from
3183
   FROM cycle to TO cycle; returns true if there are conflicts and false
3184
   if there are no conflicts.  Assumes DFA is being used.  */
3185
static int
3186
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3187
{
3188
  int cycle;
3189
 
3190
  state_reset (curr_state);
3191
 
3192
  for (cycle = from; cycle <= to; cycle++)
3193
    {
3194
      ps_insn_ptr crr_insn;
3195
      /* Holds the remaining issue slots in the current row.  */
3196
      int can_issue_more = issue_rate;
3197
 
3198
      /* Walk through the DFA for the current row.  */
3199
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3200
           crr_insn;
3201
           crr_insn = crr_insn->next_in_row)
3202
        {
3203
          rtx insn = ps_rtl_insn (ps, crr_insn->id);
3204
 
3205
          if (!NONDEBUG_INSN_P (insn))
3206
            continue;
3207
 
3208
          /* Check if there is room for the current insn.  */
3209
          if (!can_issue_more || state_dead_lock_p (curr_state))
3210
            return true;
3211
 
3212
          /* Update the DFA state and return with failure if the DFA found
3213
             resource conflicts.  */
3214
          if (state_transition (curr_state, insn) >= 0)
3215
            return true;
3216
 
3217
          if (targetm.sched.variable_issue)
3218
            can_issue_more =
3219
              targetm.sched.variable_issue (sched_dump, sched_verbose,
3220
                                            insn, can_issue_more);
3221
          /* A naked CLOBBER or USE generates no instruction, so don't
3222
             let them consume issue slots.  */
3223
          else if (GET_CODE (PATTERN (insn)) != USE
3224
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
3225
            can_issue_more--;
3226
        }
3227
 
3228
      /* Advance the DFA to the next cycle.  */
3229
      advance_one_cycle ();
3230
    }
3231
  return false;
3232
}
3233
 
3234
/* Checks if the given node causes resource conflicts when added to PS at
3235
   cycle C.  If not the node is added to PS and returned; otherwise zero
3236
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3237
   cuid N must be come before/after (respectively) the node pointed to by
3238
   PS_I when scheduled in the same cycle.  */
3239
ps_insn_ptr
3240
ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3241
                             int c, sbitmap must_precede,
3242
                             sbitmap must_follow)
3243
{
3244
  int has_conflicts = 0;
3245
  ps_insn_ptr ps_i;
3246
 
3247
  /* First add the node to the PS, if this succeeds check for
3248
     conflicts, trying different issue slots in the same row.  */
3249
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3250
    return NULL; /* Failed to insert the node at the given cycle.  */
3251
 
3252
  has_conflicts = ps_has_conflicts (ps, c, c)
3253
                  || (ps->history > 0
3254
                      && ps_has_conflicts (ps,
3255
                                           c - ps->history,
3256
                                           c + ps->history));
3257
 
3258
  /* Try different issue slots to find one that the given node can be
3259
     scheduled in without conflicts.  */
3260
  while (has_conflicts)
3261
    {
3262
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
3263
        break;
3264
      has_conflicts = ps_has_conflicts (ps, c, c)
3265
                      || (ps->history > 0
3266
                          && ps_has_conflicts (ps,
3267
                                               c - ps->history,
3268
                                               c + ps->history));
3269
    }
3270
 
3271
  if (has_conflicts)
3272
    {
3273
      remove_node_from_ps (ps, ps_i);
3274
      return NULL;
3275
    }
3276
 
3277
  ps->min_cycle = MIN (ps->min_cycle, c);
3278
  ps->max_cycle = MAX (ps->max_cycle, c);
3279
  return ps_i;
3280
}
3281
 
3282
/* Calculate the stage count of the partial schedule PS.  The calculation
3283
   takes into account the rotation amount passed in ROTATION_AMOUNT.  */
3284
int
3285
calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3286
{
3287
  int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3288
  int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3289
  int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3290
 
3291
  /* The calculation of stage count is done adding the number of stages
3292
     before cycle zero and after cycle zero.  */
3293
  stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3294
 
3295
  return stage_count;
3296
}
3297
 
3298
/* Rotate the rows of PS such that insns scheduled at time
3299
   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
3300
void
3301
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3302
{
3303
  int i, row, backward_rotates;
3304
  int last_row = ps->ii - 1;
3305
 
3306
  if (start_cycle == 0)
3307
    return;
3308
 
3309
  backward_rotates = SMODULO (start_cycle, ps->ii);
3310
 
3311
  /* Revisit later and optimize this into a single loop.  */
3312
  for (i = 0; i < backward_rotates; i++)
3313
    {
3314
      ps_insn_ptr first_row = ps->rows[0];
3315
      int first_row_length = ps->rows_length[0];
3316
 
3317
      for (row = 0; row < last_row; row++)
3318
        {
3319
          ps->rows[row] = ps->rows[row + 1];
3320
          ps->rows_length[row] = ps->rows_length[row + 1];
3321
        }
3322
 
3323
      ps->rows[last_row] = first_row;
3324
      ps->rows_length[last_row] = first_row_length;
3325
    }
3326
 
3327
  ps->max_cycle -= start_cycle;
3328
  ps->min_cycle -= start_cycle;
3329
}
3330
 
3331
#endif /* INSN_SCHEDULING */
3332
 
3333
static bool
3334
gate_handle_sms (void)
3335
{
3336
  return (optimize > 0 && flag_modulo_sched);
3337
}
3338
 
3339
 
3340
/* Run instruction scheduler.  */
3341
/* Perform SMS module scheduling.  */
3342
static unsigned int
3343
rest_of_handle_sms (void)
3344
{
3345
#ifdef INSN_SCHEDULING
3346
  basic_block bb;
3347
 
3348
  /* Collect loop information to be used in SMS.  */
3349
  cfg_layout_initialize (0);
3350
  sms_schedule ();
3351
 
3352
  /* Update the life information, because we add pseudos.  */
3353
  max_regno = max_reg_num ();
3354
 
3355
  /* Finalize layout changes.  */
3356
  FOR_EACH_BB (bb)
3357
    if (bb->next_bb != EXIT_BLOCK_PTR)
3358
      bb->aux = bb->next_bb;
3359
  free_dominance_info (CDI_DOMINATORS);
3360
  cfg_layout_finalize ();
3361
#endif /* INSN_SCHEDULING */
3362
  return 0;
3363
}
3364
 
3365
struct rtl_opt_pass pass_sms =
3366
{
3367
 {
3368
  RTL_PASS,
3369
  "sms",                                /* name */
3370
  gate_handle_sms,                      /* gate */
3371
  rest_of_handle_sms,                   /* execute */
3372
  NULL,                                 /* sub */
3373
  NULL,                                 /* next */
3374
  0,                                    /* static_pass_number */
3375
  TV_SMS,                               /* tv_id */
3376
  0,                                    /* properties_required */
3377
  0,                                    /* properties_provided */
3378
  0,                                    /* properties_destroyed */
3379
  0,                                    /* todo_flags_start */
3380
  TODO_df_finish
3381
    | TODO_verify_flow
3382
    | TODO_verify_rtl_sharing
3383
    | TODO_ggc_collect                  /* todo_flags_finish */
3384
 }
3385
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.