OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [modulo-sched.c] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Swing Modulo Scheduling implementation.
2
   Copyright (C) 2004, 2005
3
   Free Software Foundation, Inc.
4
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 2, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING.  If not, write to the Free
20
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21
02110-1301, USA.  */
22
 
23
 
24
#include "config.h"
25
#include "system.h"
26
#include "coretypes.h"
27
#include "tm.h"
28
#include "toplev.h"
29
#include "rtl.h"
30
#include "tm_p.h"
31
#include "hard-reg-set.h"
32
#include "regs.h"
33
#include "function.h"
34
#include "flags.h"
35
#include "insn-config.h"
36
#include "insn-attr.h"
37
#include "except.h"
38
#include "toplev.h"
39
#include "recog.h"
40
#include "sched-int.h"
41
#include "target.h"
42
#include "cfglayout.h"
43
#include "cfgloop.h"
44
#include "cfghooks.h"
45
#include "expr.h"
46
#include "params.h"
47
#include "gcov-io.h"
48
#include "df.h"
49
#include "ddg.h"
50
#include "timevar.h"
51
#include "tree-pass.h"
52
 
53
#ifdef INSN_SCHEDULING
54
 
55
/* This file contains the implementation of the Swing Modulo Scheduler,
56
   described in the following references:
57
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
58
       Lifetime--sensitive modulo scheduling in a production environment.
59
       IEEE Trans. on Comps., 50(3), March 2001
60
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
61
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
62
       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
63
 
64
   The basic structure is:
65
   1. Build a data-dependence graph (DDG) for each loop.
66
   2. Use the DDG to order the insns of a loop (not in topological order
67
      necessarily, but rather) trying to place each insn after all its
68
      predecessors _or_ after all its successors.
69
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
70
   4. Use the ordering to perform list-scheduling of the loop:
71
      1. Set II = MII.  We will try to schedule the loop within II cycles.
72
      2. Try to schedule the insns one by one according to the ordering.
73
         For each insn compute an interval of cycles by considering already-
74
         scheduled preds and succs (and associated latencies); try to place
75
         the insn in the cycles of this window checking for potential
76
         resource conflicts (using the DFA interface).
77
         Note: this is different from the cycle-scheduling of schedule_insns;
78
         here the insns are not scheduled monotonically top-down (nor bottom-
79
         up).
80
      3. If failed in scheduling all insns - bump II++ and try again, unless
81
         II reaches an upper bound MaxII, in which case report failure.
82
   5. If we succeeded in scheduling the loop within II cycles, we now
83
      generate prolog and epilog, decrease the counter of the loop, and
84
      perform modulo variable expansion for live ranges that span more than
85
      II cycles (i.e. use register copies to prevent a def from overwriting
86
      itself before reaching the use).
87
*/
88
 
89
 
90
/* This page defines partial-schedule structures and functions for
91
   modulo scheduling.  */
92
 
93
typedef struct partial_schedule *partial_schedule_ptr;
94
typedef struct ps_insn *ps_insn_ptr;
95
 
96
/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
97
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
98
 
99
/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
100
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
101
 
102
/* Perform signed modulo, always returning a non-negative value.  */
103
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
104
 
105
/* The number of different iterations the nodes in ps span, assuming
106
   the stage boundaries are placed efficiently.  */
107
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
108
                             + 1 + (ps)->ii - 1) / (ps)->ii)
109
 
110
/* A single instruction in the partial schedule.  */
111
struct ps_insn
112
{
113
  /* The corresponding DDG_NODE.  */
114
  ddg_node_ptr node;
115
 
116
  /* The (absolute) cycle in which the PS instruction is scheduled.
117
     Same as SCHED_TIME (node).  */
118
  int cycle;
119
 
120
  /* The next/prev PS_INSN in the same row.  */
121
  ps_insn_ptr next_in_row,
122
              prev_in_row;
123
 
124
  /* The number of nodes in the same row that come after this node.  */
125
  int row_rest_count;
126
};
127
 
128
/* Holds the partial schedule as an array of II rows.  Each entry of the
129
   array points to a linked list of PS_INSNs, which represents the
130
   instructions that are scheduled for that row.  */
131
struct partial_schedule
132
{
133
  int ii;       /* Number of rows in the partial schedule.  */
134
  int history;  /* Threshold for conflict checking using DFA.  */
135
 
136
  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
137
  ps_insn_ptr *rows;
138
 
139
  /* The earliest absolute cycle of an insn in the partial schedule.  */
140
  int min_cycle;
141
 
142
  /* The latest absolute cycle of an insn in the partial schedule.  */
143
  int max_cycle;
144
 
145
  ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
146
};
147
 
148
/* We use this to record all the register replacements we do in
149
   the kernel so we can undo SMS if it is not profitable.  */
150
struct undo_replace_buff_elem
151
{
152
  rtx insn;
153
  rtx orig_reg;
154
  rtx new_reg;
155
  struct undo_replace_buff_elem *next;
156
};
157
 
158
 
159
 
160
partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
161
void free_partial_schedule (partial_schedule_ptr);
162
void reset_partial_schedule (partial_schedule_ptr, int new_ii);
163
void print_partial_schedule (partial_schedule_ptr, FILE *);
164
static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
165
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
166
                                                ddg_node_ptr node, int cycle,
167
                                                sbitmap must_precede,
168
                                                sbitmap must_follow);
169
static void rotate_partial_schedule (partial_schedule_ptr, int);
170
void set_row_column_for_ps (partial_schedule_ptr);
171
static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
172
 
173
 
174
/* This page defines constants and structures for the modulo scheduling
175
   driver.  */
176
 
177
/* As in haifa-sched.c:  */
178
/* issue_rate is the number of insns that can be scheduled in the same
179
   machine cycle.  It can be defined in the config/mach/mach.h file,
180
   otherwise we set it to 1.  */
181
 
182
static int issue_rate;
183
 
184
/* For printing statistics.  */
185
static FILE *stats_file;
186
 
187
static int sms_order_nodes (ddg_ptr, int, int * result);
188
static void set_node_sched_params (ddg_ptr);
189
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
190
                                                   int *, FILE*);
191
static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
192
static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
193
static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
194
                                       int from_stage, int to_stage,
195
                                       int is_prolog);
196
 
197
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
198
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
199
#define SCHED_FIRST_REG_MOVE(x) \
200
        (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
201
#define SCHED_NREG_MOVES(x) \
202
        (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
203
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
204
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
205
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
206
 
207
/* The scheduling parameters held for each node.  */
208
typedef struct node_sched_params
209
{
210
  int asap;     /* A lower-bound on the absolute scheduling cycle.  */
211
  int time;     /* The absolute scheduling cycle (time >= asap).  */
212
 
213
  /* The following field (first_reg_move) is a pointer to the first
214
     register-move instruction added to handle the modulo-variable-expansion
215
     of the register defined by this node.  This register-move copies the
216
     original register defined by the node.  */
217
  rtx first_reg_move;
218
 
219
  /* The number of register-move instructions added, immediately preceding
220
     first_reg_move.  */
221
  int nreg_moves;
222
 
223
  int row;    /* Holds time % ii.  */
224
  int stage;  /* Holds time / ii.  */
225
 
226
  /* The column of a node inside the ps.  If nodes u, v are on the same row,
227
     u will precede v if column (u) < column (v).  */
228
  int column;
229
} *node_sched_params_ptr;
230
 
231
 
232
/* The following three functions are copied from the current scheduler
233
   code in order to use sched_analyze() for computing the dependencies.
234
   They are used when initializing the sched_info structure.  */
235
static const char *
236
sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
237
{
238
  static char tmp[80];
239
 
240
  sprintf (tmp, "i%4d", INSN_UID (insn));
241
  return tmp;
242
}
243
 
244
static int
245
contributes_to_priority (rtx next, rtx insn)
246
{
247
  return BLOCK_NUM (next) == BLOCK_NUM (insn);
248
}
249
 
250
static void
251
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
252
                               regset cond_exec ATTRIBUTE_UNUSED,
253
                               regset used ATTRIBUTE_UNUSED,
254
                               regset set ATTRIBUTE_UNUSED)
255
{
256
}
257
 
258
static struct sched_info sms_sched_info =
259
{
260
  NULL,
261
  NULL,
262
  NULL,
263
  NULL,
264
  NULL,
265
  sms_print_insn,
266
  contributes_to_priority,
267
  compute_jump_reg_dependencies,
268
  NULL, NULL,
269
  NULL, NULL,
270
  0, 0, 0
271
};
272
 
273
 
274
/* Return the register decremented and tested in INSN,
275
   or zero if it is not a decrement-and-branch insn.  */
276
 
277
static rtx
278
doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
279
{
280
#ifdef HAVE_doloop_end
281
  rtx pattern, reg, condition;
282
 
283
  if (! JUMP_P (insn))
284
    return NULL_RTX;
285
 
286
  pattern = PATTERN (insn);
287
  condition = doloop_condition_get (pattern);
288
  if (! condition)
289
    return NULL_RTX;
290
 
291
  if (REG_P (XEXP (condition, 0)))
292
    reg = XEXP (condition, 0);
293
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
294
           && REG_P (XEXP (XEXP (condition, 0), 0)))
295
    reg = XEXP (XEXP (condition, 0), 0);
296
  else
297
    gcc_unreachable ();
298
 
299
  return reg;
300
#else
301
  return NULL_RTX;
302
#endif
303
}
304
 
305
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
306
   that the number of iterations is a compile-time constant.  If so,
307
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
308
   this constant.  Otherwise return 0.  */
309
static rtx
310
const_iteration_count (rtx count_reg, basic_block pre_header,
311
                       HOST_WIDEST_INT * count)
312
{
313
  rtx insn;
314
  rtx head, tail;
315
 
316
  if (! pre_header)
317
    return NULL_RTX;
318
 
319
  get_block_head_tail (pre_header->index, &head, &tail);
320
 
321
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
322
    if (INSN_P (insn) && single_set (insn) &&
323
        rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
324
      {
325
        rtx pat = single_set (insn);
326
 
327
        if (GET_CODE (SET_SRC (pat)) == CONST_INT)
328
          {
329
            *count = INTVAL (SET_SRC (pat));
330
            return insn;
331
          }
332
 
333
        return NULL_RTX;
334
      }
335
 
336
  return NULL_RTX;
337
}
338
 
339
/* A very simple resource-based lower bound on the initiation interval.
340
   ??? Improve the accuracy of this bound by considering the
341
   utilization of various units.  */
342
static int
343
res_MII (ddg_ptr g)
344
{
345
  return (g->num_nodes / issue_rate);
346
}
347
 
348
 
349
/* Points to the array that contains the sched data for each node.  */
350
static node_sched_params_ptr node_sched_params;
351
 
352
/* Allocate sched_params for each node and initialize it.  Assumes that
353
   the aux field of each node contain the asap bound (computed earlier),
354
   and copies it into the sched_params field.  */
355
static void
356
set_node_sched_params (ddg_ptr g)
357
{
358
  int i;
359
 
360
  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
361
  /* Initialize ASAP/ALAP/HIGHT to zero.  */
362
  node_sched_params = (node_sched_params_ptr)
363
                       xcalloc (g->num_nodes,
364
                                sizeof (struct node_sched_params));
365
 
366
  /* Set the pointer of the general data of the node to point to the
367
     appropriate sched_params structure.  */
368
  for (i = 0; i < g->num_nodes; i++)
369
    {
370
      /* Watch out for aliasing problems?  */
371
      node_sched_params[i].asap = g->nodes[i].aux.count;
372
      g->nodes[i].aux.info = &node_sched_params[i];
373
    }
374
}
375
 
376
static void
377
print_node_sched_params (FILE * dump_file, int num_nodes)
378
{
379
  int i;
380
 
381
  if (! dump_file)
382
    return;
383
  for (i = 0; i < num_nodes; i++)
384
    {
385
      node_sched_params_ptr nsp = &node_sched_params[i];
386
      rtx reg_move = nsp->first_reg_move;
387
      int j;
388
 
389
      fprintf (dump_file, "Node %d:\n", i);
390
      fprintf (dump_file, " asap = %d:\n", nsp->asap);
391
      fprintf (dump_file, " time = %d:\n", nsp->time);
392
      fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
393
      for (j = 0; j < nsp->nreg_moves; j++)
394
        {
395
          fprintf (dump_file, " reg_move = ");
396
          print_rtl_single (dump_file, reg_move);
397
          reg_move = PREV_INSN (reg_move);
398
        }
399
    }
400
}
401
 
402
/* Calculate an upper bound for II.  SMS should not schedule the loop if it
403
   requires more cycles than this bound.  Currently set to the sum of the
404
   longest latency edge for each node.  Reset based on experiments.  */
405
static int
406
calculate_maxii (ddg_ptr g)
407
{
408
  int i;
409
  int maxii = 0;
410
 
411
  for (i = 0; i < g->num_nodes; i++)
412
    {
413
      ddg_node_ptr u = &g->nodes[i];
414
      ddg_edge_ptr e;
415
      int max_edge_latency = 0;
416
 
417
      for (e = u->out; e; e = e->next_out)
418
        max_edge_latency = MAX (max_edge_latency, e->latency);
419
 
420
      maxii += max_edge_latency;
421
    }
422
  return maxii;
423
}
424
 
425
/*
426
   Breaking intra-loop register anti-dependences:
427
   Each intra-loop register anti-dependence implies a cross-iteration true
428
   dependence of distance 1. Therefore, we can remove such false dependencies
429
   and figure out if the partial schedule broke them by checking if (for a
430
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
431
   if so generate a register move.   The number of such moves is equal to:
432
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
433
   nreg_moves = ----------------------------------- + 1 - {   dependence.
434
                            ii                          { 1 if not.
435
*/
436
static struct undo_replace_buff_elem *
437
generate_reg_moves (partial_schedule_ptr ps)
438
{
439
  ddg_ptr g = ps->g;
440
  int ii = ps->ii;
441
  int i;
442
  struct undo_replace_buff_elem *reg_move_replaces = NULL;
443
 
444
  for (i = 0; i < g->num_nodes; i++)
445
    {
446
      ddg_node_ptr u = &g->nodes[i];
447
      ddg_edge_ptr e;
448
      int nreg_moves = 0, i_reg_move;
449
      sbitmap *uses_of_defs;
450
      rtx last_reg_move;
451
      rtx prev_reg, old_reg;
452
 
453
      /* Compute the number of reg_moves needed for u, by looking at life
454
         ranges started at u (excluding self-loops).  */
455
      for (e = u->out; e; e = e->next_out)
456
        if (e->type == TRUE_DEP && e->dest != e->src)
457
          {
458
            int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
459
 
460
            if (e->distance == 1)
461
              nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
462
 
463
            /* If dest precedes src in the schedule of the kernel, then dest
464
               will read before src writes and we can save one reg_copy.  */
465
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
466
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
467
              nreg_moves4e--;
468
 
469
            nreg_moves = MAX (nreg_moves, nreg_moves4e);
470
          }
471
 
472
      if (nreg_moves == 0)
473
        continue;
474
 
475
      /* Every use of the register defined by node may require a different
476
         copy of this register, depending on the time the use is scheduled.
477
         Set a bitmap vector, telling which nodes use each copy of this
478
         register.  */
479
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
480
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
481
      for (e = u->out; e; e = e->next_out)
482
        if (e->type == TRUE_DEP && e->dest != e->src)
483
          {
484
            int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
485
 
486
            if (e->distance == 1)
487
              dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
488
 
489
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
490
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
491
              dest_copy--;
492
 
493
            if (dest_copy)
494
              SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
495
          }
496
 
497
      /* Now generate the reg_moves, attaching relevant uses to them.  */
498
      SCHED_NREG_MOVES (u) = nreg_moves;
499
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
500
      last_reg_move = u->insn;
501
 
502
      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
503
        {
504
          unsigned int i_use = 0;
505
          rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
506
          rtx reg_move = gen_move_insn (new_reg, prev_reg);
507
          sbitmap_iterator sbi;
508
 
509
          add_insn_before (reg_move, last_reg_move);
510
          last_reg_move = reg_move;
511
 
512
          if (!SCHED_FIRST_REG_MOVE (u))
513
            SCHED_FIRST_REG_MOVE (u) = reg_move;
514
 
515
          EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
516
            {
517
              struct undo_replace_buff_elem *rep;
518
 
519
              rep = (struct undo_replace_buff_elem *)
520
                    xcalloc (1, sizeof (struct undo_replace_buff_elem));
521
              rep->insn = g->nodes[i_use].insn;
522
              rep->orig_reg = old_reg;
523
              rep->new_reg = new_reg;
524
 
525
              if (! reg_move_replaces)
526
                reg_move_replaces = rep;
527
              else
528
                {
529
                  rep->next = reg_move_replaces;
530
                  reg_move_replaces = rep;
531
                }
532
 
533
              replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
534
            }
535
 
536
          prev_reg = new_reg;
537
        }
538
    }
539
  return reg_move_replaces;
540
}
541
 
542
/* We call this when we want to undo the SMS schedule for a given loop.
543
   One of the things that we do is to delete the register moves generated
544
   for the sake of SMS; this function deletes the register move instructions
545
   recorded in the undo buffer.  */
546
static void
547
undo_generate_reg_moves (partial_schedule_ptr ps,
548
                         struct undo_replace_buff_elem *reg_move_replaces)
549
{
550
  int i,j;
551
 
552
  for (i = 0; i < ps->g->num_nodes; i++)
553
    {
554
      ddg_node_ptr u = &ps->g->nodes[i];
555
      rtx prev;
556
      rtx crr = SCHED_FIRST_REG_MOVE (u);
557
 
558
      for (j = 0; j < SCHED_NREG_MOVES (u); j++)
559
        {
560
          prev = PREV_INSN (crr);
561
          delete_insn (crr);
562
          crr = prev;
563
        }
564
      SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
565
    }
566
 
567
  while (reg_move_replaces)
568
    {
569
      struct undo_replace_buff_elem *rep = reg_move_replaces;
570
 
571
      reg_move_replaces = reg_move_replaces->next;
572
      replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
573
    }
574
}
575
 
576
/* Free memory allocated for the undo buffer.  */
577
static void
578
free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
579
{
580
 
581
  while (reg_move_replaces)
582
    {
583
      struct undo_replace_buff_elem *rep = reg_move_replaces;
584
 
585
      reg_move_replaces = reg_move_replaces->next;
586
      free (rep);
587
    }
588
}
589
 
590
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
591
   of SCHED_ROW and SCHED_STAGE.  */
592
static void
593
normalize_sched_times (partial_schedule_ptr ps)
594
{
595
  int i;
596
  ddg_ptr g = ps->g;
597
  int amount = PS_MIN_CYCLE (ps);
598
  int ii = ps->ii;
599
 
600
  /* Don't include the closing branch assuming that it is the last node.  */
601
  for (i = 0; i < g->num_nodes - 1; i++)
602
    {
603
      ddg_node_ptr u = &g->nodes[i];
604
      int normalized_time = SCHED_TIME (u) - amount;
605
 
606
      gcc_assert (normalized_time >= 0);
607
 
608
      SCHED_TIME (u) = normalized_time;
609
      SCHED_ROW (u) = normalized_time % ii;
610
      SCHED_STAGE (u) = normalized_time / ii;
611
    }
612
}
613
 
614
/* Set SCHED_COLUMN of each node according to its position in PS.  */
615
static void
616
set_columns_for_ps (partial_schedule_ptr ps)
617
{
618
  int row;
619
 
620
  for (row = 0; row < ps->ii; row++)
621
    {
622
      ps_insn_ptr cur_insn = ps->rows[row];
623
      int column = 0;
624
 
625
      for (; cur_insn; cur_insn = cur_insn->next_in_row)
626
        SCHED_COLUMN (cur_insn->node) = column++;
627
    }
628
}
629
 
630
/* Permute the insns according to their order in PS, from row 0 to
631
   row ii-1, and position them right before LAST.  This schedules
632
   the insns of the loop kernel.  */
633
static void
634
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
635
{
636
  int ii = ps->ii;
637
  int row;
638
  ps_insn_ptr ps_ij;
639
 
640
  for (row = 0; row < ii ; row++)
641
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
642
      if (PREV_INSN (last) != ps_ij->node->insn)
643
        reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
644
                            PREV_INSN (last));
645
}
646
 
647
/* As part of undoing SMS we return to the original ordering of the
648
   instructions inside the loop kernel.  Given the partial schedule PS, this
649
   function returns the ordering of the instruction according to their CUID
650
   in the DDG (PS->G), which is the original order of the instruction before
651
   performing SMS.  */
652
static void
653
undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
654
{
655
  int i;
656
 
657
  for (i = 0 ; i < ps->g->num_nodes; i++)
658
    if (last == ps->g->nodes[i].insn
659
        || last == ps->g->nodes[i].first_note)
660
      break;
661
    else if (PREV_INSN (last) != ps->g->nodes[i].insn)
662
      reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
663
                          PREV_INSN (last));
664
}
665
 
666
/* Used to generate the prologue & epilogue.  Duplicate the subset of
667
   nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
668
   of both), together with a prefix/suffix of their reg_moves.  */
669
static void
670
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
671
                           int to_stage, int for_prolog)
672
{
673
  int row;
674
  ps_insn_ptr ps_ij;
675
 
676
  for (row = 0; row < ps->ii; row++)
677
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
678
      {
679
        ddg_node_ptr u_node = ps_ij->node;
680
        int j, i_reg_moves;
681
        rtx reg_move = NULL_RTX;
682
 
683
        if (for_prolog)
684
          {
685
            /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
686
               number of reg_moves starting with the second occurrence of
687
               u_node, which is generated if its SCHED_STAGE <= to_stage.  */
688
            i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
689
            i_reg_moves = MAX (i_reg_moves, 0);
690
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
691
 
692
            /* The reg_moves start from the *first* reg_move backwards.  */
693
            if (i_reg_moves)
694
              {
695
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
696
                for (j = 1; j < i_reg_moves; j++)
697
                  reg_move = PREV_INSN (reg_move);
698
              }
699
          }
700
        else /* It's for the epilog.  */
701
          {
702
            /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
703
               starting to decrease one stage after u_node no longer occurs;
704
               that is, generate all reg_moves until
705
               SCHED_STAGE (u_node) == from_stage - 1.  */
706
            i_reg_moves = SCHED_NREG_MOVES (u_node)
707
                       - (from_stage - SCHED_STAGE (u_node) - 1);
708
            i_reg_moves = MAX (i_reg_moves, 0);
709
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
710
 
711
            /* The reg_moves start from the *last* reg_move forwards.  */
712
            if (i_reg_moves)
713
              {
714
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
715
                for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
716
                  reg_move = PREV_INSN (reg_move);
717
              }
718
          }
719
 
720
        for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
721
          emit_insn (copy_rtx (PATTERN (reg_move)));
722
        if (SCHED_STAGE (u_node) >= from_stage
723
            && SCHED_STAGE (u_node) <= to_stage)
724
          duplicate_insn_chain (u_node->first_note, u_node->insn);
725
      }
726
}
727
 
728
 
729
/* Generate the instructions (including reg_moves) for prolog & epilog.  */
730
static void
731
generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
732
{
733
  int i;
734
  int last_stage = PS_STAGE_COUNT (ps) - 1;
735
  edge e;
736
 
737
  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
738
  start_sequence ();
739
 
740
  if (count_reg)
741
   /* Generate a subtract instruction at the beginning of the prolog to
742
      adjust the loop count by STAGE_COUNT.  */
743
   emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
744
 
745
  for (i = 0; i < last_stage; i++)
746
    duplicate_insns_of_cycles (ps, 0, i, 1);
747
 
748
  /* Put the prolog ,  on the one and only entry edge.  */
749
  e = loop_preheader_edge (loop);
750
  loop_split_edge_with(e , get_insns());
751
 
752
  end_sequence ();
753
 
754
  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
755
  start_sequence ();
756
 
757
  for (i = 0; i < last_stage; i++)
758
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
759
 
760
  /* Put the epilogue on the one and only one exit edge.  */
761
  gcc_assert (loop->single_exit);
762
  e = loop->single_exit;
763
  loop_split_edge_with(e , get_insns());
764
  end_sequence ();
765
}
766
 
767
/* Return the line note insn preceding INSN, for debugging.  Taken from
768
   emit-rtl.c.  */
769
static rtx
770
find_line_note (rtx insn)
771
{
772
  for (; insn; insn = PREV_INSN (insn))
773
    if (NOTE_P (insn)
774
        && NOTE_LINE_NUMBER (insn) >= 0)
775
      break;
776
 
777
  return insn;
778
}
779
 
780
/* Return true if all the BBs of the loop are empty except the
781
   loop header.  */
782
static bool
783
loop_single_full_bb_p (struct loop *loop)
784
{
785
  unsigned i;
786
  basic_block *bbs = get_loop_body (loop);
787
 
788
  for (i = 0; i < loop->num_nodes ; i++)
789
    {
790
      rtx head, tail;
791
      bool empty_bb = true;
792
 
793
      if (bbs[i] == loop->header)
794
        continue;
795
 
796
      /* Make sure that basic blocks other than the header
797
         have only notes labels or jumps.  */
798
      get_block_head_tail (bbs[i]->index, &head, &tail);
799
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
800
        {
801
          if (NOTE_P (head) || LABEL_P (head)
802
              || (INSN_P (head) && JUMP_P (head)))
803
            continue;
804
          empty_bb = false;
805
          break;
806
        }
807
 
808
      if (! empty_bb)
809
        {
810
          free (bbs);
811
          return false;
812
        }
813
    }
814
  free (bbs);
815
  return true;
816
}
817
 
818
/* A simple loop from SMS point of view; it is a loop that is composed of
819
   either a single basic block or two BBs - a header and a latch.  */
820
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                     \
821
                                  && (EDGE_COUNT (loop->latch->preds) == 1) \
822
                                  && (EDGE_COUNT (loop->latch->succs) == 1))
823
 
824
/* Return true if the loop is in its canonical form and false if not.
825
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
826
static bool
827
loop_canon_p (struct loop *loop, FILE *dump_file)
828
{
829
 
830
  if (loop->inner || ! loop->outer)
831
    return false;
832
 
833
  if (!loop->single_exit)
834
    {
835
      if (dump_file)
836
        {
837
          rtx line_note = find_line_note (BB_END (loop->header));
838
 
839
          fprintf (dump_file, "SMS loop many exits ");
840
          if (line_note)
841
            {
842
              expanded_location xloc;
843
              NOTE_EXPANDED_LOCATION (xloc, line_note);
844
              fprintf (stats_file, " %s %d (file, line)\n",
845
                       xloc.file, xloc.line);
846
            }
847
        }
848
      return false;
849
    }
850
 
851
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
852
    {
853
      if (dump_file)
854
        {
855
          rtx line_note = find_line_note (BB_END (loop->header));
856
 
857
          fprintf (dump_file, "SMS loop many BBs. ");
858
          if (line_note)
859
            {
860
              expanded_location xloc;
861
              NOTE_EXPANDED_LOCATION (xloc, line_note);
862
              fprintf (stats_file, " %s %d (file, line)\n",
863
                       xloc.file, xloc.line);
864
            }
865
        }
866
      return false;
867
    }
868
 
869
    return true;
870
}
871
 
872
/* If there are more than one entry for the loop,
873
   make it one by splitting the first entry edge and
874
   redirecting the others to the new BB.  */
875
static void
876
canon_loop (struct loop *loop)
877
{
878
  edge e;
879
  edge_iterator i;
880
 
881
  /* Avoid annoying special cases of edges going to exit
882
     block.  */
883
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
884
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
885
      loop_split_edge_with (e, NULL_RTX);
886
 
887
  if (loop->latch == loop->header
888
      || EDGE_COUNT (loop->latch->succs) > 1)
889
    {
890
      FOR_EACH_EDGE (e, i, loop->header->preds)
891
        if (e->src == loop->latch)
892
          break;
893
      loop_split_edge_with (e, NULL_RTX);
894
    }
895
}
896
 
897
/* Build the loop information without loop
898
   canonization, the loop canonization will
899
   be performed if the loop is SMSable.  */
900
static struct loops *
901
build_loops_structure (FILE *dumpfile)
902
{
903
  struct loops *loops = xcalloc (1, sizeof (struct loops));
904
 
905
  /* Find the loops.  */
906
 
907
  if (flow_loops_find (loops) <= 1)
908
    {
909
      /* No loops.  */
910
      flow_loops_free (loops);
911
      free (loops);
912
 
913
      return NULL;
914
    }
915
 
916
  /* Not going to update these.  */
917
  free (loops->cfg.rc_order);
918
  loops->cfg.rc_order = NULL;
919
  free (loops->cfg.dfs_order);
920
  loops->cfg.dfs_order = NULL;
921
 
922
  create_preheaders (loops, CP_SIMPLE_PREHEADERS);
923
  mark_single_exit_loops (loops);
924
  /* Dump loops.  */
925
  flow_loops_dump (loops, dumpfile, NULL, 1);
926
 
927
#ifdef ENABLE_CHECKING
928
  verify_dominators (CDI_DOMINATORS);
929
  verify_loop_structure (loops);
930
#endif
931
 
932
  return loops;
933
}
934
 
935
/* Main entry point, perform SMS scheduling on the loops of the function
936
   that consist of single basic blocks.  */
937
void
938
sms_schedule (FILE *dump_file)
939
{
940
  static int passes = 0;
941
  rtx insn;
942
  ddg_ptr *g_arr, g;
943
  int * node_order;
944
  int maxii;
945
  unsigned i,num_loops;
946
  partial_schedule_ptr ps;
947
  struct df *df;
948
  struct loops *loops;
949
  basic_block bb = NULL;
950
  /* vars to the versioning only if needed*/
951
  struct loop * nloop;
952
  basic_block condition_bb = NULL;
953
  edge latch_edge;
954
  gcov_type trip_count = 0;
955
 
956
  if (! (loops = build_loops_structure (dump_file)))
957
    return;  /* There is no loops to schedule.  */
958
 
959
 
960
  stats_file = dump_file;
961
 
962
  /* Initialize issue_rate.  */
963
  if (targetm.sched.issue_rate)
964
    {
965
      int temp = reload_completed;
966
 
967
      reload_completed = 1;
968
      issue_rate = targetm.sched.issue_rate ();
969
      reload_completed = temp;
970
    }
971
  else
972
    issue_rate = 1;
973
 
974
  /* Initialize the scheduler.  */
975
  current_sched_info = &sms_sched_info;
976
  sched_init (NULL);
977
 
978
  /* Init Data Flow analysis, to be used in interloop dep calculation.  */
979
  df = df_init ();
980
  df_analyze (df, 0, DF_ALL);
981
 
982
  /* Allocate memory to hold the DDG array one entry for each loop.
983
     We use loop->num as index into this array.  */
984
  g_arr = xcalloc (loops->num, sizeof (ddg_ptr));
985
 
986
 
987
  /* Build DDGs for all the relevant loops and hold them in G_ARR
988
     indexed by the loop index.  */
989
  for (i = 0; i < loops->num; i++)
990
    {
991
      rtx head, tail;
992
      rtx count_reg;
993
      struct loop *loop = loops->parray[i];
994
 
995
      /* For debugging.  */
996
      if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
997
        {
998
          if (dump_file)
999
            fprintf (dump_file, "SMS reached MAX_PASSES... \n");
1000
 
1001
          break;
1002
        }
1003
 
1004
      if (! loop_canon_p (loop, dump_file))
1005
        continue;
1006
 
1007
      if (! loop_single_full_bb_p (loop))
1008
        continue;
1009
 
1010
      bb = loop->header;
1011
 
1012
      get_block_head_tail (bb->index, &head, &tail);
1013
      latch_edge = loop_latch_edge (loop);
1014
      gcc_assert (loop->single_exit);
1015
      if (loop->single_exit->count)
1016
        trip_count = latch_edge->count / loop->single_exit->count;
1017
 
1018
      /* Perfrom SMS only on loops that their average count is above threshold.  */
1019
 
1020
      if ( latch_edge->count
1021
          && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1022
        {
1023
          if (stats_file)
1024
            {
1025
              rtx line_note = find_line_note (tail);
1026
 
1027
              if (line_note)
1028
                {
1029
                  expanded_location xloc;
1030
                  NOTE_EXPANDED_LOCATION (xloc, line_note);
1031
                  fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1032
                           xloc.file, xloc.line);
1033
                }
1034
              fprintf (stats_file, "SMS single-bb-loop\n");
1035
              if (profile_info && flag_branch_probabilities)
1036
                {
1037
                  fprintf (stats_file, "SMS loop-count ");
1038
                  fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1039
                           (HOST_WIDEST_INT) bb->count);
1040
                  fprintf (stats_file, "\n");
1041
                  fprintf (stats_file, "SMS trip-count ");
1042
                  fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1043
                           (HOST_WIDEST_INT) trip_count);
1044
                  fprintf (stats_file, "\n");
1045
                  fprintf (stats_file, "SMS profile-sum-max ");
1046
                  fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1047
                           (HOST_WIDEST_INT) profile_info->sum_max);
1048
                  fprintf (stats_file, "\n");
1049
                }
1050
            }
1051
          continue;
1052
        }
1053
 
1054
      /* Make sure this is a doloop.  */
1055
      if ( !(count_reg = doloop_register_get (tail)))
1056
        continue;
1057
 
1058
      /* Don't handle BBs with calls or barriers, or !single_set insns.  */
1059
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1060
        if (CALL_P (insn)
1061
            || BARRIER_P (insn)
1062
            || (INSN_P (insn) && !JUMP_P (insn)
1063
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1064
          break;
1065
 
1066
      if (insn != NEXT_INSN (tail))
1067
        {
1068
          if (stats_file)
1069
            {
1070
              if (CALL_P (insn))
1071
                fprintf (stats_file, "SMS loop-with-call\n");
1072
              else if (BARRIER_P (insn))
1073
                fprintf (stats_file, "SMS loop-with-barrier\n");
1074
              else
1075
                fprintf (stats_file, "SMS loop-with-not-single-set\n");
1076
              print_rtl_single (stats_file, insn);
1077
            }
1078
 
1079
          continue;
1080
        }
1081
 
1082
      if (! (g = create_ddg (bb, df, 0)))
1083
        {
1084
          if (stats_file)
1085
            fprintf (stats_file, "SMS doloop\n");
1086
          continue;
1087
        }
1088
 
1089
      g_arr[i] = g;
1090
    }
1091
 
1092
  /* Release Data Flow analysis data structures.  */
1093
  df_finish (df);
1094
 
1095
  /* We don't want to perform SMS on new loops - created by versioning.  */
1096
  num_loops = loops->num;
1097
  /* Go over the built DDGs and perfrom SMS for each one of them.  */
1098
  for (i = 0; i < num_loops; i++)
1099
    {
1100
      rtx head, tail;
1101
      rtx count_reg, count_init;
1102
      int mii, rec_mii;
1103
      unsigned stage_count = 0;
1104
      HOST_WIDEST_INT loop_count = 0;
1105
      struct loop *loop = loops->parray[i];
1106
 
1107
      if (! (g = g_arr[i]))
1108
        continue;
1109
 
1110
      if (dump_file)
1111
        print_ddg (dump_file, g);
1112
 
1113
      get_block_head_tail (loop->header->index, &head, &tail);
1114
 
1115
      latch_edge = loop_latch_edge (loop);
1116
      gcc_assert (loop->single_exit);
1117
      if (loop->single_exit->count)
1118
        trip_count = latch_edge->count / loop->single_exit->count;
1119
 
1120
      if (stats_file)
1121
        {
1122
          rtx line_note = find_line_note (tail);
1123
 
1124
          if (line_note)
1125
            {
1126
              expanded_location xloc;
1127
              NOTE_EXPANDED_LOCATION (xloc, line_note);
1128
              fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1129
                       xloc.file, xloc.line);
1130
            }
1131
          fprintf (stats_file, "SMS single-bb-loop\n");
1132
          if (profile_info && flag_branch_probabilities)
1133
            {
1134
              fprintf (stats_file, "SMS loop-count ");
1135
              fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1136
                       (HOST_WIDEST_INT) bb->count);
1137
              fprintf (stats_file, "\n");
1138
              fprintf (stats_file, "SMS profile-sum-max ");
1139
              fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1140
                       (HOST_WIDEST_INT) profile_info->sum_max);
1141
              fprintf (stats_file, "\n");
1142
            }
1143
          fprintf (stats_file, "SMS doloop\n");
1144
          fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1145
          fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1146
          fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1147
        }
1148
 
1149
 
1150
      /* In case of th loop have doloop register it gets special
1151
         handling.  */
1152
      count_init = NULL_RTX;
1153
      if ((count_reg = doloop_register_get (tail)))
1154
        {
1155
          basic_block pre_header;
1156
 
1157
          pre_header = loop_preheader_edge (loop)->src;
1158
          count_init = const_iteration_count (count_reg, pre_header,
1159
                                              &loop_count);
1160
        }
1161
      gcc_assert (count_reg);
1162
 
1163
      if (stats_file && count_init)
1164
        {
1165
          fprintf (stats_file, "SMS const-doloop ");
1166
          fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1167
                     loop_count);
1168
          fprintf (stats_file, "\n");
1169
        }
1170
 
1171
      node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1172
 
1173
      mii = 1; /* Need to pass some estimate of mii.  */
1174
      rec_mii = sms_order_nodes (g, mii, node_order);
1175
      mii = MAX (res_MII (g), rec_mii);
1176
      maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1177
 
1178
      if (stats_file)
1179
        fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1180
                 rec_mii, mii, maxii);
1181
 
1182
      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1183
         ASAP.  */
1184
      set_node_sched_params (g);
1185
 
1186
      ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1187
 
1188
      if (ps)
1189
        stage_count = PS_STAGE_COUNT (ps);
1190
 
1191
      /* Stage count of 1 means that there is no interleaving between
1192
         iterations, let the scheduling passes do the job.  */
1193
      if (stage_count < 1
1194
          || (count_init && (loop_count <= stage_count))
1195
          || (flag_branch_probabilities && (trip_count <= stage_count)))
1196
        {
1197
          if (dump_file)
1198
            {
1199
              fprintf (dump_file, "SMS failed... \n");
1200
              fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1201
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1202
              fprintf (dump_file, ", trip-count=");
1203
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1204
              fprintf (dump_file, ")\n");
1205
            }
1206
          continue;
1207
        }
1208
      else
1209
        {
1210
          int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1211
          int new_cycles;
1212
          struct undo_replace_buff_elem *reg_move_replaces;
1213
 
1214
          if (stats_file)
1215
            {
1216
              fprintf (stats_file,
1217
                       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1218
                       stage_count);
1219
              print_partial_schedule (ps, stats_file);
1220
              fprintf (stats_file,
1221
                       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1222
                       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1223
            }
1224
 
1225
          /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
1226
             the closing_branch was scheduled and should appear in the last (ii-1)
1227
             row.  Otherwise, we are free to schedule the branch, and we let nodes
1228
             that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1229
             row; this should reduce stage_count to minimum.  */
1230
          normalize_sched_times (ps);
1231
          rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1232
          set_columns_for_ps (ps);
1233
 
1234
          /* Generate the kernel just to be able to measure its cycles.  */
1235
          permute_partial_schedule (ps, g->closing_branch->first_note);
1236
          reg_move_replaces = generate_reg_moves (ps);
1237
 
1238
          /* Get the number of cycles the new kernel expect to execute in.  */
1239
          new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1240
 
1241
          /* Get back to the original loop so we can do loop versioning.  */
1242
          undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1243
          if (reg_move_replaces)
1244
            undo_generate_reg_moves (ps, reg_move_replaces);
1245
 
1246
          if ( new_cycles >= orig_cycles)
1247
            {
1248
              /* SMS is not profitable so undo the permutation and reg move generation
1249
                 and return the kernel to its original state.  */
1250
              if (dump_file)
1251
                fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1252
 
1253
            }
1254
          else
1255
            {
1256
              canon_loop (loop);
1257
 
1258
              /* case the BCT count is not known , Do loop-versioning */
1259
              if (count_reg && ! count_init)
1260
                {
1261
                  rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1262
                                                 GEN_INT(stage_count));
1263
 
1264
                  nloop = loop_version (loops, loop, comp_rtx, &condition_bb,
1265
                                        true);
1266
                }
1267
 
1268
              /* Set new iteration count of loop kernel.  */
1269
              if (count_reg && count_init)
1270
                SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1271
                                                             - stage_count + 1);
1272
 
1273
              /* Now apply the scheduled kernel to the RTL of the loop.  */
1274
              permute_partial_schedule (ps, g->closing_branch->first_note);
1275
 
1276
              /* Mark this loop as software pipelined so the later
1277
              scheduling passes doesn't touch it.  */
1278
              if (! flag_resched_modulo_sched)
1279
                g->bb->flags |= BB_DISABLE_SCHEDULE;
1280
              /* The life-info is not valid any more.  */
1281
              g->bb->flags |= BB_DIRTY;
1282
 
1283
              reg_move_replaces = generate_reg_moves (ps);
1284
              if (dump_file)
1285
                print_node_sched_params (dump_file, g->num_nodes);
1286
              /* Generate prolog and epilog.  */
1287
              if (count_reg && !count_init)
1288
                generate_prolog_epilog (ps, loop, count_reg);
1289
              else
1290
                generate_prolog_epilog (ps, loop, NULL_RTX);
1291
            }
1292
          free_undo_replace_buff (reg_move_replaces);
1293
        }
1294
 
1295
      free_partial_schedule (ps);
1296
      free (node_sched_params);
1297
      free (node_order);
1298
      free_ddg (g);
1299
    }
1300
 
1301
  /* Release scheduler data, needed until now because of DFA.  */
1302
  sched_finish ();
1303
  loop_optimizer_finalize (loops, dump_file);
1304
}
1305
 
1306
/* The SMS scheduling algorithm itself
1307
   -----------------------------------
1308
   Input: 'O' an ordered list of insns of a loop.
1309
   Output: A scheduling of the loop - kernel, prolog, and epilogue.
1310
 
1311
   'Q' is the empty Set
1312
   'PS' is the partial schedule; it holds the currently scheduled nodes with
1313
        their cycle/slot.
1314
   'PSP' previously scheduled predecessors.
1315
   'PSS' previously scheduled successors.
1316
   't(u)' the cycle where u is scheduled.
1317
   'l(u)' is the latency of u.
1318
   'd(v,u)' is the dependence distance from v to u.
1319
   'ASAP(u)' the earliest time at which u could be scheduled as computed in
1320
             the node ordering phase.
1321
   'check_hardware_resources_conflicts(u, PS, c)'
1322
                             run a trace around cycle/slot through DFA model
1323
                             to check resource conflicts involving instruction u
1324
                             at cycle c given the partial schedule PS.
1325
   'add_to_partial_schedule_at_time(u, PS, c)'
1326
                             Add the node/instruction u to the partial schedule
1327
                             PS at time c.
1328
   'calculate_register_pressure(PS)'
1329
                             Given a schedule of instructions, calculate the register
1330
                             pressure it implies.  One implementation could be the
1331
                             maximum number of overlapping live ranges.
1332
   'maxRP' The maximum allowed register pressure, it is usually derived from the number
1333
           registers available in the hardware.
1334
 
1335
   1. II = MII.
1336
   2. PS = empty list
1337
   3. for each node u in O in pre-computed order
1338
   4.   if (PSP(u) != Q && PSS(u) == Q) then
1339
   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1340
   6.     start = Early_start; end = Early_start + II - 1; step = 1
1341
   11.  else if (PSP(u) == Q && PSS(u) != Q) then
1342
   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1343
   13.     start = Late_start; end = Late_start - II + 1; step = -1
1344
   14.  else if (PSP(u) != Q && PSS(u) != Q) then
1345
   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1346
   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1347
   17.     start = Early_start;
1348
   18.     end = min(Early_start + II - 1 , Late_start);
1349
   19.     step = 1
1350
   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1351
   21.    start = ASAP(u); end = start + II - 1; step = 1
1352
   22.  endif
1353
 
1354
   23.  success = false
1355
   24.  for (c = start ; c != end ; c += step)
1356
   25.     if check_hardware_resources_conflicts(u, PS, c) then
1357
   26.       add_to_partial_schedule_at_time(u, PS, c)
1358
   27.       success = true
1359
   28.       break
1360
   29.     endif
1361
   30.  endfor
1362
   31.  if (success == false) then
1363
   32.    II = II + 1
1364
   33.    if (II > maxII) then
1365
   34.       finish - failed to schedule
1366
   35.   endif
1367
   36.    goto 2.
1368
   37.  endif
1369
   38. endfor
1370
   39. if (calculate_register_pressure(PS) > maxRP) then
1371
   40.    goto 32.
1372
   41. endif
1373
   42. compute epilogue & prologue
1374
   43. finish - succeeded to schedule
1375
*/
1376
 
1377
/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1378
   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1379
   set to 0 to save compile time.  */
1380
#define DFA_HISTORY SMS_DFA_HISTORY
1381
 
1382
/* Given the partial schedule PS, this function calculates and returns the
1383
   cycles in which we can schedule the node with the given index I.
1384
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1385
   noticed that there are several cases in which we fail    to SMS the loop
1386
   because the sched window of a node is empty    due to tight data-deps. In
1387
   such cases we want to unschedule    some of the predecessors/successors
1388
   until we get non-empty    scheduling window.  It returns -1 if the
1389
   scheduling window is empty and zero otherwise.  */
1390
 
1391
static int
1392
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1393
                  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1394
{
1395
  int start, step, end;
1396
  ddg_edge_ptr e;
1397
  int u = nodes_order [i];
1398
  ddg_node_ptr u_node = &ps->g->nodes[u];
1399
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1400
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1401
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1402
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1403
  int psp_not_empty;
1404
  int pss_not_empty;
1405
 
1406
  /* 1. compute sched window for u (start, end, step).  */
1407
  sbitmap_zero (psp);
1408
  sbitmap_zero (pss);
1409
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1410
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1411
 
1412
  if (psp_not_empty && !pss_not_empty)
1413
    {
1414
      int early_start = INT_MIN;
1415
 
1416
      end = INT_MAX;
1417
      for (e = u_node->in; e != 0; e = e->next_in)
1418
        {
1419
          ddg_node_ptr v_node = e->src;
1420
          if (TEST_BIT (sched_nodes, v_node->cuid))
1421
            {
1422
              int node_st = SCHED_TIME (v_node)
1423
                            + e->latency - (e->distance * ii);
1424
 
1425
              early_start = MAX (early_start, node_st);
1426
 
1427
              if (e->data_type == MEM_DEP)
1428
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1429
            }
1430
        }
1431
      start = early_start;
1432
      end = MIN (end, early_start + ii);
1433
      step = 1;
1434
    }
1435
 
1436
  else if (!psp_not_empty && pss_not_empty)
1437
    {
1438
      int late_start = INT_MAX;
1439
 
1440
      end = INT_MIN;
1441
      for (e = u_node->out; e != 0; e = e->next_out)
1442
        {
1443
          ddg_node_ptr v_node = e->dest;
1444
          if (TEST_BIT (sched_nodes, v_node->cuid))
1445
            {
1446
              late_start = MIN (late_start,
1447
                                SCHED_TIME (v_node) - e->latency
1448
                                + (e->distance * ii));
1449
              if (e->data_type == MEM_DEP)
1450
                end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1451
            }
1452
        }
1453
      start = late_start;
1454
      end = MAX (end, late_start - ii);
1455
      step = -1;
1456
    }
1457
 
1458
  else if (psp_not_empty && pss_not_empty)
1459
    {
1460
      int early_start = INT_MIN;
1461
      int late_start = INT_MAX;
1462
 
1463
      start = INT_MIN;
1464
      end = INT_MAX;
1465
      for (e = u_node->in; e != 0; e = e->next_in)
1466
        {
1467
          ddg_node_ptr v_node = e->src;
1468
 
1469
          if (TEST_BIT (sched_nodes, v_node->cuid))
1470
            {
1471
              early_start = MAX (early_start,
1472
                                 SCHED_TIME (v_node) + e->latency
1473
                                 - (e->distance * ii));
1474
              if (e->data_type == MEM_DEP)
1475
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1476
            }
1477
        }
1478
      for (e = u_node->out; e != 0; e = e->next_out)
1479
        {
1480
          ddg_node_ptr v_node = e->dest;
1481
 
1482
          if (TEST_BIT (sched_nodes, v_node->cuid))
1483
            {
1484
              late_start = MIN (late_start,
1485
                                SCHED_TIME (v_node) - e->latency
1486
                                + (e->distance * ii));
1487
              if (e->data_type == MEM_DEP)
1488
                start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1489
            }
1490
        }
1491
      start = MAX (start, early_start);
1492
      end = MIN (end, MIN (early_start + ii, late_start + 1));
1493
      step = 1;
1494
    }
1495
  else /* psp is empty && pss is empty.  */
1496
    {
1497
      start = SCHED_ASAP (u_node);
1498
      end = start + ii;
1499
      step = 1;
1500
    }
1501
 
1502
  *start_p = start;
1503
  *step_p = step;
1504
  *end_p = end;
1505
  sbitmap_free (psp);
1506
  sbitmap_free (pss);
1507
 
1508
  if ((start >= end && step == 1) || (start <= end && step == -1))
1509
    return -1;
1510
  else
1511
    return 0;
1512
}
1513
 
1514
/* This function implements the scheduling algorithm for SMS according to the
1515
   above algorithm.  */
1516
static partial_schedule_ptr
1517
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1518
{
1519
  int ii = mii;
1520
  int i, c, success;
1521
  int try_again_with_larger_ii = true;
1522
  int num_nodes = g->num_nodes;
1523
  ddg_edge_ptr e;
1524
  int start, end, step; /* Place together into one struct?  */
1525
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1526
  sbitmap must_precede = sbitmap_alloc (num_nodes);
1527
  sbitmap must_follow = sbitmap_alloc (num_nodes);
1528
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1529
 
1530
  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1531
 
1532
  sbitmap_ones (tobe_scheduled);
1533
  sbitmap_zero (sched_nodes);
1534
 
1535
  while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1536
         || try_again_with_larger_ii ) && ii < maxii)
1537
    {
1538
      int j;
1539
      bool unscheduled_nodes = false;
1540
 
1541
      if (dump_file)
1542
        fprintf(dump_file, "Starting with ii=%d\n", ii);
1543
      if (try_again_with_larger_ii)
1544
        {
1545
          try_again_with_larger_ii = false;
1546
          sbitmap_zero (sched_nodes);
1547
        }
1548
 
1549
      for (i = 0; i < num_nodes; i++)
1550
        {
1551
          int u = nodes_order[i];
1552
          ddg_node_ptr u_node = &ps->g->nodes[u];
1553
          rtx insn = u_node->insn;
1554
 
1555
          if (!INSN_P (insn))
1556
            {
1557
              RESET_BIT (tobe_scheduled, u);
1558
              continue;
1559
            }
1560
 
1561
          if (JUMP_P (insn)) /* Closing branch handled later.  */
1562
            {
1563
              RESET_BIT (tobe_scheduled, u);
1564
              continue;
1565
            }
1566
 
1567
          if (TEST_BIT (sched_nodes, u))
1568
            continue;
1569
 
1570
          /* Try to get non-empty scheduling window.  */
1571
          j = i;
1572
          while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
1573
                 && j > 0)
1574
            {
1575
              unscheduled_nodes = true;
1576
              if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1577
                  || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1578
                {
1579
                  ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1580
                  RESET_BIT (sched_nodes, nodes_order [j - 1]);
1581
                }
1582
              j--;
1583
            }
1584
          if (j < 0)
1585
            {
1586
              /* ??? Try backtracking instead of immediately ii++?  */
1587
              ii++;
1588
              try_again_with_larger_ii = true;
1589
              reset_partial_schedule (ps, ii);
1590
              break;
1591
            }
1592
          /* 2. Try scheduling u in window.  */
1593
          if (dump_file)
1594
            fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1595
                    u, start, end, step);
1596
 
1597
          /* use must_follow & must_precede bitmaps to determine order
1598
             of nodes within the cycle.  */
1599
          sbitmap_zero (must_precede);
1600
          sbitmap_zero (must_follow);
1601
          for (e = u_node->in; e != 0; e = e->next_in)
1602
            if (TEST_BIT (sched_nodes, e->src->cuid)
1603
                && e->latency == (ii * e->distance)
1604
                && start == SCHED_TIME (e->src))
1605
             SET_BIT (must_precede, e->src->cuid);
1606
 
1607
          for (e = u_node->out; e != 0; e = e->next_out)
1608
            if (TEST_BIT (sched_nodes, e->dest->cuid)
1609
                && e->latency == (ii * e->distance)
1610
                && end == SCHED_TIME (e->dest))
1611
             SET_BIT (must_follow, e->dest->cuid);
1612
 
1613
          success = 0;
1614
          if ((step > 0 && start < end) ||  (step < 0 && start > end))
1615
            for (c = start; c != end; c += step)
1616
              {
1617
                ps_insn_ptr psi;
1618
 
1619
                psi = ps_add_node_check_conflicts (ps, u_node, c,
1620
                                                   must_precede,
1621
                                                   must_follow);
1622
 
1623
                if (psi)
1624
                  {
1625
                    SCHED_TIME (u_node) = c;
1626
                    SET_BIT (sched_nodes, u);
1627
                    success = 1;
1628
                    if (dump_file)
1629
                      fprintf(dump_file, "Schedule in %d\n", c);
1630
                    break;
1631
                  }
1632
              }
1633
          if (!success)
1634
            {
1635
              /* ??? Try backtracking instead of immediately ii++?  */
1636
              ii++;
1637
              try_again_with_larger_ii = true;
1638
              reset_partial_schedule (ps, ii);
1639
              break;
1640
            }
1641
          if (unscheduled_nodes)
1642
            break;
1643
 
1644
          /* ??? If (success), check register pressure estimates.  */
1645
        } /* Continue with next node.  */
1646
    } /* While try_again_with_larger_ii.  */
1647
 
1648
  sbitmap_free (sched_nodes);
1649
 
1650
  if (ii >= maxii)
1651
    {
1652
      free_partial_schedule (ps);
1653
      ps = NULL;
1654
    }
1655
  return ps;
1656
}
1657
 
1658
 
1659
/* This page implements the algorithm for ordering the nodes of a DDG
1660
   for modulo scheduling, activated through the
1661
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
1662
 
1663
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1664
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
1665
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
1666
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1667
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
1668
#define DEPTH(x) (ASAP ((x)))
1669
 
1670
typedef struct node_order_params * nopa;
1671
 
1672
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1673
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1674
static nopa  calculate_order_params (ddg_ptr, int mii);
1675
static int find_max_asap (ddg_ptr, sbitmap);
1676
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1677
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1678
 
1679
enum sms_direction {BOTTOMUP, TOPDOWN};
1680
 
1681
struct node_order_params
1682
{
1683
  int asap;
1684
  int alap;
1685
  int height;
1686
};
1687
 
1688
/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
1689
static void
1690
check_nodes_order (int *node_order, int num_nodes)
1691
{
1692
  int i;
1693
  sbitmap tmp = sbitmap_alloc (num_nodes);
1694
 
1695
  sbitmap_zero (tmp);
1696
 
1697
  for (i = 0; i < num_nodes; i++)
1698
    {
1699
      int u = node_order[i];
1700
 
1701
      gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1702
 
1703
      SET_BIT (tmp, u);
1704
    }
1705
 
1706
  sbitmap_free (tmp);
1707
}
1708
 
1709
/* Order the nodes of G for scheduling and pass the result in
1710
   NODE_ORDER.  Also set aux.count of each node to ASAP.
1711
   Return the recMII for the given DDG.  */
1712
static int
1713
sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1714
{
1715
  int i;
1716
  int rec_mii = 0;
1717
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1718
 
1719
  nopa nops = calculate_order_params (g, mii);
1720
 
1721
  order_nodes_of_sccs (sccs, node_order);
1722
 
1723
  if (sccs->num_sccs > 0)
1724
    /* First SCC has the largest recurrence_length.  */
1725
    rec_mii = sccs->sccs[0]->recurrence_length;
1726
 
1727
  /* Save ASAP before destroying node_order_params.  */
1728
  for (i = 0; i < g->num_nodes; i++)
1729
    {
1730
      ddg_node_ptr v = &g->nodes[i];
1731
      v->aux.count = ASAP (v);
1732
    }
1733
 
1734
  free (nops);
1735
  free_ddg_all_sccs (sccs);
1736
  check_nodes_order (node_order, g->num_nodes);
1737
 
1738
  return rec_mii;
1739
}
1740
 
1741
static void
1742
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1743
{
1744
  int i, pos = 0;
1745
  ddg_ptr g = all_sccs->ddg;
1746
  int num_nodes = g->num_nodes;
1747
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1748
  sbitmap on_path = sbitmap_alloc (num_nodes);
1749
  sbitmap tmp = sbitmap_alloc (num_nodes);
1750
  sbitmap ones = sbitmap_alloc (num_nodes);
1751
 
1752
  sbitmap_zero (prev_sccs);
1753
  sbitmap_ones (ones);
1754
 
1755
  /* Perfrom the node ordering starting from the SCC with the highest recMII.
1756
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
1757
  for (i = 0; i < all_sccs->num_sccs; i++)
1758
    {
1759
      ddg_scc_ptr scc = all_sccs->sccs[i];
1760
 
1761
      /* Add nodes on paths from previous SCCs to the current SCC.  */
1762
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1763
      sbitmap_a_or_b (tmp, scc->nodes, on_path);
1764
 
1765
      /* Add nodes on paths from the current SCC to previous SCCs.  */
1766
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1767
      sbitmap_a_or_b (tmp, tmp, on_path);
1768
 
1769
      /* Remove nodes of previous SCCs from current extended SCC.  */
1770
      sbitmap_difference (tmp, tmp, prev_sccs);
1771
 
1772
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1773
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
1774
    }
1775
 
1776
  /* Handle the remaining nodes that do not belong to any scc.  Each call
1777
     to order_nodes_in_scc handles a single connected component.  */
1778
  while (pos < g->num_nodes)
1779
    {
1780
      sbitmap_difference (tmp, ones, prev_sccs);
1781
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1782
    }
1783
  sbitmap_free (prev_sccs);
1784
  sbitmap_free (on_path);
1785
  sbitmap_free (tmp);
1786
  sbitmap_free (ones);
1787
}
1788
 
1789
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
1790
static struct node_order_params *
1791
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1792
{
1793
  int u;
1794
  int max_asap;
1795
  int num_nodes = g->num_nodes;
1796
  ddg_edge_ptr e;
1797
  /* Allocate a place to hold ordering params for each node in the DDG.  */
1798
  nopa node_order_params_arr;
1799
 
1800
  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
1801
  node_order_params_arr = (nopa) xcalloc (num_nodes,
1802
                                          sizeof (struct node_order_params));
1803
 
1804
  /* Set the aux pointer of each node to point to its order_params structure.  */
1805
  for (u = 0; u < num_nodes; u++)
1806
    g->nodes[u].aux.info = &node_order_params_arr[u];
1807
 
1808
  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1809
     calculate ASAP, ALAP, mobility, distance, and height for each node
1810
     in the dependence (direct acyclic) graph.  */
1811
 
1812
  /* We assume that the nodes in the array are in topological order.  */
1813
 
1814
  max_asap = 0;
1815
  for (u = 0; u < num_nodes; u++)
1816
    {
1817
      ddg_node_ptr u_node = &g->nodes[u];
1818
 
1819
      ASAP (u_node) = 0;
1820
      for (e = u_node->in; e; e = e->next_in)
1821
        if (e->distance == 0)
1822
          ASAP (u_node) = MAX (ASAP (u_node),
1823
                               ASAP (e->src) + e->latency);
1824
      max_asap = MAX (max_asap, ASAP (u_node));
1825
    }
1826
 
1827
  for (u = num_nodes - 1; u > -1; u--)
1828
    {
1829
      ddg_node_ptr u_node = &g->nodes[u];
1830
 
1831
      ALAP (u_node) = max_asap;
1832
      HEIGHT (u_node) = 0;
1833
      for (e = u_node->out; e; e = e->next_out)
1834
        if (e->distance == 0)
1835
          {
1836
            ALAP (u_node) = MIN (ALAP (u_node),
1837
                                 ALAP (e->dest) - e->latency);
1838
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
1839
                                   HEIGHT (e->dest) + e->latency);
1840
          }
1841
    }
1842
 
1843
  return node_order_params_arr;
1844
}
1845
 
1846
static int
1847
find_max_asap (ddg_ptr g, sbitmap nodes)
1848
{
1849
  unsigned int u = 0;
1850
  int max_asap = -1;
1851
  int result = -1;
1852
  sbitmap_iterator sbi;
1853
 
1854
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1855
    {
1856
      ddg_node_ptr u_node = &g->nodes[u];
1857
 
1858
      if (max_asap < ASAP (u_node))
1859
        {
1860
          max_asap = ASAP (u_node);
1861
          result = u;
1862
        }
1863
    }
1864
  return result;
1865
}
1866
 
1867
static int
1868
find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1869
{
1870
  unsigned int u = 0;
1871
  int max_hv = -1;
1872
  int min_mob = INT_MAX;
1873
  int result = -1;
1874
  sbitmap_iterator sbi;
1875
 
1876
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1877
    {
1878
      ddg_node_ptr u_node = &g->nodes[u];
1879
 
1880
      if (max_hv < HEIGHT (u_node))
1881
        {
1882
          max_hv = HEIGHT (u_node);
1883
          min_mob = MOB (u_node);
1884
          result = u;
1885
        }
1886
      else if ((max_hv == HEIGHT (u_node))
1887
               && (min_mob > MOB (u_node)))
1888
        {
1889
          min_mob = MOB (u_node);
1890
          result = u;
1891
        }
1892
    }
1893
  return result;
1894
}
1895
 
1896
static int
1897
find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1898
{
1899
  unsigned int u = 0;
1900
  int max_dv = -1;
1901
  int min_mob = INT_MAX;
1902
  int result = -1;
1903
  sbitmap_iterator sbi;
1904
 
1905
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1906
    {
1907
      ddg_node_ptr u_node = &g->nodes[u];
1908
 
1909
      if (max_dv < DEPTH (u_node))
1910
        {
1911
          max_dv = DEPTH (u_node);
1912
          min_mob = MOB (u_node);
1913
          result = u;
1914
        }
1915
      else if ((max_dv == DEPTH (u_node))
1916
               && (min_mob > MOB (u_node)))
1917
        {
1918
          min_mob = MOB (u_node);
1919
          result = u;
1920
        }
1921
    }
1922
  return result;
1923
}
1924
 
1925
/* Places the nodes of SCC into the NODE_ORDER array starting
1926
   at position POS, according to the SMS ordering algorithm.
1927
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1928
   the NODE_ORDER array, starting from position zero.  */
1929
static int
1930
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1931
                    int * node_order, int pos)
1932
{
1933
  enum sms_direction dir;
1934
  int num_nodes = g->num_nodes;
1935
  sbitmap workset = sbitmap_alloc (num_nodes);
1936
  sbitmap tmp = sbitmap_alloc (num_nodes);
1937
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1938
  sbitmap predecessors = sbitmap_alloc (num_nodes);
1939
  sbitmap successors = sbitmap_alloc (num_nodes);
1940
 
1941
  sbitmap_zero (predecessors);
1942
  find_predecessors (predecessors, g, nodes_ordered);
1943
 
1944
  sbitmap_zero (successors);
1945
  find_successors (successors, g, nodes_ordered);
1946
 
1947
  sbitmap_zero (tmp);
1948
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1949
    {
1950
      sbitmap_copy (workset, tmp);
1951
      dir = BOTTOMUP;
1952
    }
1953
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1954
    {
1955
      sbitmap_copy (workset, tmp);
1956
      dir = TOPDOWN;
1957
    }
1958
  else
1959
    {
1960
      int u;
1961
 
1962
      sbitmap_zero (workset);
1963
      if ((u = find_max_asap (g, scc)) >= 0)
1964
        SET_BIT (workset, u);
1965
      dir = BOTTOMUP;
1966
    }
1967
 
1968
  sbitmap_zero (zero_bitmap);
1969
  while (!sbitmap_equal (workset, zero_bitmap))
1970
    {
1971
      int v;
1972
      ddg_node_ptr v_node;
1973
      sbitmap v_node_preds;
1974
      sbitmap v_node_succs;
1975
 
1976
      if (dir == TOPDOWN)
1977
        {
1978
          while (!sbitmap_equal (workset, zero_bitmap))
1979
            {
1980
              v = find_max_hv_min_mob (g, workset);
1981
              v_node = &g->nodes[v];
1982
              node_order[pos++] = v;
1983
              v_node_succs = NODE_SUCCESSORS (v_node);
1984
              sbitmap_a_and_b (tmp, v_node_succs, scc);
1985
 
1986
              /* Don't consider the already ordered successors again.  */
1987
              sbitmap_difference (tmp, tmp, nodes_ordered);
1988
              sbitmap_a_or_b (workset, workset, tmp);
1989
              RESET_BIT (workset, v);
1990
              SET_BIT (nodes_ordered, v);
1991
            }
1992
          dir = BOTTOMUP;
1993
          sbitmap_zero (predecessors);
1994
          find_predecessors (predecessors, g, nodes_ordered);
1995
          sbitmap_a_and_b (workset, predecessors, scc);
1996
        }
1997
      else
1998
        {
1999
          while (!sbitmap_equal (workset, zero_bitmap))
2000
            {
2001
              v = find_max_dv_min_mob (g, workset);
2002
              v_node = &g->nodes[v];
2003
              node_order[pos++] = v;
2004
              v_node_preds = NODE_PREDECESSORS (v_node);
2005
              sbitmap_a_and_b (tmp, v_node_preds, scc);
2006
 
2007
              /* Don't consider the already ordered predecessors again.  */
2008
              sbitmap_difference (tmp, tmp, nodes_ordered);
2009
              sbitmap_a_or_b (workset, workset, tmp);
2010
              RESET_BIT (workset, v);
2011
              SET_BIT (nodes_ordered, v);
2012
            }
2013
          dir = TOPDOWN;
2014
          sbitmap_zero (successors);
2015
          find_successors (successors, g, nodes_ordered);
2016
          sbitmap_a_and_b (workset, successors, scc);
2017
        }
2018
    }
2019
  sbitmap_free (tmp);
2020
  sbitmap_free (workset);
2021
  sbitmap_free (zero_bitmap);
2022
  sbitmap_free (predecessors);
2023
  sbitmap_free (successors);
2024
  return pos;
2025
}
2026
 
2027
 
2028
/* This page contains functions for manipulating partial-schedules during
2029
   modulo scheduling.  */
2030
 
2031
/* Create a partial schedule and allocate a memory to hold II rows.  */
2032
partial_schedule_ptr
2033
create_partial_schedule (int ii, ddg_ptr g, int history)
2034
{
2035
  partial_schedule_ptr ps = (partial_schedule_ptr)
2036
                             xmalloc (sizeof (struct partial_schedule));
2037
  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2038
  ps->ii = ii;
2039
  ps->history = history;
2040
  ps->min_cycle = INT_MAX;
2041
  ps->max_cycle = INT_MIN;
2042
  ps->g = g;
2043
 
2044
  return ps;
2045
}
2046
 
2047
/* Free the PS_INSNs in rows array of the given partial schedule.
2048
   ??? Consider caching the PS_INSN's.  */
2049
static void
2050
free_ps_insns (partial_schedule_ptr ps)
2051
{
2052
  int i;
2053
 
2054
  for (i = 0; i < ps->ii; i++)
2055
    {
2056
      while (ps->rows[i])
2057
        {
2058
          ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2059
 
2060
          free (ps->rows[i]);
2061
          ps->rows[i] = ps_insn;
2062
        }
2063
      ps->rows[i] = NULL;
2064
    }
2065
}
2066
 
2067
/* Free all the memory allocated to the partial schedule.  */
2068
void
2069
free_partial_schedule (partial_schedule_ptr ps)
2070
{
2071
  if (!ps)
2072
    return;
2073
  free_ps_insns (ps);
2074
  free (ps->rows);
2075
  free (ps);
2076
}
2077
 
2078
/* Clear the rows array with its PS_INSNs, and create a new one with
2079
   NEW_II rows.  */
2080
void
2081
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2082
{
2083
  if (!ps)
2084
    return;
2085
  free_ps_insns (ps);
2086
  if (new_ii == ps->ii)
2087
    return;
2088
  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2089
                                                 * sizeof (ps_insn_ptr));
2090
  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2091
  ps->ii = new_ii;
2092
  ps->min_cycle = INT_MAX;
2093
  ps->max_cycle = INT_MIN;
2094
}
2095
 
2096
/* Prints the partial schedule as an ii rows array, for each rows
2097
   print the ids of the insns in it.  */
2098
void
2099
print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2100
{
2101
  int i;
2102
 
2103
  for (i = 0; i < ps->ii; i++)
2104
    {
2105
      ps_insn_ptr ps_i = ps->rows[i];
2106
 
2107
      fprintf (dump, "\n[CYCLE %d ]: ", i);
2108
      while (ps_i)
2109
        {
2110
          fprintf (dump, "%d, ",
2111
                   INSN_UID (ps_i->node->insn));
2112
          ps_i = ps_i->next_in_row;
2113
        }
2114
    }
2115
}
2116
 
2117
/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2118
static ps_insn_ptr
2119
create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2120
{
2121
  ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
2122
 
2123
  ps_i->node = node;
2124
  ps_i->next_in_row = NULL;
2125
  ps_i->prev_in_row = NULL;
2126
  ps_i->row_rest_count = rest_count;
2127
  ps_i->cycle = cycle;
2128
 
2129
  return ps_i;
2130
}
2131
 
2132
 
2133
/* Removes the given PS_INSN from the partial schedule.  Returns false if the
2134
   node is not found in the partial schedule, else returns true.  */
2135
static bool
2136
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2137
{
2138
  int row;
2139
 
2140
  if (!ps || !ps_i)
2141
    return false;
2142
 
2143
  row = SMODULO (ps_i->cycle, ps->ii);
2144
  if (! ps_i->prev_in_row)
2145
    {
2146
      if (ps_i != ps->rows[row])
2147
        return false;
2148
 
2149
      ps->rows[row] = ps_i->next_in_row;
2150
      if (ps->rows[row])
2151
        ps->rows[row]->prev_in_row = NULL;
2152
    }
2153
  else
2154
    {
2155
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2156
      if (ps_i->next_in_row)
2157
        ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2158
    }
2159
  free (ps_i);
2160
  return true;
2161
}
2162
 
2163
/* Unlike what literature describes for modulo scheduling (which focuses
2164
   on VLIW machines) the order of the instructions inside a cycle is
2165
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2166
   where the current instruction should go relative to the already
2167
   scheduled instructions in the given cycle.  Go over these
2168
   instructions and find the first possible column to put it in.  */
2169
static bool
2170
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2171
                     sbitmap must_precede, sbitmap must_follow)
2172
{
2173
  ps_insn_ptr next_ps_i;
2174
  ps_insn_ptr first_must_follow = NULL;
2175
  ps_insn_ptr last_must_precede = NULL;
2176
  int row;
2177
 
2178
  if (! ps_i)
2179
    return false;
2180
 
2181
  row = SMODULO (ps_i->cycle, ps->ii);
2182
 
2183
  /* Find the first must follow and the last must precede
2184
     and insert the node immediately after the must precede
2185
     but make sure that it there is no must follow after it.  */
2186
  for (next_ps_i = ps->rows[row];
2187
       next_ps_i;
2188
       next_ps_i = next_ps_i->next_in_row)
2189
    {
2190
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2191
          && ! first_must_follow)
2192
        first_must_follow = next_ps_i;
2193
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2194
        {
2195
          /* If we have already met a node that must follow, then
2196
             there is no possible column.  */
2197
          if (first_must_follow)
2198
            return false;
2199
          else
2200
            last_must_precede = next_ps_i;
2201
        }
2202
    }
2203
 
2204
  /* Now insert the node after INSERT_AFTER_PSI.  */
2205
 
2206
  if (! last_must_precede)
2207
    {
2208
      ps_i->next_in_row = ps->rows[row];
2209
      ps_i->prev_in_row = NULL;
2210
      if (ps_i->next_in_row)
2211
        ps_i->next_in_row->prev_in_row = ps_i;
2212
      ps->rows[row] = ps_i;
2213
    }
2214
  else
2215
    {
2216
      ps_i->next_in_row = last_must_precede->next_in_row;
2217
      last_must_precede->next_in_row = ps_i;
2218
      ps_i->prev_in_row = last_must_precede;
2219
      if (ps_i->next_in_row)
2220
        ps_i->next_in_row->prev_in_row = ps_i;
2221
    }
2222
 
2223
  return true;
2224
}
2225
 
2226
/* Advances the PS_INSN one column in its current row; returns false
2227
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
2228
   the node with cuid N must be come after the node pointed to by
2229
   PS_I when scheduled in the same cycle.  */
2230
static int
2231
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2232
                        sbitmap must_follow)
2233
{
2234
  ps_insn_ptr prev, next;
2235
  int row;
2236
  ddg_node_ptr next_node;
2237
 
2238
  if (!ps || !ps_i)
2239
    return false;
2240
 
2241
  row = SMODULO (ps_i->cycle, ps->ii);
2242
 
2243
  if (! ps_i->next_in_row)
2244
    return false;
2245
 
2246
  next_node = ps_i->next_in_row->node;
2247
 
2248
  /* Check if next_in_row is dependent on ps_i, both having same sched
2249
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
2250
  if (TEST_BIT (must_follow, next_node->cuid))
2251
    return false;
2252
 
2253
  /* Advance PS_I over its next_in_row in the doubly linked list.  */
2254
  prev = ps_i->prev_in_row;
2255
  next = ps_i->next_in_row;
2256
 
2257
  if (ps_i == ps->rows[row])
2258
    ps->rows[row] = next;
2259
 
2260
  ps_i->next_in_row = next->next_in_row;
2261
 
2262
  if (next->next_in_row)
2263
    next->next_in_row->prev_in_row = ps_i;
2264
 
2265
  next->next_in_row = ps_i;
2266
  ps_i->prev_in_row = next;
2267
 
2268
  next->prev_in_row = prev;
2269
  if (prev)
2270
    prev->next_in_row = next;
2271
 
2272
  return true;
2273
}
2274
 
2275
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2276
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
2277
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2278
   before/after (respectively) the node pointed to by PS_I when scheduled
2279
   in the same cycle.  */
2280
static ps_insn_ptr
2281
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2282
                sbitmap must_precede, sbitmap must_follow)
2283
{
2284
  ps_insn_ptr ps_i;
2285
  int rest_count = 1;
2286
  int row = SMODULO (cycle, ps->ii);
2287
 
2288
  if (ps->rows[row]
2289
      && ps->rows[row]->row_rest_count >= issue_rate)
2290
    return NULL;
2291
 
2292
  if (ps->rows[row])
2293
    rest_count += ps->rows[row]->row_rest_count;
2294
 
2295
  ps_i = create_ps_insn (node, rest_count, cycle);
2296
 
2297
  /* Finds and inserts PS_I according to MUST_FOLLOW and
2298
     MUST_PRECEDE.  */
2299
  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2300
    {
2301
      free (ps_i);
2302
      return NULL;
2303
    }
2304
 
2305
  return ps_i;
2306
}
2307
 
2308
/* Advance time one cycle.  Assumes DFA is being used.  */
2309
static void
2310
advance_one_cycle (void)
2311
{
2312
  if (targetm.sched.dfa_pre_cycle_insn)
2313
    state_transition (curr_state,
2314
                      targetm.sched.dfa_pre_cycle_insn ());
2315
 
2316
  state_transition (curr_state, NULL);
2317
 
2318
  if (targetm.sched.dfa_post_cycle_insn)
2319
    state_transition (curr_state,
2320
                      targetm.sched.dfa_post_cycle_insn ());
2321
}
2322
 
2323
/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2324
   the number of cycles according to DFA that the kernel fits in,
2325
   we use this to check if we done well with SMS after we add
2326
   register moves.  In some cases register moves overhead makes
2327
   it even worse than the original loop.  We want SMS to be performed
2328
   when it gives less cycles after register moves are added.  */
2329
static int
2330
kernel_number_of_cycles (rtx first_insn, rtx last_insn)
2331
{
2332
  int cycles = 0;
2333
  rtx insn;
2334
  int can_issue_more = issue_rate;
2335
 
2336
  state_reset (curr_state);
2337
 
2338
  for (insn = first_insn;
2339
       insn != NULL_RTX && insn != last_insn;
2340
       insn = NEXT_INSN (insn))
2341
    {
2342
      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2343
        continue;
2344
 
2345
      /* Check if there is room for the current insn.  */
2346
      if (!can_issue_more || state_dead_lock_p (curr_state))
2347
        {
2348
          cycles ++;
2349
          advance_one_cycle ();
2350
          can_issue_more = issue_rate;
2351
        }
2352
 
2353
        /* Update the DFA state and return with failure if the DFA found
2354
           recource conflicts.  */
2355
      if (state_transition (curr_state, insn) >= 0)
2356
        {
2357
          cycles ++;
2358
          advance_one_cycle ();
2359
          can_issue_more = issue_rate;
2360
        }
2361
 
2362
      if (targetm.sched.variable_issue)
2363
        can_issue_more =
2364
          targetm.sched.variable_issue (sched_dump, sched_verbose,
2365
                                        insn, can_issue_more);
2366
      /* A naked CLOBBER or USE generates no instruction, so don't
2367
         let them consume issue slots.  */
2368
      else if (GET_CODE (PATTERN (insn)) != USE
2369
               && GET_CODE (PATTERN (insn)) != CLOBBER)
2370
        can_issue_more--;
2371
    }
2372
  return cycles;
2373
}
2374
 
2375
/* Checks if PS has resource conflicts according to DFA, starting from
2376
   FROM cycle to TO cycle; returns true if there are conflicts and false
2377
   if there are no conflicts.  Assumes DFA is being used.  */
2378
static int
2379
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2380
{
2381
  int cycle;
2382
 
2383
  state_reset (curr_state);
2384
 
2385
  for (cycle = from; cycle <= to; cycle++)
2386
    {
2387
      ps_insn_ptr crr_insn;
2388
      /* Holds the remaining issue slots in the current row.  */
2389
      int can_issue_more = issue_rate;
2390
 
2391
      /* Walk through the DFA for the current row.  */
2392
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2393
           crr_insn;
2394
           crr_insn = crr_insn->next_in_row)
2395
        {
2396
          rtx insn = crr_insn->node->insn;
2397
 
2398
          if (!INSN_P (insn))
2399
            continue;
2400
 
2401
          /* Check if there is room for the current insn.  */
2402
          if (!can_issue_more || state_dead_lock_p (curr_state))
2403
            return true;
2404
 
2405
          /* Update the DFA state and return with failure if the DFA found
2406
             recource conflicts.  */
2407
          if (state_transition (curr_state, insn) >= 0)
2408
            return true;
2409
 
2410
          if (targetm.sched.variable_issue)
2411
            can_issue_more =
2412
              targetm.sched.variable_issue (sched_dump, sched_verbose,
2413
                                            insn, can_issue_more);
2414
          /* A naked CLOBBER or USE generates no instruction, so don't
2415
             let them consume issue slots.  */
2416
          else if (GET_CODE (PATTERN (insn)) != USE
2417
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
2418
            can_issue_more--;
2419
        }
2420
 
2421
      /* Advance the DFA to the next cycle.  */
2422
      advance_one_cycle ();
2423
    }
2424
  return false;
2425
}
2426
 
2427
/* Checks if the given node causes resource conflicts when added to PS at
2428
   cycle C.  If not the node is added to PS and returned; otherwise zero
2429
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2430
   cuid N must be come before/after (respectively) the node pointed to by
2431
   PS_I when scheduled in the same cycle.  */
2432
ps_insn_ptr
2433
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2434
                             int c, sbitmap must_precede,
2435
                             sbitmap must_follow)
2436
{
2437
  int has_conflicts = 0;
2438
  ps_insn_ptr ps_i;
2439
 
2440
  /* First add the node to the PS, if this succeeds check for
2441
     conflicts, trying different issue slots in the same row.  */
2442
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2443
    return NULL; /* Failed to insert the node at the given cycle.  */
2444
 
2445
  has_conflicts = ps_has_conflicts (ps, c, c)
2446
                  || (ps->history > 0
2447
                      && ps_has_conflicts (ps,
2448
                                           c - ps->history,
2449
                                           c + ps->history));
2450
 
2451
  /* Try different issue slots to find one that the given node can be
2452
     scheduled in without conflicts.  */
2453
  while (has_conflicts)
2454
    {
2455
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
2456
        break;
2457
      has_conflicts = ps_has_conflicts (ps, c, c)
2458
                      || (ps->history > 0
2459
                          && ps_has_conflicts (ps,
2460
                                               c - ps->history,
2461
                                               c + ps->history));
2462
    }
2463
 
2464
  if (has_conflicts)
2465
    {
2466
      remove_node_from_ps (ps, ps_i);
2467
      return NULL;
2468
    }
2469
 
2470
  ps->min_cycle = MIN (ps->min_cycle, c);
2471
  ps->max_cycle = MAX (ps->max_cycle, c);
2472
  return ps_i;
2473
}
2474
 
2475
/* Rotate the rows of PS such that insns scheduled at time
2476
   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
2477
void
2478
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2479
{
2480
  int i, row, backward_rotates;
2481
  int last_row = ps->ii - 1;
2482
 
2483
  if (start_cycle == 0)
2484
    return;
2485
 
2486
  backward_rotates = SMODULO (start_cycle, ps->ii);
2487
 
2488
  /* Revisit later and optimize this into a single loop.  */
2489
  for (i = 0; i < backward_rotates; i++)
2490
    {
2491
      ps_insn_ptr first_row = ps->rows[0];
2492
 
2493
      for (row = 0; row < last_row; row++)
2494
        ps->rows[row] = ps->rows[row+1];
2495
 
2496
      ps->rows[last_row] = first_row;
2497
    }
2498
 
2499
  ps->max_cycle -= start_cycle;
2500
  ps->min_cycle -= start_cycle;
2501
}
2502
 
2503
/* Remove the node N from the partial schedule PS; because we restart the DFA
2504
   each time we want to check for resource conflicts; this is equivalent to
2505
   unscheduling the node N.  */
2506
static bool
2507
ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2508
{
2509
  ps_insn_ptr ps_i;
2510
  int row = SMODULO (SCHED_TIME (n), ps->ii);
2511
 
2512
  if (row < 0 || row > ps->ii)
2513
    return false;
2514
 
2515
  for (ps_i = ps->rows[row];
2516
       ps_i &&  ps_i->node != n;
2517
       ps_i = ps_i->next_in_row);
2518
  if (!ps_i)
2519
    return false;
2520
 
2521
  return remove_node_from_ps (ps, ps_i);
2522
}
2523
#endif /* INSN_SCHEDULING */
2524
 
2525
static bool
2526
gate_handle_sms (void)
2527
{
2528
  return (optimize > 0 && flag_modulo_sched);
2529
}
2530
 
2531
 
2532
/* Run instruction scheduler.  */
2533
/* Perform SMS module scheduling.  */
2534
static void
2535
rest_of_handle_sms (void)
2536
{
2537
#ifdef INSN_SCHEDULING
2538
  basic_block bb;
2539
  sbitmap blocks;
2540
 
2541
  /* We want to be able to create new pseudos.  */
2542
  no_new_pseudos = 0;
2543
  /* Collect loop information to be used in SMS.  */
2544
  cfg_layout_initialize (CLEANUP_UPDATE_LIFE);
2545
  sms_schedule (dump_file);
2546
 
2547
  /* Update the life information, because we add pseudos.  */
2548
  max_regno = max_reg_num ();
2549
  allocate_reg_info (max_regno, FALSE, FALSE);
2550
  blocks = sbitmap_alloc (last_basic_block);
2551
  sbitmap_ones (blocks);
2552
  update_life_info (blocks, UPDATE_LIFE_GLOBAL_RM_NOTES,
2553
                    (PROP_DEATH_NOTES
2554
                     | PROP_REG_INFO
2555
                     | PROP_KILL_DEAD_CODE
2556
                     | PROP_SCAN_DEAD_CODE));
2557
 
2558
  no_new_pseudos = 1;
2559
 
2560
  /* Finalize layout changes.  */
2561
  FOR_EACH_BB (bb)
2562
    if (bb->next_bb != EXIT_BLOCK_PTR)
2563
      bb->aux = bb->next_bb;
2564
  cfg_layout_finalize ();
2565
  free_dominance_info (CDI_DOMINATORS);
2566
#endif /* INSN_SCHEDULING */
2567
}
2568
 
2569
struct tree_opt_pass pass_sms =
2570
{
2571
  "sms",                                /* name */
2572
  gate_handle_sms,                      /* gate */
2573
  rest_of_handle_sms,                   /* execute */
2574
  NULL,                                 /* sub */
2575
  NULL,                                 /* next */
2576
  0,                                    /* static_pass_number */
2577
  TV_SMS,                               /* tv_id */
2578
  0,                                    /* properties_required */
2579
  0,                                    /* properties_provided */
2580
  0,                                    /* properties_destroyed */
2581
  0,                                    /* todo_flags_start */
2582
  TODO_dump_func |
2583
  TODO_ggc_collect,                     /* todo_flags_finish */
2584
  'm'                                   /* letter */
2585
};
2586
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.