OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [modulo-sched.c] - Blame information for rev 645

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Swing Modulo Scheduling implementation.
2
   Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "coretypes.h"
25
#include "tm.h"
26
#include "toplev.h"
27
#include "rtl.h"
28
#include "tm_p.h"
29
#include "hard-reg-set.h"
30
#include "regs.h"
31
#include "function.h"
32
#include "flags.h"
33
#include "insn-config.h"
34
#include "insn-attr.h"
35
#include "except.h"
36
#include "toplev.h"
37
#include "recog.h"
38
#include "sched-int.h"
39
#include "target.h"
40
#include "cfglayout.h"
41
#include "cfgloop.h"
42
#include "cfghooks.h"
43
#include "expr.h"
44
#include "params.h"
45
#include "gcov-io.h"
46
#include "df.h"
47
#include "ddg.h"
48
#include "timevar.h"
49
#include "tree-pass.h"
50
 
51
#ifdef INSN_SCHEDULING
52
 
53
/* This file contains the implementation of the Swing Modulo Scheduler,
54
   described in the following references:
55
   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56
       Lifetime--sensitive modulo scheduling in a production environment.
57
       IEEE Trans. on Comps., 50(3), March 2001
58
   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59
       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60
       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
61
 
62
   The basic structure is:
63
   1. Build a data-dependence graph (DDG) for each loop.
64
   2. Use the DDG to order the insns of a loop (not in topological order
65
      necessarily, but rather) trying to place each insn after all its
66
      predecessors _or_ after all its successors.
67
   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68
   4. Use the ordering to perform list-scheduling of the loop:
69
      1. Set II = MII.  We will try to schedule the loop within II cycles.
70
      2. Try to schedule the insns one by one according to the ordering.
71
         For each insn compute an interval of cycles by considering already-
72
         scheduled preds and succs (and associated latencies); try to place
73
         the insn in the cycles of this window checking for potential
74
         resource conflicts (using the DFA interface).
75
         Note: this is different from the cycle-scheduling of schedule_insns;
76
         here the insns are not scheduled monotonically top-down (nor bottom-
77
         up).
78
      3. If failed in scheduling all insns - bump II++ and try again, unless
79
         II reaches an upper bound MaxII, in which case report failure.
80
   5. If we succeeded in scheduling the loop within II cycles, we now
81
      generate prolog and epilog, decrease the counter of the loop, and
82
      perform modulo variable expansion for live ranges that span more than
83
      II cycles (i.e. use register copies to prevent a def from overwriting
84
      itself before reaching the use).
85
*/
86
 
87
 
88
/* This page defines partial-schedule structures and functions for
89
   modulo scheduling.  */
90
 
91
typedef struct partial_schedule *partial_schedule_ptr;
92
typedef struct ps_insn *ps_insn_ptr;
93
 
94
/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
95
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
96
 
97
/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
98
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
99
 
100
/* Perform signed modulo, always returning a non-negative value.  */
101
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
102
 
103
/* The number of different iterations the nodes in ps span, assuming
104
   the stage boundaries are placed efficiently.  */
105
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
106
                             + 1 + (ps)->ii - 1) / (ps)->ii)
107
 
108
/* A single instruction in the partial schedule.  */
109
struct ps_insn
110
{
111
  /* The corresponding DDG_NODE.  */
112
  ddg_node_ptr node;
113
 
114
  /* The (absolute) cycle in which the PS instruction is scheduled.
115
     Same as SCHED_TIME (node).  */
116
  int cycle;
117
 
118
  /* The next/prev PS_INSN in the same row.  */
119
  ps_insn_ptr next_in_row,
120
              prev_in_row;
121
 
122
  /* The number of nodes in the same row that come after this node.  */
123
  int row_rest_count;
124
};
125
 
126
/* Holds the partial schedule as an array of II rows.  Each entry of the
127
   array points to a linked list of PS_INSNs, which represents the
128
   instructions that are scheduled for that row.  */
129
struct partial_schedule
130
{
131
  int ii;       /* Number of rows in the partial schedule.  */
132
  int history;  /* Threshold for conflict checking using DFA.  */
133
 
134
  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
135
  ps_insn_ptr *rows;
136
 
137
  /* The earliest absolute cycle of an insn in the partial schedule.  */
138
  int min_cycle;
139
 
140
  /* The latest absolute cycle of an insn in the partial schedule.  */
141
  int max_cycle;
142
 
143
  ddg_ptr g;    /* The DDG of the insns in the partial schedule.  */
144
};
145
 
146
/* We use this to record all the register replacements we do in
147
   the kernel so we can undo SMS if it is not profitable.  */
148
struct undo_replace_buff_elem
149
{
150
  rtx insn;
151
  rtx orig_reg;
152
  rtx new_reg;
153
  struct undo_replace_buff_elem *next;
154
};
155
 
156
 
157
 
158
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
159
static void free_partial_schedule (partial_schedule_ptr);
160
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
161
void print_partial_schedule (partial_schedule_ptr, FILE *);
162
static int kernel_number_of_cycles (rtx first_insn, rtx last_insn);
163
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
164
                                                ddg_node_ptr node, int cycle,
165
                                                sbitmap must_precede,
166
                                                sbitmap must_follow);
167
static void rotate_partial_schedule (partial_schedule_ptr, int);
168
void set_row_column_for_ps (partial_schedule_ptr);
169
static bool ps_unschedule_node (partial_schedule_ptr, ddg_node_ptr );
170
 
171
 
172
/* This page defines constants and structures for the modulo scheduling
173
   driver.  */
174
 
175
/* As in haifa-sched.c:  */
176
/* issue_rate is the number of insns that can be scheduled in the same
177
   machine cycle.  It can be defined in the config/mach/mach.h file,
178
   otherwise we set it to 1.  */
179
 
180
static int issue_rate;
181
 
182
static int sms_order_nodes (ddg_ptr, int, int * result);
183
static void set_node_sched_params (ddg_ptr);
184
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
185
static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
186
static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
187
static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
188
                                       int from_stage, int to_stage,
189
                                       int is_prolog);
190
 
191
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
192
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
193
#define SCHED_FIRST_REG_MOVE(x) \
194
        (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
195
#define SCHED_NREG_MOVES(x) \
196
        (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
197
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
198
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
199
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
200
 
201
/* The scheduling parameters held for each node.  */
202
typedef struct node_sched_params
203
{
204
  int asap;     /* A lower-bound on the absolute scheduling cycle.  */
205
  int time;     /* The absolute scheduling cycle (time >= asap).  */
206
 
207
  /* The following field (first_reg_move) is a pointer to the first
208
     register-move instruction added to handle the modulo-variable-expansion
209
     of the register defined by this node.  This register-move copies the
210
     original register defined by the node.  */
211
  rtx first_reg_move;
212
 
213
  /* The number of register-move instructions added, immediately preceding
214
     first_reg_move.  */
215
  int nreg_moves;
216
 
217
  int row;    /* Holds time % ii.  */
218
  int stage;  /* Holds time / ii.  */
219
 
220
  /* The column of a node inside the ps.  If nodes u, v are on the same row,
221
     u will precede v if column (u) < column (v).  */
222
  int column;
223
} *node_sched_params_ptr;
224
 
225
 
226
/* The following three functions are copied from the current scheduler
227
   code in order to use sched_analyze() for computing the dependencies.
228
   They are used when initializing the sched_info structure.  */
229
static const char *
230
sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
231
{
232
  static char tmp[80];
233
 
234
  sprintf (tmp, "i%4d", INSN_UID (insn));
235
  return tmp;
236
}
237
 
238
static void
239
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
240
                               regset cond_exec ATTRIBUTE_UNUSED,
241
                               regset used ATTRIBUTE_UNUSED,
242
                               regset set ATTRIBUTE_UNUSED)
243
{
244
}
245
 
246
static struct sched_info sms_sched_info =
247
{
248
  NULL,
249
  NULL,
250
  NULL,
251
  NULL,
252
  NULL,
253
  sms_print_insn,
254
  NULL,
255
  compute_jump_reg_dependencies,
256
  NULL, NULL,
257
  NULL, NULL,
258
  0, 0, 0,
259
 
260
  NULL, NULL, NULL, NULL, NULL,
261
#ifdef ENABLE_CHECKING
262
  NULL,
263
#endif
264
 
265
};
266
 
267
 
268
/* Return the register decremented and tested in INSN,
269
   or zero if it is not a decrement-and-branch insn.  */
270
 
271
static rtx
272
doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
273
{
274
#ifdef HAVE_doloop_end
275
  rtx pattern, reg, condition;
276
 
277
  if (! JUMP_P (insn))
278
    return NULL_RTX;
279
 
280
  pattern = PATTERN (insn);
281
  condition = doloop_condition_get (pattern);
282
  if (! condition)
283
    return NULL_RTX;
284
 
285
  if (REG_P (XEXP (condition, 0)))
286
    reg = XEXP (condition, 0);
287
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
288
           && REG_P (XEXP (XEXP (condition, 0), 0)))
289
    reg = XEXP (XEXP (condition, 0), 0);
290
  else
291
    gcc_unreachable ();
292
 
293
  return reg;
294
#else
295
  return NULL_RTX;
296
#endif
297
}
298
 
299
/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
300
   that the number of iterations is a compile-time constant.  If so,
301
   return the rtx that sets COUNT_REG to a constant, and set COUNT to
302
   this constant.  Otherwise return 0.  */
303
static rtx
304
const_iteration_count (rtx count_reg, basic_block pre_header,
305
                       HOST_WIDEST_INT * count)
306
{
307
  rtx insn;
308
  rtx head, tail;
309
 
310
  if (! pre_header)
311
    return NULL_RTX;
312
 
313
  get_ebb_head_tail (pre_header, pre_header, &head, &tail);
314
 
315
  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
316
    if (INSN_P (insn) && single_set (insn) &&
317
        rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
318
      {
319
        rtx pat = single_set (insn);
320
 
321
        if (GET_CODE (SET_SRC (pat)) == CONST_INT)
322
          {
323
            *count = INTVAL (SET_SRC (pat));
324
            return insn;
325
          }
326
 
327
        return NULL_RTX;
328
      }
329
 
330
  return NULL_RTX;
331
}
332
 
333
/* A very simple resource-based lower bound on the initiation interval.
334
   ??? Improve the accuracy of this bound by considering the
335
   utilization of various units.  */
336
static int
337
res_MII (ddg_ptr g)
338
{
339
  return (g->num_nodes / issue_rate);
340
}
341
 
342
 
343
/* Points to the array that contains the sched data for each node.  */
344
static node_sched_params_ptr node_sched_params;
345
 
346
/* Allocate sched_params for each node and initialize it.  Assumes that
347
   the aux field of each node contain the asap bound (computed earlier),
348
   and copies it into the sched_params field.  */
349
static void
350
set_node_sched_params (ddg_ptr g)
351
{
352
  int i;
353
 
354
  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
355
  /* Initialize ASAP/ALAP/HIGHT to zero.  */
356
  node_sched_params = (node_sched_params_ptr)
357
                       xcalloc (g->num_nodes,
358
                                sizeof (struct node_sched_params));
359
 
360
  /* Set the pointer of the general data of the node to point to the
361
     appropriate sched_params structure.  */
362
  for (i = 0; i < g->num_nodes; i++)
363
    {
364
      /* Watch out for aliasing problems?  */
365
      node_sched_params[i].asap = g->nodes[i].aux.count;
366
      g->nodes[i].aux.info = &node_sched_params[i];
367
    }
368
}
369
 
370
static void
371
print_node_sched_params (FILE * file, int num_nodes)
372
{
373
  int i;
374
 
375
  if (! file)
376
    return;
377
  for (i = 0; i < num_nodes; i++)
378
    {
379
      node_sched_params_ptr nsp = &node_sched_params[i];
380
      rtx reg_move = nsp->first_reg_move;
381
      int j;
382
 
383
      fprintf (file, "Node %d:\n", i);
384
      fprintf (file, " asap = %d:\n", nsp->asap);
385
      fprintf (file, " time = %d:\n", nsp->time);
386
      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
387
      for (j = 0; j < nsp->nreg_moves; j++)
388
        {
389
          fprintf (file, " reg_move = ");
390
          print_rtl_single (file, reg_move);
391
          reg_move = PREV_INSN (reg_move);
392
        }
393
    }
394
}
395
 
396
/* Calculate an upper bound for II.  SMS should not schedule the loop if it
397
   requires more cycles than this bound.  Currently set to the sum of the
398
   longest latency edge for each node.  Reset based on experiments.  */
399
static int
400
calculate_maxii (ddg_ptr g)
401
{
402
  int i;
403
  int maxii = 0;
404
 
405
  for (i = 0; i < g->num_nodes; i++)
406
    {
407
      ddg_node_ptr u = &g->nodes[i];
408
      ddg_edge_ptr e;
409
      int max_edge_latency = 0;
410
 
411
      for (e = u->out; e; e = e->next_out)
412
        max_edge_latency = MAX (max_edge_latency, e->latency);
413
 
414
      maxii += max_edge_latency;
415
    }
416
  return maxii;
417
}
418
 
419
/*
420
   Breaking intra-loop register anti-dependences:
421
   Each intra-loop register anti-dependence implies a cross-iteration true
422
   dependence of distance 1. Therefore, we can remove such false dependencies
423
   and figure out if the partial schedule broke them by checking if (for a
424
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
425
   if so generate a register move.   The number of such moves is equal to:
426
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
427
   nreg_moves = ----------------------------------- + 1 - {   dependence.
428
                            ii                          { 1 if not.
429
*/
430
static struct undo_replace_buff_elem *
431
generate_reg_moves (partial_schedule_ptr ps)
432
{
433
  ddg_ptr g = ps->g;
434
  int ii = ps->ii;
435
  int i;
436
  struct undo_replace_buff_elem *reg_move_replaces = NULL;
437
 
438
  for (i = 0; i < g->num_nodes; i++)
439
    {
440
      ddg_node_ptr u = &g->nodes[i];
441
      ddg_edge_ptr e;
442
      int nreg_moves = 0, i_reg_move;
443
      sbitmap *uses_of_defs;
444
      rtx last_reg_move;
445
      rtx prev_reg, old_reg;
446
 
447
      /* Compute the number of reg_moves needed for u, by looking at life
448
         ranges started at u (excluding self-loops).  */
449
      for (e = u->out; e; e = e->next_out)
450
        if (e->type == TRUE_DEP && e->dest != e->src)
451
          {
452
            int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
453
 
454
            if (e->distance == 1)
455
              nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
456
 
457
            /* If dest precedes src in the schedule of the kernel, then dest
458
               will read before src writes and we can save one reg_copy.  */
459
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
460
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
461
              nreg_moves4e--;
462
 
463
            nreg_moves = MAX (nreg_moves, nreg_moves4e);
464
          }
465
 
466
      if (nreg_moves == 0)
467
        continue;
468
 
469
      /* Every use of the register defined by node may require a different
470
         copy of this register, depending on the time the use is scheduled.
471
         Set a bitmap vector, telling which nodes use each copy of this
472
         register.  */
473
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
474
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
475
      for (e = u->out; e; e = e->next_out)
476
        if (e->type == TRUE_DEP && e->dest != e->src)
477
          {
478
            int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
479
 
480
            if (e->distance == 1)
481
              dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
482
 
483
            if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
484
                && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
485
              dest_copy--;
486
 
487
            if (dest_copy)
488
              SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
489
          }
490
 
491
      /* Now generate the reg_moves, attaching relevant uses to them.  */
492
      SCHED_NREG_MOVES (u) = nreg_moves;
493
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
494
      last_reg_move = u->insn;
495
 
496
      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
497
        {
498
          unsigned int i_use = 0;
499
          rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
500
          rtx reg_move = gen_move_insn (new_reg, prev_reg);
501
          sbitmap_iterator sbi;
502
 
503
          add_insn_before (reg_move, last_reg_move);
504
          last_reg_move = reg_move;
505
 
506
          if (!SCHED_FIRST_REG_MOVE (u))
507
            SCHED_FIRST_REG_MOVE (u) = reg_move;
508
 
509
          EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
510
            {
511
              struct undo_replace_buff_elem *rep;
512
 
513
              rep = (struct undo_replace_buff_elem *)
514
                    xcalloc (1, sizeof (struct undo_replace_buff_elem));
515
              rep->insn = g->nodes[i_use].insn;
516
              rep->orig_reg = old_reg;
517
              rep->new_reg = new_reg;
518
 
519
              if (! reg_move_replaces)
520
                reg_move_replaces = rep;
521
              else
522
                {
523
                  rep->next = reg_move_replaces;
524
                  reg_move_replaces = rep;
525
                }
526
 
527
              replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
528
            }
529
 
530
          prev_reg = new_reg;
531
        }
532
      sbitmap_vector_free (uses_of_defs);
533
    }
534
  return reg_move_replaces;
535
}
536
 
537
/* We call this when we want to undo the SMS schedule for a given loop.
538
   One of the things that we do is to delete the register moves generated
539
   for the sake of SMS; this function deletes the register move instructions
540
   recorded in the undo buffer.  */
541
static void
542
undo_generate_reg_moves (partial_schedule_ptr ps,
543
                         struct undo_replace_buff_elem *reg_move_replaces)
544
{
545
  int i,j;
546
 
547
  for (i = 0; i < ps->g->num_nodes; i++)
548
    {
549
      ddg_node_ptr u = &ps->g->nodes[i];
550
      rtx prev;
551
      rtx crr = SCHED_FIRST_REG_MOVE (u);
552
 
553
      for (j = 0; j < SCHED_NREG_MOVES (u); j++)
554
        {
555
          prev = PREV_INSN (crr);
556
          delete_insn (crr);
557
          crr = prev;
558
        }
559
      SCHED_FIRST_REG_MOVE (u) = NULL_RTX;
560
    }
561
 
562
  while (reg_move_replaces)
563
    {
564
      struct undo_replace_buff_elem *rep = reg_move_replaces;
565
 
566
      reg_move_replaces = reg_move_replaces->next;
567
      replace_rtx (rep->insn, rep->new_reg, rep->orig_reg);
568
    }
569
}
570
 
571
/* Free memory allocated for the undo buffer.  */
572
static void
573
free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
574
{
575
 
576
  while (reg_move_replaces)
577
    {
578
      struct undo_replace_buff_elem *rep = reg_move_replaces;
579
 
580
      reg_move_replaces = reg_move_replaces->next;
581
      free (rep);
582
    }
583
}
584
 
585
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
586
   of SCHED_ROW and SCHED_STAGE.  */
587
static void
588
normalize_sched_times (partial_schedule_ptr ps)
589
{
590
  int i;
591
  ddg_ptr g = ps->g;
592
  int amount = PS_MIN_CYCLE (ps);
593
  int ii = ps->ii;
594
 
595
  /* Don't include the closing branch assuming that it is the last node.  */
596
  for (i = 0; i < g->num_nodes - 1; i++)
597
    {
598
      ddg_node_ptr u = &g->nodes[i];
599
      int normalized_time = SCHED_TIME (u) - amount;
600
 
601
      gcc_assert (normalized_time >= 0);
602
 
603
      SCHED_TIME (u) = normalized_time;
604
      SCHED_ROW (u) = normalized_time % ii;
605
      SCHED_STAGE (u) = normalized_time / ii;
606
    }
607
}
608
 
609
/* Set SCHED_COLUMN of each node according to its position in PS.  */
610
static void
611
set_columns_for_ps (partial_schedule_ptr ps)
612
{
613
  int row;
614
 
615
  for (row = 0; row < ps->ii; row++)
616
    {
617
      ps_insn_ptr cur_insn = ps->rows[row];
618
      int column = 0;
619
 
620
      for (; cur_insn; cur_insn = cur_insn->next_in_row)
621
        SCHED_COLUMN (cur_insn->node) = column++;
622
    }
623
}
624
 
625
/* Permute the insns according to their order in PS, from row 0 to
626
   row ii-1, and position them right before LAST.  This schedules
627
   the insns of the loop kernel.  */
628
static void
629
permute_partial_schedule (partial_schedule_ptr ps, rtx last)
630
{
631
  int ii = ps->ii;
632
  int row;
633
  ps_insn_ptr ps_ij;
634
 
635
  for (row = 0; row < ii ; row++)
636
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
637
      if (PREV_INSN (last) != ps_ij->node->insn)
638
        reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
639
                            PREV_INSN (last));
640
}
641
 
642
/* As part of undoing SMS we return to the original ordering of the
643
   instructions inside the loop kernel.  Given the partial schedule PS, this
644
   function returns the ordering of the instruction according to their CUID
645
   in the DDG (PS->G), which is the original order of the instruction before
646
   performing SMS.  */
647
static void
648
undo_permute_partial_schedule (partial_schedule_ptr ps, rtx last)
649
{
650
  int i;
651
 
652
  for (i = 0 ; i < ps->g->num_nodes; i++)
653
    if (last == ps->g->nodes[i].insn
654
        || last == ps->g->nodes[i].first_note)
655
      break;
656
    else if (PREV_INSN (last) != ps->g->nodes[i].insn)
657
      reorder_insns_nobb (ps->g->nodes[i].first_note, ps->g->nodes[i].insn,
658
                          PREV_INSN (last));
659
}
660
 
661
/* Used to generate the prologue & epilogue.  Duplicate the subset of
662
   nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
663
   of both), together with a prefix/suffix of their reg_moves.  */
664
static void
665
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
666
                           int to_stage, int for_prolog)
667
{
668
  int row;
669
  ps_insn_ptr ps_ij;
670
 
671
  for (row = 0; row < ps->ii; row++)
672
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
673
      {
674
        ddg_node_ptr u_node = ps_ij->node;
675
        int j, i_reg_moves;
676
        rtx reg_move = NULL_RTX;
677
 
678
        if (for_prolog)
679
          {
680
            /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
681
               number of reg_moves starting with the second occurrence of
682
               u_node, which is generated if its SCHED_STAGE <= to_stage.  */
683
            i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
684
            i_reg_moves = MAX (i_reg_moves, 0);
685
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
686
 
687
            /* The reg_moves start from the *first* reg_move backwards.  */
688
            if (i_reg_moves)
689
              {
690
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
691
                for (j = 1; j < i_reg_moves; j++)
692
                  reg_move = PREV_INSN (reg_move);
693
              }
694
          }
695
        else /* It's for the epilog.  */
696
          {
697
            /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
698
               starting to decrease one stage after u_node no longer occurs;
699
               that is, generate all reg_moves until
700
               SCHED_STAGE (u_node) == from_stage - 1.  */
701
            i_reg_moves = SCHED_NREG_MOVES (u_node)
702
                       - (from_stage - SCHED_STAGE (u_node) - 1);
703
            i_reg_moves = MAX (i_reg_moves, 0);
704
            i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
705
 
706
            /* The reg_moves start from the *last* reg_move forwards.  */
707
            if (i_reg_moves)
708
              {
709
                reg_move = SCHED_FIRST_REG_MOVE (u_node);
710
                for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
711
                  reg_move = PREV_INSN (reg_move);
712
              }
713
          }
714
 
715
        for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
716
          emit_insn (copy_rtx (PATTERN (reg_move)));
717
        if (SCHED_STAGE (u_node) >= from_stage
718
            && SCHED_STAGE (u_node) <= to_stage)
719
          duplicate_insn_chain (u_node->first_note, u_node->insn);
720
      }
721
}
722
 
723
 
724
/* Generate the instructions (including reg_moves) for prolog & epilog.  */
725
static void
726
generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
727
{
728
  int i;
729
  int last_stage = PS_STAGE_COUNT (ps) - 1;
730
  edge e;
731
 
732
  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
733
  start_sequence ();
734
 
735
  if (count_reg)
736
   /* Generate a subtract instruction at the beginning of the prolog to
737
      adjust the loop count by STAGE_COUNT.  */
738
   emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
739
 
740
  for (i = 0; i < last_stage; i++)
741
    duplicate_insns_of_cycles (ps, 0, i, 1);
742
 
743
  /* Put the prolog ,  on the one and only entry edge.  */
744
  e = loop_preheader_edge (loop);
745
  loop_split_edge_with(e , get_insns());
746
 
747
  end_sequence ();
748
 
749
  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
750
  start_sequence ();
751
 
752
  for (i = 0; i < last_stage; i++)
753
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
754
 
755
  /* Put the epilogue on the one and only one exit edge.  */
756
  gcc_assert (loop->single_exit);
757
  e = loop->single_exit;
758
  loop_split_edge_with(e , get_insns());
759
  end_sequence ();
760
}
761
 
762
/* Return the line note insn preceding INSN, for debugging.  Taken from
763
   emit-rtl.c.  */
764
static rtx
765
find_line_note (rtx insn)
766
{
767
  for (; insn; insn = PREV_INSN (insn))
768
    if (NOTE_P (insn)
769
        && NOTE_LINE_NUMBER (insn) >= 0)
770
      break;
771
 
772
  return insn;
773
}
774
 
775
/* Return true if all the BBs of the loop are empty except the
776
   loop header.  */
777
static bool
778
loop_single_full_bb_p (struct loop *loop)
779
{
780
  unsigned i;
781
  basic_block *bbs = get_loop_body (loop);
782
 
783
  for (i = 0; i < loop->num_nodes ; i++)
784
    {
785
      rtx head, tail;
786
      bool empty_bb = true;
787
 
788
      if (bbs[i] == loop->header)
789
        continue;
790
 
791
      /* Make sure that basic blocks other than the header
792
         have only notes labels or jumps.  */
793
      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
794
      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
795
        {
796
          if (NOTE_P (head) || LABEL_P (head)
797
              || (INSN_P (head) && JUMP_P (head)))
798
            continue;
799
          empty_bb = false;
800
          break;
801
        }
802
 
803
      if (! empty_bb)
804
        {
805
          free (bbs);
806
          return false;
807
        }
808
    }
809
  free (bbs);
810
  return true;
811
}
812
 
813
/* A simple loop from SMS point of view; it is a loop that is composed of
814
   either a single basic block or two BBs - a header and a latch.  */
815
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )                     \
816
                                  && (EDGE_COUNT (loop->latch->preds) == 1) \
817
                                  && (EDGE_COUNT (loop->latch->succs) == 1))
818
 
819
/* Return true if the loop is in its canonical form and false if not.
820
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
821
static bool
822
loop_canon_p (struct loop *loop)
823
{
824
 
825
  if (loop->inner || ! loop->outer)
826
    return false;
827
 
828
  if (!loop->single_exit)
829
    {
830
      if (dump_file)
831
        {
832
          rtx line_note = find_line_note (BB_END (loop->header));
833
 
834
          fprintf (dump_file, "SMS loop many exits ");
835
          if (line_note)
836
            {
837
              expanded_location xloc;
838
              NOTE_EXPANDED_LOCATION (xloc, line_note);
839
              fprintf (dump_file, " %s %d (file, line)\n",
840
                       xloc.file, xloc.line);
841
            }
842
        }
843
      return false;
844
    }
845
 
846
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
847
    {
848
      if (dump_file)
849
        {
850
          rtx line_note = find_line_note (BB_END (loop->header));
851
 
852
          fprintf (dump_file, "SMS loop many BBs. ");
853
          if (line_note)
854
            {
855
              expanded_location xloc;
856
              NOTE_EXPANDED_LOCATION (xloc, line_note);
857
              fprintf (dump_file, " %s %d (file, line)\n",
858
                       xloc.file, xloc.line);
859
            }
860
        }
861
      return false;
862
    }
863
 
864
    return true;
865
}
866
 
867
/* If there are more than one entry for the loop,
868
   make it one by splitting the first entry edge and
869
   redirecting the others to the new BB.  */
870
static void
871
canon_loop (struct loop *loop)
872
{
873
  edge e;
874
  edge_iterator i;
875
 
876
  /* Avoid annoying special cases of edges going to exit
877
     block.  */
878
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
879
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
880
      loop_split_edge_with (e, NULL_RTX);
881
 
882
  if (loop->latch == loop->header
883
      || EDGE_COUNT (loop->latch->succs) > 1)
884
    {
885
      FOR_EACH_EDGE (e, i, loop->header->preds)
886
        if (e->src == loop->latch)
887
          break;
888
      loop_split_edge_with (e, NULL_RTX);
889
    }
890
}
891
 
892
/* Main entry point, perform SMS scheduling on the loops of the function
893
   that consist of single basic blocks.  */
894
static void
895
sms_schedule (void)
896
{
897
  static int passes = 0;
898
  rtx insn;
899
  ddg_ptr *g_arr, g;
900
  int * node_order;
901
  int maxii;
902
  unsigned i,num_loops;
903
  partial_schedule_ptr ps;
904
  struct df *df;
905
  struct loops *loops;
906
  basic_block bb = NULL;
907
  /* vars to the versioning only if needed*/
908
  struct loop * nloop;
909
  basic_block condition_bb = NULL;
910
  edge latch_edge;
911
  gcov_type trip_count = 0;
912
 
913
  loops = loop_optimizer_init (LOOPS_HAVE_PREHEADERS
914
                               | LOOPS_HAVE_MARKED_SINGLE_EXITS);
915
  if (!loops)
916
    return;  /* There is no loops to schedule.  */
917
 
918
  /* Initialize issue_rate.  */
919
  if (targetm.sched.issue_rate)
920
    {
921
      int temp = reload_completed;
922
 
923
      reload_completed = 1;
924
      issue_rate = targetm.sched.issue_rate ();
925
      reload_completed = temp;
926
    }
927
  else
928
    issue_rate = 1;
929
 
930
  /* Initialize the scheduler.  */
931
  current_sched_info = &sms_sched_info;
932
  sched_init ();
933
 
934
  /* Init Data Flow analysis, to be used in interloop dep calculation.  */
935
  df = df_init (DF_HARD_REGS | DF_EQUIV_NOTES | DF_SUBREGS);
936
  df_rd_add_problem (df, 0);
937
  df_ru_add_problem (df, 0);
938
  df_chain_add_problem (df, DF_DU_CHAIN | DF_UD_CHAIN);
939
  df_analyze (df);
940
 
941
  if (dump_file)
942
    df_dump (df, dump_file);
943
 
944
  /* Allocate memory to hold the DDG array one entry for each loop.
945
     We use loop->num as index into this array.  */
946
  g_arr = XCNEWVEC (ddg_ptr, loops->num);
947
 
948
 
949
  /* Build DDGs for all the relevant loops and hold them in G_ARR
950
     indexed by the loop index.  */
951
  for (i = 0; i < loops->num; i++)
952
    {
953
      rtx head, tail;
954
      rtx count_reg;
955
      struct loop *loop = loops->parray[i];
956
 
957
      /* For debugging.  */
958
      if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
959
        {
960
          if (dump_file)
961
            fprintf (dump_file, "SMS reached MAX_PASSES... \n");
962
 
963
          break;
964
        }
965
 
966
      if (! loop_canon_p (loop))
967
        continue;
968
 
969
      if (! loop_single_full_bb_p (loop))
970
        continue;
971
 
972
      bb = loop->header;
973
 
974
      get_ebb_head_tail (bb, bb, &head, &tail);
975
      latch_edge = loop_latch_edge (loop);
976
      gcc_assert (loop->single_exit);
977
      if (loop->single_exit->count)
978
        trip_count = latch_edge->count / loop->single_exit->count;
979
 
980
      /* Perfrom SMS only on loops that their average count is above threshold.  */
981
 
982
      if ( latch_edge->count
983
          && (latch_edge->count < loop->single_exit->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
984
        {
985
          if (dump_file)
986
            {
987
              rtx line_note = find_line_note (tail);
988
 
989
              if (line_note)
990
                {
991
                  expanded_location xloc;
992
                  NOTE_EXPANDED_LOCATION (xloc, line_note);
993
                  fprintf (dump_file, "SMS bb %s %d (file, line)\n",
994
                           xloc.file, xloc.line);
995
                }
996
              fprintf (dump_file, "SMS single-bb-loop\n");
997
              if (profile_info && flag_branch_probabilities)
998
                {
999
                  fprintf (dump_file, "SMS loop-count ");
1000
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1001
                           (HOST_WIDEST_INT) bb->count);
1002
                  fprintf (dump_file, "\n");
1003
                  fprintf (dump_file, "SMS trip-count ");
1004
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1005
                           (HOST_WIDEST_INT) trip_count);
1006
                  fprintf (dump_file, "\n");
1007
                  fprintf (dump_file, "SMS profile-sum-max ");
1008
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1009
                           (HOST_WIDEST_INT) profile_info->sum_max);
1010
                  fprintf (dump_file, "\n");
1011
                }
1012
            }
1013
          continue;
1014
        }
1015
 
1016
      /* Make sure this is a doloop.  */
1017
      if ( !(count_reg = doloop_register_get (tail)))
1018
        continue;
1019
 
1020
      /* Don't handle BBs with calls or barriers, or !single_set insns.  */
1021
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1022
        if (CALL_P (insn)
1023
            || BARRIER_P (insn)
1024
            || (INSN_P (insn) && !JUMP_P (insn)
1025
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1026
          break;
1027
 
1028
      if (insn != NEXT_INSN (tail))
1029
        {
1030
          if (dump_file)
1031
            {
1032
              if (CALL_P (insn))
1033
                fprintf (dump_file, "SMS loop-with-call\n");
1034
              else if (BARRIER_P (insn))
1035
                fprintf (dump_file, "SMS loop-with-barrier\n");
1036
              else
1037
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
1038
              print_rtl_single (dump_file, insn);
1039
            }
1040
 
1041
          continue;
1042
        }
1043
 
1044
      if (! (g = create_ddg (bb, df, 0)))
1045
        {
1046
          if (dump_file)
1047
            fprintf (dump_file, "SMS doloop\n");
1048
          continue;
1049
        }
1050
 
1051
      g_arr[i] = g;
1052
    }
1053
 
1054
  /* Release Data Flow analysis data structures.  */
1055
  df_finish (df);
1056
  df = NULL;
1057
 
1058
  /* We don't want to perform SMS on new loops - created by versioning.  */
1059
  num_loops = loops->num;
1060
  /* Go over the built DDGs and perfrom SMS for each one of them.  */
1061
  for (i = 0; i < num_loops; i++)
1062
    {
1063
      rtx head, tail;
1064
      rtx count_reg, count_init;
1065
      int mii, rec_mii;
1066
      unsigned stage_count = 0;
1067
      HOST_WIDEST_INT loop_count = 0;
1068
      struct loop *loop = loops->parray[i];
1069
 
1070
      if (! (g = g_arr[i]))
1071
        continue;
1072
 
1073
      if (dump_file)
1074
        print_ddg (dump_file, g);
1075
 
1076
      get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1077
 
1078
      latch_edge = loop_latch_edge (loop);
1079
      gcc_assert (loop->single_exit);
1080
      if (loop->single_exit->count)
1081
        trip_count = latch_edge->count / loop->single_exit->count;
1082
 
1083
      if (dump_file)
1084
        {
1085
          rtx line_note = find_line_note (tail);
1086
 
1087
          if (line_note)
1088
            {
1089
              expanded_location xloc;
1090
              NOTE_EXPANDED_LOCATION (xloc, line_note);
1091
              fprintf (dump_file, "SMS bb %s %d (file, line)\n",
1092
                       xloc.file, xloc.line);
1093
            }
1094
          fprintf (dump_file, "SMS single-bb-loop\n");
1095
          if (profile_info && flag_branch_probabilities)
1096
            {
1097
              fprintf (dump_file, "SMS loop-count ");
1098
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1099
                       (HOST_WIDEST_INT) bb->count);
1100
              fprintf (dump_file, "\n");
1101
              fprintf (dump_file, "SMS profile-sum-max ");
1102
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1103
                       (HOST_WIDEST_INT) profile_info->sum_max);
1104
              fprintf (dump_file, "\n");
1105
            }
1106
          fprintf (dump_file, "SMS doloop\n");
1107
          fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1108
          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1109
          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1110
        }
1111
 
1112
 
1113
      /* In case of th loop have doloop register it gets special
1114
         handling.  */
1115
      count_init = NULL_RTX;
1116
      if ((count_reg = doloop_register_get (tail)))
1117
        {
1118
          basic_block pre_header;
1119
 
1120
          pre_header = loop_preheader_edge (loop)->src;
1121
          count_init = const_iteration_count (count_reg, pre_header,
1122
                                              &loop_count);
1123
        }
1124
      gcc_assert (count_reg);
1125
 
1126
      if (dump_file && count_init)
1127
        {
1128
          fprintf (dump_file, "SMS const-doloop ");
1129
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1130
                     loop_count);
1131
          fprintf (dump_file, "\n");
1132
        }
1133
 
1134
      node_order = XNEWVEC (int, g->num_nodes);
1135
 
1136
      mii = 1; /* Need to pass some estimate of mii.  */
1137
      rec_mii = sms_order_nodes (g, mii, node_order);
1138
      mii = MAX (res_MII (g), rec_mii);
1139
      maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1140
 
1141
      if (dump_file)
1142
        fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1143
                 rec_mii, mii, maxii);
1144
 
1145
      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1146
         ASAP.  */
1147
      set_node_sched_params (g);
1148
 
1149
      ps = sms_schedule_by_order (g, mii, maxii, node_order);
1150
 
1151
      if (ps)
1152
        stage_count = PS_STAGE_COUNT (ps);
1153
 
1154
      /* Stage count of 1 means that there is no interleaving between
1155
         iterations, let the scheduling passes do the job.  */
1156
      if (stage_count < 1
1157
          || (count_init && (loop_count <= stage_count))
1158
          || (flag_branch_probabilities && (trip_count <= stage_count)))
1159
        {
1160
          if (dump_file)
1161
            {
1162
              fprintf (dump_file, "SMS failed... \n");
1163
              fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
1164
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1165
              fprintf (dump_file, ", trip-count=");
1166
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
1167
              fprintf (dump_file, ")\n");
1168
            }
1169
          continue;
1170
        }
1171
      else
1172
        {
1173
          int orig_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1174
          int new_cycles;
1175
          struct undo_replace_buff_elem *reg_move_replaces;
1176
 
1177
          if (dump_file)
1178
            {
1179
              fprintf (dump_file,
1180
                       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1181
                       stage_count);
1182
              print_partial_schedule (ps, dump_file);
1183
              fprintf (dump_file,
1184
                       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1185
                       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1186
            }
1187
 
1188
          /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
1189
             the closing_branch was scheduled and should appear in the last (ii-1)
1190
             row.  Otherwise, we are free to schedule the branch, and we let nodes
1191
             that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1192
             row; this should reduce stage_count to minimum.  */
1193
          normalize_sched_times (ps);
1194
          rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1195
          set_columns_for_ps (ps);
1196
 
1197
          /* Generate the kernel just to be able to measure its cycles.  */
1198
          permute_partial_schedule (ps, g->closing_branch->first_note);
1199
          reg_move_replaces = generate_reg_moves (ps);
1200
 
1201
          /* Get the number of cycles the new kernel expect to execute in.  */
1202
          new_cycles = kernel_number_of_cycles (BB_HEAD (g->bb), BB_END (g->bb));
1203
 
1204
          /* Get back to the original loop so we can do loop versioning.  */
1205
          undo_permute_partial_schedule (ps, g->closing_branch->first_note);
1206
          if (reg_move_replaces)
1207
            undo_generate_reg_moves (ps, reg_move_replaces);
1208
 
1209
          if ( new_cycles >= orig_cycles)
1210
            {
1211
              /* SMS is not profitable so undo the permutation and reg move generation
1212
                 and return the kernel to its original state.  */
1213
              if (dump_file)
1214
                fprintf (dump_file, "Undoing SMS because it is not profitable.\n");
1215
 
1216
            }
1217
          else
1218
            {
1219
              canon_loop (loop);
1220
 
1221
              /* case the BCT count is not known , Do loop-versioning */
1222
              if (count_reg && ! count_init)
1223
                {
1224
                  rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
1225
                                                 GEN_INT(stage_count));
1226
 
1227
                  nloop = loop_version (loops, loop, comp_rtx, &condition_bb,
1228
                                        true);
1229
                }
1230
 
1231
              /* Set new iteration count of loop kernel.  */
1232
              if (count_reg && count_init)
1233
                SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1234
                                                             - stage_count + 1);
1235
 
1236
              /* Now apply the scheduled kernel to the RTL of the loop.  */
1237
              permute_partial_schedule (ps, g->closing_branch->first_note);
1238
 
1239
              /* Mark this loop as software pipelined so the later
1240
              scheduling passes doesn't touch it.  */
1241
              if (! flag_resched_modulo_sched)
1242
                g->bb->flags |= BB_DISABLE_SCHEDULE;
1243
              /* The life-info is not valid any more.  */
1244
              g->bb->flags |= BB_DIRTY;
1245
 
1246
              reg_move_replaces = generate_reg_moves (ps);
1247
              if (dump_file)
1248
                print_node_sched_params (dump_file, g->num_nodes);
1249
              /* Generate prolog and epilog.  */
1250
              if (count_reg && !count_init)
1251
                generate_prolog_epilog (ps, loop, count_reg);
1252
              else
1253
                generate_prolog_epilog (ps, loop, NULL_RTX);
1254
            }
1255
          free_undo_replace_buff (reg_move_replaces);
1256
        }
1257
 
1258
      free_partial_schedule (ps);
1259
      free (node_sched_params);
1260
      free (node_order);
1261
      free_ddg (g);
1262
    }
1263
 
1264
  free (g_arr);
1265
 
1266
  /* Release scheduler data, needed until now because of DFA.  */
1267
  sched_finish ();
1268
  loop_optimizer_finalize (loops);
1269
}
1270
 
1271
/* The SMS scheduling algorithm itself
1272
   -----------------------------------
1273
   Input: 'O' an ordered list of insns of a loop.
1274
   Output: A scheduling of the loop - kernel, prolog, and epilogue.
1275
 
1276
   'Q' is the empty Set
1277
   'PS' is the partial schedule; it holds the currently scheduled nodes with
1278
        their cycle/slot.
1279
   'PSP' previously scheduled predecessors.
1280
   'PSS' previously scheduled successors.
1281
   't(u)' the cycle where u is scheduled.
1282
   'l(u)' is the latency of u.
1283
   'd(v,u)' is the dependence distance from v to u.
1284
   'ASAP(u)' the earliest time at which u could be scheduled as computed in
1285
             the node ordering phase.
1286
   'check_hardware_resources_conflicts(u, PS, c)'
1287
                             run a trace around cycle/slot through DFA model
1288
                             to check resource conflicts involving instruction u
1289
                             at cycle c given the partial schedule PS.
1290
   'add_to_partial_schedule_at_time(u, PS, c)'
1291
                             Add the node/instruction u to the partial schedule
1292
                             PS at time c.
1293
   'calculate_register_pressure(PS)'
1294
                             Given a schedule of instructions, calculate the register
1295
                             pressure it implies.  One implementation could be the
1296
                             maximum number of overlapping live ranges.
1297
   'maxRP' The maximum allowed register pressure, it is usually derived from the number
1298
           registers available in the hardware.
1299
 
1300
   1. II = MII.
1301
   2. PS = empty list
1302
   3. for each node u in O in pre-computed order
1303
   4.   if (PSP(u) != Q && PSS(u) == Q) then
1304
   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1305
   6.     start = Early_start; end = Early_start + II - 1; step = 1
1306
   11.  else if (PSP(u) == Q && PSS(u) != Q) then
1307
   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1308
   13.     start = Late_start; end = Late_start - II + 1; step = -1
1309
   14.  else if (PSP(u) != Q && PSS(u) != Q) then
1310
   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1311
   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1312
   17.     start = Early_start;
1313
   18.     end = min(Early_start + II - 1 , Late_start);
1314
   19.     step = 1
1315
   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1316
   21.    start = ASAP(u); end = start + II - 1; step = 1
1317
   22.  endif
1318
 
1319
   23.  success = false
1320
   24.  for (c = start ; c != end ; c += step)
1321
   25.     if check_hardware_resources_conflicts(u, PS, c) then
1322
   26.       add_to_partial_schedule_at_time(u, PS, c)
1323
   27.       success = true
1324
   28.       break
1325
   29.     endif
1326
   30.  endfor
1327
   31.  if (success == false) then
1328
   32.    II = II + 1
1329
   33.    if (II > maxII) then
1330
   34.       finish - failed to schedule
1331
   35.   endif
1332
   36.    goto 2.
1333
   37.  endif
1334
   38. endfor
1335
   39. if (calculate_register_pressure(PS) > maxRP) then
1336
   40.    goto 32.
1337
   41. endif
1338
   42. compute epilogue & prologue
1339
   43. finish - succeeded to schedule
1340
*/
1341
 
1342
/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1343
   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1344
   set to 0 to save compile time.  */
1345
#define DFA_HISTORY SMS_DFA_HISTORY
1346
 
1347
/* Given the partial schedule PS, this function calculates and returns the
1348
   cycles in which we can schedule the node with the given index I.
1349
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1350
   noticed that there are several cases in which we fail    to SMS the loop
1351
   because the sched window of a node is empty    due to tight data-deps. In
1352
   such cases we want to unschedule    some of the predecessors/successors
1353
   until we get non-empty    scheduling window.  It returns -1 if the
1354
   scheduling window is empty and zero otherwise.  */
1355
 
1356
static int
1357
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
1358
                  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
1359
{
1360
  int start, step, end;
1361
  ddg_edge_ptr e;
1362
  int u = nodes_order [i];
1363
  ddg_node_ptr u_node = &ps->g->nodes[u];
1364
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1365
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1366
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1367
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1368
  int psp_not_empty;
1369
  int pss_not_empty;
1370
 
1371
  /* 1. compute sched window for u (start, end, step).  */
1372
  sbitmap_zero (psp);
1373
  sbitmap_zero (pss);
1374
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
1375
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
1376
 
1377
  if (psp_not_empty && !pss_not_empty)
1378
    {
1379
      int early_start = INT_MIN;
1380
 
1381
      end = INT_MAX;
1382
      for (e = u_node->in; e != 0; e = e->next_in)
1383
        {
1384
          ddg_node_ptr v_node = e->src;
1385
          if (TEST_BIT (sched_nodes, v_node->cuid))
1386
            {
1387
              int node_st = SCHED_TIME (v_node)
1388
                            + e->latency - (e->distance * ii);
1389
 
1390
              early_start = MAX (early_start, node_st);
1391
 
1392
              if (e->data_type == MEM_DEP)
1393
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1394
            }
1395
        }
1396
      start = early_start;
1397
      end = MIN (end, early_start + ii);
1398
      step = 1;
1399
    }
1400
 
1401
  else if (!psp_not_empty && pss_not_empty)
1402
    {
1403
      int late_start = INT_MAX;
1404
 
1405
      end = INT_MIN;
1406
      for (e = u_node->out; e != 0; e = e->next_out)
1407
        {
1408
          ddg_node_ptr v_node = e->dest;
1409
          if (TEST_BIT (sched_nodes, v_node->cuid))
1410
            {
1411
              late_start = MIN (late_start,
1412
                                SCHED_TIME (v_node) - e->latency
1413
                                + (e->distance * ii));
1414
              if (e->data_type == MEM_DEP)
1415
                end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1416
            }
1417
        }
1418
      start = late_start;
1419
      end = MAX (end, late_start - ii);
1420
      step = -1;
1421
    }
1422
 
1423
  else if (psp_not_empty && pss_not_empty)
1424
    {
1425
      int early_start = INT_MIN;
1426
      int late_start = INT_MAX;
1427
 
1428
      start = INT_MIN;
1429
      end = INT_MAX;
1430
      for (e = u_node->in; e != 0; e = e->next_in)
1431
        {
1432
          ddg_node_ptr v_node = e->src;
1433
 
1434
          if (TEST_BIT (sched_nodes, v_node->cuid))
1435
            {
1436
              early_start = MAX (early_start,
1437
                                 SCHED_TIME (v_node) + e->latency
1438
                                 - (e->distance * ii));
1439
              if (e->data_type == MEM_DEP)
1440
                end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1441
            }
1442
        }
1443
      for (e = u_node->out; e != 0; e = e->next_out)
1444
        {
1445
          ddg_node_ptr v_node = e->dest;
1446
 
1447
          if (TEST_BIT (sched_nodes, v_node->cuid))
1448
            {
1449
              late_start = MIN (late_start,
1450
                                SCHED_TIME (v_node) - e->latency
1451
                                + (e->distance * ii));
1452
              if (e->data_type == MEM_DEP)
1453
                start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1454
            }
1455
        }
1456
      start = MAX (start, early_start);
1457
      end = MIN (end, MIN (early_start + ii, late_start + 1));
1458
      step = 1;
1459
    }
1460
  else /* psp is empty && pss is empty.  */
1461
    {
1462
      start = SCHED_ASAP (u_node);
1463
      end = start + ii;
1464
      step = 1;
1465
    }
1466
 
1467
  *start_p = start;
1468
  *step_p = step;
1469
  *end_p = end;
1470
  sbitmap_free (psp);
1471
  sbitmap_free (pss);
1472
 
1473
  if ((start >= end && step == 1) || (start <= end && step == -1))
1474
    return -1;
1475
  else
1476
    return 0;
1477
}
1478
 
1479
/* This function implements the scheduling algorithm for SMS according to the
1480
   above algorithm.  */
1481
static partial_schedule_ptr
1482
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
1483
{
1484
  int ii = mii;
1485
  int i, c, success;
1486
  int try_again_with_larger_ii = true;
1487
  int num_nodes = g->num_nodes;
1488
  ddg_edge_ptr e;
1489
  int start, end, step; /* Place together into one struct?  */
1490
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1491
  sbitmap must_precede = sbitmap_alloc (num_nodes);
1492
  sbitmap must_follow = sbitmap_alloc (num_nodes);
1493
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
1494
 
1495
  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1496
 
1497
  sbitmap_ones (tobe_scheduled);
1498
  sbitmap_zero (sched_nodes);
1499
 
1500
  while ((! sbitmap_equal (tobe_scheduled, sched_nodes)
1501
         || try_again_with_larger_ii ) && ii < maxii)
1502
    {
1503
      int j;
1504
      bool unscheduled_nodes = false;
1505
 
1506
      if (dump_file)
1507
        fprintf(dump_file, "Starting with ii=%d\n", ii);
1508
      if (try_again_with_larger_ii)
1509
        {
1510
          try_again_with_larger_ii = false;
1511
          sbitmap_zero (sched_nodes);
1512
        }
1513
 
1514
      for (i = 0; i < num_nodes; i++)
1515
        {
1516
          int u = nodes_order[i];
1517
          ddg_node_ptr u_node = &ps->g->nodes[u];
1518
          rtx insn = u_node->insn;
1519
 
1520
          if (!INSN_P (insn))
1521
            {
1522
              RESET_BIT (tobe_scheduled, u);
1523
              continue;
1524
            }
1525
 
1526
          if (JUMP_P (insn)) /* Closing branch handled later.  */
1527
            {
1528
              RESET_BIT (tobe_scheduled, u);
1529
              continue;
1530
            }
1531
 
1532
          if (TEST_BIT (sched_nodes, u))
1533
            continue;
1534
 
1535
          /* Try to get non-empty scheduling window.  */
1536
          j = i;
1537
          while (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, &step, &end) < 0
1538
                 && j > 0)
1539
            {
1540
              unscheduled_nodes = true;
1541
              if (TEST_BIT (NODE_PREDECESSORS (u_node), nodes_order[j - 1])
1542
                  || TEST_BIT (NODE_SUCCESSORS (u_node), nodes_order[j - 1]))
1543
                {
1544
                  ps_unschedule_node (ps, &ps->g->nodes[nodes_order[j - 1]]);
1545
                  RESET_BIT (sched_nodes, nodes_order [j - 1]);
1546
                }
1547
              j--;
1548
            }
1549
          if (j < 0)
1550
            {
1551
              /* ??? Try backtracking instead of immediately ii++?  */
1552
              ii++;
1553
              try_again_with_larger_ii = true;
1554
              reset_partial_schedule (ps, ii);
1555
              break;
1556
            }
1557
          /* 2. Try scheduling u in window.  */
1558
          if (dump_file)
1559
            fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1560
                    u, start, end, step);
1561
 
1562
          /* use must_follow & must_precede bitmaps to determine order
1563
             of nodes within the cycle.  */
1564
          sbitmap_zero (must_precede);
1565
          sbitmap_zero (must_follow);
1566
          for (e = u_node->in; e != 0; e = e->next_in)
1567
            if (TEST_BIT (sched_nodes, e->src->cuid)
1568
                && e->latency == (ii * e->distance)
1569
                && start == SCHED_TIME (e->src))
1570
             SET_BIT (must_precede, e->src->cuid);
1571
 
1572
          for (e = u_node->out; e != 0; e = e->next_out)
1573
            if (TEST_BIT (sched_nodes, e->dest->cuid)
1574
                && e->latency == (ii * e->distance)
1575
                && end == SCHED_TIME (e->dest))
1576
             SET_BIT (must_follow, e->dest->cuid);
1577
 
1578
          success = 0;
1579
          if ((step > 0 && start < end) ||  (step < 0 && start > end))
1580
            for (c = start; c != end; c += step)
1581
              {
1582
                ps_insn_ptr psi;
1583
 
1584
                psi = ps_add_node_check_conflicts (ps, u_node, c,
1585
                                                   must_precede,
1586
                                                   must_follow);
1587
 
1588
                if (psi)
1589
                  {
1590
                    SCHED_TIME (u_node) = c;
1591
                    SET_BIT (sched_nodes, u);
1592
                    success = 1;
1593
                    if (dump_file)
1594
                      fprintf(dump_file, "Schedule in %d\n", c);
1595
                    break;
1596
                  }
1597
              }
1598
          if (!success)
1599
            {
1600
              /* ??? Try backtracking instead of immediately ii++?  */
1601
              ii++;
1602
              try_again_with_larger_ii = true;
1603
              reset_partial_schedule (ps, ii);
1604
              break;
1605
            }
1606
          if (unscheduled_nodes)
1607
            break;
1608
 
1609
          /* ??? If (success), check register pressure estimates.  */
1610
        } /* Continue with next node.  */
1611
    } /* While try_again_with_larger_ii.  */
1612
 
1613
  sbitmap_free (sched_nodes);
1614
  sbitmap_free (must_precede);
1615
  sbitmap_free (must_follow);
1616
  sbitmap_free (tobe_scheduled);
1617
 
1618
  if (ii >= maxii)
1619
    {
1620
      free_partial_schedule (ps);
1621
      ps = NULL;
1622
    }
1623
  return ps;
1624
}
1625
 
1626
 
1627
/* This page implements the algorithm for ordering the nodes of a DDG
1628
   for modulo scheduling, activated through the
1629
   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
1630
 
1631
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1632
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
1633
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
1634
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1635
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
1636
#define DEPTH(x) (ASAP ((x)))
1637
 
1638
typedef struct node_order_params * nopa;
1639
 
1640
static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1641
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1642
static nopa  calculate_order_params (ddg_ptr, int mii);
1643
static int find_max_asap (ddg_ptr, sbitmap);
1644
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1645
static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1646
 
1647
enum sms_direction {BOTTOMUP, TOPDOWN};
1648
 
1649
struct node_order_params
1650
{
1651
  int asap;
1652
  int alap;
1653
  int height;
1654
};
1655
 
1656
/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
1657
static void
1658
check_nodes_order (int *node_order, int num_nodes)
1659
{
1660
  int i;
1661
  sbitmap tmp = sbitmap_alloc (num_nodes);
1662
 
1663
  sbitmap_zero (tmp);
1664
 
1665
  for (i = 0; i < num_nodes; i++)
1666
    {
1667
      int u = node_order[i];
1668
 
1669
      gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
1670
 
1671
      SET_BIT (tmp, u);
1672
    }
1673
 
1674
  sbitmap_free (tmp);
1675
}
1676
 
1677
/* Order the nodes of G for scheduling and pass the result in
1678
   NODE_ORDER.  Also set aux.count of each node to ASAP.
1679
   Return the recMII for the given DDG.  */
1680
static int
1681
sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1682
{
1683
  int i;
1684
  int rec_mii = 0;
1685
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1686
 
1687
  nopa nops = calculate_order_params (g, mii);
1688
 
1689
  order_nodes_of_sccs (sccs, node_order);
1690
 
1691
  if (sccs->num_sccs > 0)
1692
    /* First SCC has the largest recurrence_length.  */
1693
    rec_mii = sccs->sccs[0]->recurrence_length;
1694
 
1695
  /* Save ASAP before destroying node_order_params.  */
1696
  for (i = 0; i < g->num_nodes; i++)
1697
    {
1698
      ddg_node_ptr v = &g->nodes[i];
1699
      v->aux.count = ASAP (v);
1700
    }
1701
 
1702
  free (nops);
1703
  free_ddg_all_sccs (sccs);
1704
  check_nodes_order (node_order, g->num_nodes);
1705
 
1706
  return rec_mii;
1707
}
1708
 
1709
static void
1710
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1711
{
1712
  int i, pos = 0;
1713
  ddg_ptr g = all_sccs->ddg;
1714
  int num_nodes = g->num_nodes;
1715
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1716
  sbitmap on_path = sbitmap_alloc (num_nodes);
1717
  sbitmap tmp = sbitmap_alloc (num_nodes);
1718
  sbitmap ones = sbitmap_alloc (num_nodes);
1719
 
1720
  sbitmap_zero (prev_sccs);
1721
  sbitmap_ones (ones);
1722
 
1723
  /* Perfrom the node ordering starting from the SCC with the highest recMII.
1724
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
1725
  for (i = 0; i < all_sccs->num_sccs; i++)
1726
    {
1727
      ddg_scc_ptr scc = all_sccs->sccs[i];
1728
 
1729
      /* Add nodes on paths from previous SCCs to the current SCC.  */
1730
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1731
      sbitmap_a_or_b (tmp, scc->nodes, on_path);
1732
 
1733
      /* Add nodes on paths from the current SCC to previous SCCs.  */
1734
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1735
      sbitmap_a_or_b (tmp, tmp, on_path);
1736
 
1737
      /* Remove nodes of previous SCCs from current extended SCC.  */
1738
      sbitmap_difference (tmp, tmp, prev_sccs);
1739
 
1740
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1741
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
1742
    }
1743
 
1744
  /* Handle the remaining nodes that do not belong to any scc.  Each call
1745
     to order_nodes_in_scc handles a single connected component.  */
1746
  while (pos < g->num_nodes)
1747
    {
1748
      sbitmap_difference (tmp, ones, prev_sccs);
1749
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1750
    }
1751
  sbitmap_free (prev_sccs);
1752
  sbitmap_free (on_path);
1753
  sbitmap_free (tmp);
1754
  sbitmap_free (ones);
1755
}
1756
 
1757
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
1758
static struct node_order_params *
1759
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1760
{
1761
  int u;
1762
  int max_asap;
1763
  int num_nodes = g->num_nodes;
1764
  ddg_edge_ptr e;
1765
  /* Allocate a place to hold ordering params for each node in the DDG.  */
1766
  nopa node_order_params_arr;
1767
 
1768
  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
1769
  node_order_params_arr = (nopa) xcalloc (num_nodes,
1770
                                          sizeof (struct node_order_params));
1771
 
1772
  /* Set the aux pointer of each node to point to its order_params structure.  */
1773
  for (u = 0; u < num_nodes; u++)
1774
    g->nodes[u].aux.info = &node_order_params_arr[u];
1775
 
1776
  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1777
     calculate ASAP, ALAP, mobility, distance, and height for each node
1778
     in the dependence (direct acyclic) graph.  */
1779
 
1780
  /* We assume that the nodes in the array are in topological order.  */
1781
 
1782
  max_asap = 0;
1783
  for (u = 0; u < num_nodes; u++)
1784
    {
1785
      ddg_node_ptr u_node = &g->nodes[u];
1786
 
1787
      ASAP (u_node) = 0;
1788
      for (e = u_node->in; e; e = e->next_in)
1789
        if (e->distance == 0)
1790
          ASAP (u_node) = MAX (ASAP (u_node),
1791
                               ASAP (e->src) + e->latency);
1792
      max_asap = MAX (max_asap, ASAP (u_node));
1793
    }
1794
 
1795
  for (u = num_nodes - 1; u > -1; u--)
1796
    {
1797
      ddg_node_ptr u_node = &g->nodes[u];
1798
 
1799
      ALAP (u_node) = max_asap;
1800
      HEIGHT (u_node) = 0;
1801
      for (e = u_node->out; e; e = e->next_out)
1802
        if (e->distance == 0)
1803
          {
1804
            ALAP (u_node) = MIN (ALAP (u_node),
1805
                                 ALAP (e->dest) - e->latency);
1806
            HEIGHT (u_node) = MAX (HEIGHT (u_node),
1807
                                   HEIGHT (e->dest) + e->latency);
1808
          }
1809
    }
1810
 
1811
  return node_order_params_arr;
1812
}
1813
 
1814
static int
1815
find_max_asap (ddg_ptr g, sbitmap nodes)
1816
{
1817
  unsigned int u = 0;
1818
  int max_asap = -1;
1819
  int result = -1;
1820
  sbitmap_iterator sbi;
1821
 
1822
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1823
    {
1824
      ddg_node_ptr u_node = &g->nodes[u];
1825
 
1826
      if (max_asap < ASAP (u_node))
1827
        {
1828
          max_asap = ASAP (u_node);
1829
          result = u;
1830
        }
1831
    }
1832
  return result;
1833
}
1834
 
1835
static int
1836
find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1837
{
1838
  unsigned int u = 0;
1839
  int max_hv = -1;
1840
  int min_mob = INT_MAX;
1841
  int result = -1;
1842
  sbitmap_iterator sbi;
1843
 
1844
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1845
    {
1846
      ddg_node_ptr u_node = &g->nodes[u];
1847
 
1848
      if (max_hv < HEIGHT (u_node))
1849
        {
1850
          max_hv = HEIGHT (u_node);
1851
          min_mob = MOB (u_node);
1852
          result = u;
1853
        }
1854
      else if ((max_hv == HEIGHT (u_node))
1855
               && (min_mob > MOB (u_node)))
1856
        {
1857
          min_mob = MOB (u_node);
1858
          result = u;
1859
        }
1860
    }
1861
  return result;
1862
}
1863
 
1864
static int
1865
find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1866
{
1867
  unsigned int u = 0;
1868
  int max_dv = -1;
1869
  int min_mob = INT_MAX;
1870
  int result = -1;
1871
  sbitmap_iterator sbi;
1872
 
1873
  EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
1874
    {
1875
      ddg_node_ptr u_node = &g->nodes[u];
1876
 
1877
      if (max_dv < DEPTH (u_node))
1878
        {
1879
          max_dv = DEPTH (u_node);
1880
          min_mob = MOB (u_node);
1881
          result = u;
1882
        }
1883
      else if ((max_dv == DEPTH (u_node))
1884
               && (min_mob > MOB (u_node)))
1885
        {
1886
          min_mob = MOB (u_node);
1887
          result = u;
1888
        }
1889
    }
1890
  return result;
1891
}
1892
 
1893
/* Places the nodes of SCC into the NODE_ORDER array starting
1894
   at position POS, according to the SMS ordering algorithm.
1895
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1896
   the NODE_ORDER array, starting from position zero.  */
1897
static int
1898
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1899
                    int * node_order, int pos)
1900
{
1901
  enum sms_direction dir;
1902
  int num_nodes = g->num_nodes;
1903
  sbitmap workset = sbitmap_alloc (num_nodes);
1904
  sbitmap tmp = sbitmap_alloc (num_nodes);
1905
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1906
  sbitmap predecessors = sbitmap_alloc (num_nodes);
1907
  sbitmap successors = sbitmap_alloc (num_nodes);
1908
 
1909
  sbitmap_zero (predecessors);
1910
  find_predecessors (predecessors, g, nodes_ordered);
1911
 
1912
  sbitmap_zero (successors);
1913
  find_successors (successors, g, nodes_ordered);
1914
 
1915
  sbitmap_zero (tmp);
1916
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1917
    {
1918
      sbitmap_copy (workset, tmp);
1919
      dir = BOTTOMUP;
1920
    }
1921
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1922
    {
1923
      sbitmap_copy (workset, tmp);
1924
      dir = TOPDOWN;
1925
    }
1926
  else
1927
    {
1928
      int u;
1929
 
1930
      sbitmap_zero (workset);
1931
      if ((u = find_max_asap (g, scc)) >= 0)
1932
        SET_BIT (workset, u);
1933
      dir = BOTTOMUP;
1934
    }
1935
 
1936
  sbitmap_zero (zero_bitmap);
1937
  while (!sbitmap_equal (workset, zero_bitmap))
1938
    {
1939
      int v;
1940
      ddg_node_ptr v_node;
1941
      sbitmap v_node_preds;
1942
      sbitmap v_node_succs;
1943
 
1944
      if (dir == TOPDOWN)
1945
        {
1946
          while (!sbitmap_equal (workset, zero_bitmap))
1947
            {
1948
              v = find_max_hv_min_mob (g, workset);
1949
              v_node = &g->nodes[v];
1950
              node_order[pos++] = v;
1951
              v_node_succs = NODE_SUCCESSORS (v_node);
1952
              sbitmap_a_and_b (tmp, v_node_succs, scc);
1953
 
1954
              /* Don't consider the already ordered successors again.  */
1955
              sbitmap_difference (tmp, tmp, nodes_ordered);
1956
              sbitmap_a_or_b (workset, workset, tmp);
1957
              RESET_BIT (workset, v);
1958
              SET_BIT (nodes_ordered, v);
1959
            }
1960
          dir = BOTTOMUP;
1961
          sbitmap_zero (predecessors);
1962
          find_predecessors (predecessors, g, nodes_ordered);
1963
          sbitmap_a_and_b (workset, predecessors, scc);
1964
        }
1965
      else
1966
        {
1967
          while (!sbitmap_equal (workset, zero_bitmap))
1968
            {
1969
              v = find_max_dv_min_mob (g, workset);
1970
              v_node = &g->nodes[v];
1971
              node_order[pos++] = v;
1972
              v_node_preds = NODE_PREDECESSORS (v_node);
1973
              sbitmap_a_and_b (tmp, v_node_preds, scc);
1974
 
1975
              /* Don't consider the already ordered predecessors again.  */
1976
              sbitmap_difference (tmp, tmp, nodes_ordered);
1977
              sbitmap_a_or_b (workset, workset, tmp);
1978
              RESET_BIT (workset, v);
1979
              SET_BIT (nodes_ordered, v);
1980
            }
1981
          dir = TOPDOWN;
1982
          sbitmap_zero (successors);
1983
          find_successors (successors, g, nodes_ordered);
1984
          sbitmap_a_and_b (workset, successors, scc);
1985
        }
1986
    }
1987
  sbitmap_free (tmp);
1988
  sbitmap_free (workset);
1989
  sbitmap_free (zero_bitmap);
1990
  sbitmap_free (predecessors);
1991
  sbitmap_free (successors);
1992
  return pos;
1993
}
1994
 
1995
 
1996
/* This page contains functions for manipulating partial-schedules during
1997
   modulo scheduling.  */
1998
 
1999
/* Create a partial schedule and allocate a memory to hold II rows.  */
2000
 
2001
static partial_schedule_ptr
2002
create_partial_schedule (int ii, ddg_ptr g, int history)
2003
{
2004
  partial_schedule_ptr ps = XNEW (struct partial_schedule);
2005
  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2006
  ps->ii = ii;
2007
  ps->history = history;
2008
  ps->min_cycle = INT_MAX;
2009
  ps->max_cycle = INT_MIN;
2010
  ps->g = g;
2011
 
2012
  return ps;
2013
}
2014
 
2015
/* Free the PS_INSNs in rows array of the given partial schedule.
2016
   ??? Consider caching the PS_INSN's.  */
2017
static void
2018
free_ps_insns (partial_schedule_ptr ps)
2019
{
2020
  int i;
2021
 
2022
  for (i = 0; i < ps->ii; i++)
2023
    {
2024
      while (ps->rows[i])
2025
        {
2026
          ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2027
 
2028
          free (ps->rows[i]);
2029
          ps->rows[i] = ps_insn;
2030
        }
2031
      ps->rows[i] = NULL;
2032
    }
2033
}
2034
 
2035
/* Free all the memory allocated to the partial schedule.  */
2036
 
2037
static void
2038
free_partial_schedule (partial_schedule_ptr ps)
2039
{
2040
  if (!ps)
2041
    return;
2042
  free_ps_insns (ps);
2043
  free (ps->rows);
2044
  free (ps);
2045
}
2046
 
2047
/* Clear the rows array with its PS_INSNs, and create a new one with
2048
   NEW_II rows.  */
2049
 
2050
static void
2051
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2052
{
2053
  if (!ps)
2054
    return;
2055
  free_ps_insns (ps);
2056
  if (new_ii == ps->ii)
2057
    return;
2058
  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2059
                                                 * sizeof (ps_insn_ptr));
2060
  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2061
  ps->ii = new_ii;
2062
  ps->min_cycle = INT_MAX;
2063
  ps->max_cycle = INT_MIN;
2064
}
2065
 
2066
/* Prints the partial schedule as an ii rows array, for each rows
2067
   print the ids of the insns in it.  */
2068
void
2069
print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2070
{
2071
  int i;
2072
 
2073
  for (i = 0; i < ps->ii; i++)
2074
    {
2075
      ps_insn_ptr ps_i = ps->rows[i];
2076
 
2077
      fprintf (dump, "\n[CYCLE %d ]: ", i);
2078
      while (ps_i)
2079
        {
2080
          fprintf (dump, "%d, ",
2081
                   INSN_UID (ps_i->node->insn));
2082
          ps_i = ps_i->next_in_row;
2083
        }
2084
    }
2085
}
2086
 
2087
/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2088
static ps_insn_ptr
2089
create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
2090
{
2091
  ps_insn_ptr ps_i = XNEW (struct ps_insn);
2092
 
2093
  ps_i->node = node;
2094
  ps_i->next_in_row = NULL;
2095
  ps_i->prev_in_row = NULL;
2096
  ps_i->row_rest_count = rest_count;
2097
  ps_i->cycle = cycle;
2098
 
2099
  return ps_i;
2100
}
2101
 
2102
 
2103
/* Removes the given PS_INSN from the partial schedule.  Returns false if the
2104
   node is not found in the partial schedule, else returns true.  */
2105
static bool
2106
remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
2107
{
2108
  int row;
2109
 
2110
  if (!ps || !ps_i)
2111
    return false;
2112
 
2113
  row = SMODULO (ps_i->cycle, ps->ii);
2114
  if (! ps_i->prev_in_row)
2115
    {
2116
      if (ps_i != ps->rows[row])
2117
        return false;
2118
 
2119
      ps->rows[row] = ps_i->next_in_row;
2120
      if (ps->rows[row])
2121
        ps->rows[row]->prev_in_row = NULL;
2122
    }
2123
  else
2124
    {
2125
      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
2126
      if (ps_i->next_in_row)
2127
        ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
2128
    }
2129
  free (ps_i);
2130
  return true;
2131
}
2132
 
2133
/* Unlike what literature describes for modulo scheduling (which focuses
2134
   on VLIW machines) the order of the instructions inside a cycle is
2135
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
2136
   where the current instruction should go relative to the already
2137
   scheduled instructions in the given cycle.  Go over these
2138
   instructions and find the first possible column to put it in.  */
2139
static bool
2140
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2141
                     sbitmap must_precede, sbitmap must_follow)
2142
{
2143
  ps_insn_ptr next_ps_i;
2144
  ps_insn_ptr first_must_follow = NULL;
2145
  ps_insn_ptr last_must_precede = NULL;
2146
  int row;
2147
 
2148
  if (! ps_i)
2149
    return false;
2150
 
2151
  row = SMODULO (ps_i->cycle, ps->ii);
2152
 
2153
  /* Find the first must follow and the last must precede
2154
     and insert the node immediately after the must precede
2155
     but make sure that it there is no must follow after it.  */
2156
  for (next_ps_i = ps->rows[row];
2157
       next_ps_i;
2158
       next_ps_i = next_ps_i->next_in_row)
2159
    {
2160
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
2161
          && ! first_must_follow)
2162
        first_must_follow = next_ps_i;
2163
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
2164
        {
2165
          /* If we have already met a node that must follow, then
2166
             there is no possible column.  */
2167
          if (first_must_follow)
2168
            return false;
2169
          else
2170
            last_must_precede = next_ps_i;
2171
        }
2172
    }
2173
 
2174
  /* Now insert the node after INSERT_AFTER_PSI.  */
2175
 
2176
  if (! last_must_precede)
2177
    {
2178
      ps_i->next_in_row = ps->rows[row];
2179
      ps_i->prev_in_row = NULL;
2180
      if (ps_i->next_in_row)
2181
        ps_i->next_in_row->prev_in_row = ps_i;
2182
      ps->rows[row] = ps_i;
2183
    }
2184
  else
2185
    {
2186
      ps_i->next_in_row = last_must_precede->next_in_row;
2187
      last_must_precede->next_in_row = ps_i;
2188
      ps_i->prev_in_row = last_must_precede;
2189
      if (ps_i->next_in_row)
2190
        ps_i->next_in_row->prev_in_row = ps_i;
2191
    }
2192
 
2193
  return true;
2194
}
2195
 
2196
/* Advances the PS_INSN one column in its current row; returns false
2197
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
2198
   the node with cuid N must be come after the node pointed to by
2199
   PS_I when scheduled in the same cycle.  */
2200
static int
2201
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
2202
                        sbitmap must_follow)
2203
{
2204
  ps_insn_ptr prev, next;
2205
  int row;
2206
  ddg_node_ptr next_node;
2207
 
2208
  if (!ps || !ps_i)
2209
    return false;
2210
 
2211
  row = SMODULO (ps_i->cycle, ps->ii);
2212
 
2213
  if (! ps_i->next_in_row)
2214
    return false;
2215
 
2216
  next_node = ps_i->next_in_row->node;
2217
 
2218
  /* Check if next_in_row is dependent on ps_i, both having same sched
2219
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
2220
  if (TEST_BIT (must_follow, next_node->cuid))
2221
    return false;
2222
 
2223
  /* Advance PS_I over its next_in_row in the doubly linked list.  */
2224
  prev = ps_i->prev_in_row;
2225
  next = ps_i->next_in_row;
2226
 
2227
  if (ps_i == ps->rows[row])
2228
    ps->rows[row] = next;
2229
 
2230
  ps_i->next_in_row = next->next_in_row;
2231
 
2232
  if (next->next_in_row)
2233
    next->next_in_row->prev_in_row = ps_i;
2234
 
2235
  next->next_in_row = ps_i;
2236
  ps_i->prev_in_row = next;
2237
 
2238
  next->prev_in_row = prev;
2239
  if (prev)
2240
    prev->next_in_row = next;
2241
 
2242
  return true;
2243
}
2244
 
2245
/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2246
   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
2247
   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2248
   before/after (respectively) the node pointed to by PS_I when scheduled
2249
   in the same cycle.  */
2250
static ps_insn_ptr
2251
add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2252
                sbitmap must_precede, sbitmap must_follow)
2253
{
2254
  ps_insn_ptr ps_i;
2255
  int rest_count = 1;
2256
  int row = SMODULO (cycle, ps->ii);
2257
 
2258
  if (ps->rows[row]
2259
      && ps->rows[row]->row_rest_count >= issue_rate)
2260
    return NULL;
2261
 
2262
  if (ps->rows[row])
2263
    rest_count += ps->rows[row]->row_rest_count;
2264
 
2265
  ps_i = create_ps_insn (node, rest_count, cycle);
2266
 
2267
  /* Finds and inserts PS_I according to MUST_FOLLOW and
2268
     MUST_PRECEDE.  */
2269
  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2270
    {
2271
      free (ps_i);
2272
      return NULL;
2273
    }
2274
 
2275
  return ps_i;
2276
}
2277
 
2278
/* Advance time one cycle.  Assumes DFA is being used.  */
2279
static void
2280
advance_one_cycle (void)
2281
{
2282
  if (targetm.sched.dfa_pre_cycle_insn)
2283
    state_transition (curr_state,
2284
                      targetm.sched.dfa_pre_cycle_insn ());
2285
 
2286
  state_transition (curr_state, NULL);
2287
 
2288
  if (targetm.sched.dfa_post_cycle_insn)
2289
    state_transition (curr_state,
2290
                      targetm.sched.dfa_post_cycle_insn ());
2291
}
2292
 
2293
/* Given the kernel of a loop (from FIRST_INSN to LAST_INSN), finds
2294
   the number of cycles according to DFA that the kernel fits in,
2295
   we use this to check if we done well with SMS after we add
2296
   register moves.  In some cases register moves overhead makes
2297
   it even worse than the original loop.  We want SMS to be performed
2298
   when it gives less cycles after register moves are added.  */
2299
static int
2300
kernel_number_of_cycles (rtx first_insn, rtx last_insn)
2301
{
2302
  int cycles = 0;
2303
  rtx insn;
2304
  int can_issue_more = issue_rate;
2305
 
2306
  state_reset (curr_state);
2307
 
2308
  for (insn = first_insn;
2309
       insn != NULL_RTX && insn != last_insn;
2310
       insn = NEXT_INSN (insn))
2311
    {
2312
      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
2313
        continue;
2314
 
2315
      /* Check if there is room for the current insn.  */
2316
      if (!can_issue_more || state_dead_lock_p (curr_state))
2317
        {
2318
          cycles ++;
2319
          advance_one_cycle ();
2320
          can_issue_more = issue_rate;
2321
        }
2322
 
2323
        /* Update the DFA state and return with failure if the DFA found
2324
           recource conflicts.  */
2325
      if (state_transition (curr_state, insn) >= 0)
2326
        {
2327
          cycles ++;
2328
          advance_one_cycle ();
2329
          can_issue_more = issue_rate;
2330
        }
2331
 
2332
      if (targetm.sched.variable_issue)
2333
        can_issue_more =
2334
          targetm.sched.variable_issue (sched_dump, sched_verbose,
2335
                                        insn, can_issue_more);
2336
      /* A naked CLOBBER or USE generates no instruction, so don't
2337
         let them consume issue slots.  */
2338
      else if (GET_CODE (PATTERN (insn)) != USE
2339
               && GET_CODE (PATTERN (insn)) != CLOBBER)
2340
        can_issue_more--;
2341
    }
2342
  return cycles;
2343
}
2344
 
2345
/* Checks if PS has resource conflicts according to DFA, starting from
2346
   FROM cycle to TO cycle; returns true if there are conflicts and false
2347
   if there are no conflicts.  Assumes DFA is being used.  */
2348
static int
2349
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2350
{
2351
  int cycle;
2352
 
2353
  state_reset (curr_state);
2354
 
2355
  for (cycle = from; cycle <= to; cycle++)
2356
    {
2357
      ps_insn_ptr crr_insn;
2358
      /* Holds the remaining issue slots in the current row.  */
2359
      int can_issue_more = issue_rate;
2360
 
2361
      /* Walk through the DFA for the current row.  */
2362
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2363
           crr_insn;
2364
           crr_insn = crr_insn->next_in_row)
2365
        {
2366
          rtx insn = crr_insn->node->insn;
2367
 
2368
          if (!INSN_P (insn))
2369
            continue;
2370
 
2371
          /* Check if there is room for the current insn.  */
2372
          if (!can_issue_more || state_dead_lock_p (curr_state))
2373
            return true;
2374
 
2375
          /* Update the DFA state and return with failure if the DFA found
2376
             recource conflicts.  */
2377
          if (state_transition (curr_state, insn) >= 0)
2378
            return true;
2379
 
2380
          if (targetm.sched.variable_issue)
2381
            can_issue_more =
2382
              targetm.sched.variable_issue (sched_dump, sched_verbose,
2383
                                            insn, can_issue_more);
2384
          /* A naked CLOBBER or USE generates no instruction, so don't
2385
             let them consume issue slots.  */
2386
          else if (GET_CODE (PATTERN (insn)) != USE
2387
                   && GET_CODE (PATTERN (insn)) != CLOBBER)
2388
            can_issue_more--;
2389
        }
2390
 
2391
      /* Advance the DFA to the next cycle.  */
2392
      advance_one_cycle ();
2393
    }
2394
  return false;
2395
}
2396
 
2397
/* Checks if the given node causes resource conflicts when added to PS at
2398
   cycle C.  If not the node is added to PS and returned; otherwise zero
2399
   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2400
   cuid N must be come before/after (respectively) the node pointed to by
2401
   PS_I when scheduled in the same cycle.  */
2402
ps_insn_ptr
2403
ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2404
                             int c, sbitmap must_precede,
2405
                             sbitmap must_follow)
2406
{
2407
  int has_conflicts = 0;
2408
  ps_insn_ptr ps_i;
2409
 
2410
  /* First add the node to the PS, if this succeeds check for
2411
     conflicts, trying different issue slots in the same row.  */
2412
  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2413
    return NULL; /* Failed to insert the node at the given cycle.  */
2414
 
2415
  has_conflicts = ps_has_conflicts (ps, c, c)
2416
                  || (ps->history > 0
2417
                      && ps_has_conflicts (ps,
2418
                                           c - ps->history,
2419
                                           c + ps->history));
2420
 
2421
  /* Try different issue slots to find one that the given node can be
2422
     scheduled in without conflicts.  */
2423
  while (has_conflicts)
2424
    {
2425
      if (! ps_insn_advance_column (ps, ps_i, must_follow))
2426
        break;
2427
      has_conflicts = ps_has_conflicts (ps, c, c)
2428
                      || (ps->history > 0
2429
                          && ps_has_conflicts (ps,
2430
                                               c - ps->history,
2431
                                               c + ps->history));
2432
    }
2433
 
2434
  if (has_conflicts)
2435
    {
2436
      remove_node_from_ps (ps, ps_i);
2437
      return NULL;
2438
    }
2439
 
2440
  ps->min_cycle = MIN (ps->min_cycle, c);
2441
  ps->max_cycle = MAX (ps->max_cycle, c);
2442
  return ps_i;
2443
}
2444
 
2445
/* Rotate the rows of PS such that insns scheduled at time
2446
   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
2447
void
2448
rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2449
{
2450
  int i, row, backward_rotates;
2451
  int last_row = ps->ii - 1;
2452
 
2453
  if (start_cycle == 0)
2454
    return;
2455
 
2456
  backward_rotates = SMODULO (start_cycle, ps->ii);
2457
 
2458
  /* Revisit later and optimize this into a single loop.  */
2459
  for (i = 0; i < backward_rotates; i++)
2460
    {
2461
      ps_insn_ptr first_row = ps->rows[0];
2462
 
2463
      for (row = 0; row < last_row; row++)
2464
        ps->rows[row] = ps->rows[row+1];
2465
 
2466
      ps->rows[last_row] = first_row;
2467
    }
2468
 
2469
  ps->max_cycle -= start_cycle;
2470
  ps->min_cycle -= start_cycle;
2471
}
2472
 
2473
/* Remove the node N from the partial schedule PS; because we restart the DFA
2474
   each time we want to check for resource conflicts; this is equivalent to
2475
   unscheduling the node N.  */
2476
static bool
2477
ps_unschedule_node (partial_schedule_ptr ps, ddg_node_ptr n)
2478
{
2479
  ps_insn_ptr ps_i;
2480
  int row = SMODULO (SCHED_TIME (n), ps->ii);
2481
 
2482
  if (row < 0 || row > ps->ii)
2483
    return false;
2484
 
2485
  for (ps_i = ps->rows[row];
2486
       ps_i &&  ps_i->node != n;
2487
       ps_i = ps_i->next_in_row);
2488
  if (!ps_i)
2489
    return false;
2490
 
2491
  return remove_node_from_ps (ps, ps_i);
2492
}
2493
#endif /* INSN_SCHEDULING */
2494
 
2495
static bool
2496
gate_handle_sms (void)
2497
{
2498
  return (optimize > 0 && flag_modulo_sched);
2499
}
2500
 
2501
 
2502
/* Run instruction scheduler.  */
2503
/* Perform SMS module scheduling.  */
2504
static unsigned int
2505
rest_of_handle_sms (void)
2506
{
2507
#ifdef INSN_SCHEDULING
2508
  basic_block bb;
2509
 
2510
  /* We want to be able to create new pseudos.  */
2511
  no_new_pseudos = 0;
2512
  /* Collect loop information to be used in SMS.  */
2513
  cfg_layout_initialize (CLEANUP_UPDATE_LIFE);
2514
  sms_schedule ();
2515
 
2516
  /* Update the life information, because we add pseudos.  */
2517
  max_regno = max_reg_num ();
2518
  allocate_reg_info (max_regno, FALSE, FALSE);
2519
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
2520
                    (PROP_DEATH_NOTES
2521
                     | PROP_REG_INFO
2522
                     | PROP_KILL_DEAD_CODE
2523
                     | PROP_SCAN_DEAD_CODE));
2524
 
2525
  no_new_pseudos = 1;
2526
 
2527
  /* Finalize layout changes.  */
2528
  FOR_EACH_BB (bb)
2529
    if (bb->next_bb != EXIT_BLOCK_PTR)
2530
      bb->aux = bb->next_bb;
2531
  cfg_layout_finalize ();
2532
  free_dominance_info (CDI_DOMINATORS);
2533
#endif /* INSN_SCHEDULING */
2534
  return 0;
2535
}
2536
 
2537
struct tree_opt_pass pass_sms =
2538
{
2539
  "sms",                                /* name */
2540
  gate_handle_sms,                      /* gate */
2541
  rest_of_handle_sms,                   /* execute */
2542
  NULL,                                 /* sub */
2543
  NULL,                                 /* next */
2544
  0,                                    /* static_pass_number */
2545
  TV_SMS,                               /* tv_id */
2546
  0,                                    /* properties_required */
2547
  0,                                    /* properties_provided */
2548
  0,                                    /* properties_destroyed */
2549
  TODO_dump_func,                       /* todo_flags_start */
2550
  TODO_dump_func |
2551
  TODO_ggc_collect,                     /* todo_flags_finish */
2552
  'm'                                   /* letter */
2553
};
2554
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.