OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [loop-unroll.c] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Loop unrolling and peeling.
2
   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010
3
   Free Software Foundation, Inc.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tm.h"
25
#include "rtl.h"
26
#include "hard-reg-set.h"
27
#include "obstack.h"
28
#include "basic-block.h"
29
#include "cfgloop.h"
30
#include "cfglayout.h"
31
#include "params.h"
32
#include "output.h"
33
#include "expr.h"
34
#include "hashtab.h"
35
#include "recog.h"
36
 
37
/* This pass performs loop unrolling and peeling.  We only perform these
38
   optimizations on innermost loops (with single exception) because
39
   the impact on performance is greatest here, and we want to avoid
40
   unnecessary code size growth.  The gain is caused by greater sequentiality
41
   of code, better code to optimize for further passes and in some cases
42
   by fewer testings of exit conditions.  The main problem is code growth,
43
   that impacts performance negatively due to effect of caches.
44
 
45
   What we do:
46
 
47
   -- complete peeling of once-rolling loops; this is the above mentioned
48
      exception, as this causes loop to be cancelled completely and
49
      does not cause code growth
50
   -- complete peeling of loops that roll (small) constant times.
51
   -- simple peeling of first iterations of loops that do not roll much
52
      (according to profile feedback)
53
   -- unrolling of loops that roll constant times; this is almost always
54
      win, as we get rid of exit condition tests.
55
   -- unrolling of loops that roll number of times that we can compute
56
      in runtime; we also get rid of exit condition tests here, but there
57
      is the extra expense for calculating the number of iterations
58
   -- simple unrolling of remaining loops; this is performed only if we
59
      are asked to, as the gain is questionable in this case and often
60
      it may even slow down the code
61
   For more detailed descriptions of each of those, see comments at
62
   appropriate function below.
63
 
64
   There is a lot of parameters (defined and described in params.def) that
65
   control how much we unroll/peel.
66
 
67
   ??? A great problem is that we don't have a good way how to determine
68
   how many times we should unroll the loop; the experiments I have made
69
   showed that this choice may affect performance in order of several %.
70
   */
71
 
72
/* Information about induction variables to split.  */
73
 
74
struct iv_to_split
75
{
76
  rtx insn;             /* The insn in that the induction variable occurs.  */
77
  rtx base_var;         /* The variable on that the values in the further
78
                           iterations are based.  */
79
  rtx step;             /* Step of the induction variable.  */
80
  struct iv_to_split *next; /* Next entry in walking order.  */
81
  unsigned n_loc;
82
  unsigned loc[3];      /* Location where the definition of the induction
83
                           variable occurs in the insn.  For example if
84
                           N_LOC is 2, the expression is located at
85
                           XEXP (XEXP (single_set, loc[0]), loc[1]).  */
86
};
87
 
88
/* Information about accumulators to expand.  */
89
 
90
struct var_to_expand
91
{
92
  rtx insn;                        /* The insn in that the variable expansion occurs.  */
93
  rtx reg;                         /* The accumulator which is expanded.  */
94
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
95
  struct var_to_expand *next;      /* Next entry in walking order.  */
96
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
97
                                      or multiplication.  */
98
  int expansion_count;             /* Count the number of expansions generated so far.  */
99
  int reuse_expansion;             /* The expansion we intend to reuse to expand
100
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
101
                                      the original accumulator.  Else use
102
                                      var_expansions[REUSE_EXPANSION - 1].  */
103
  unsigned accum_pos;              /* The position in which the accumulator is placed in
104
                                      the insn src.  For example in x = x + something
105
                                      accum_pos is 0 while in x = something + x accum_pos
106
                                      is 1.  */
107
};
108
 
109
/* Information about optimization applied in
110
   the unrolled loop.  */
111
 
112
struct opt_info
113
{
114
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
115
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
116
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
117
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
118
                                      to expand.  */
119
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
120
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
121
  unsigned first_new_block;        /* The first basic block that was
122
                                      duplicated.  */
123
  basic_block loop_exit;           /* The loop exit basic block.  */
124
  basic_block loop_preheader;      /* The loop preheader basic block.  */
125
};
126
 
127
static void decide_unrolling_and_peeling (int);
128
static void peel_loops_completely (int);
129
static void decide_peel_simple (struct loop *, int);
130
static void decide_peel_once_rolling (struct loop *, int);
131
static void decide_peel_completely (struct loop *, int);
132
static void decide_unroll_stupid (struct loop *, int);
133
static void decide_unroll_constant_iterations (struct loop *, int);
134
static void decide_unroll_runtime_iterations (struct loop *, int);
135
static void peel_loop_simple (struct loop *);
136
static void peel_loop_completely (struct loop *);
137
static void unroll_loop_stupid (struct loop *);
138
static void unroll_loop_constant_iterations (struct loop *);
139
static void unroll_loop_runtime_iterations (struct loop *);
140
static struct opt_info *analyze_insns_in_loop (struct loop *);
141
static void opt_info_start_duplication (struct opt_info *);
142
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
143
static void free_opt_info (struct opt_info *);
144
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
145
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
146
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
147
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
148
static void insert_var_expansion_initialization (struct var_to_expand *,
149
                                                 basic_block);
150
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
151
                                             basic_block);
152
static rtx get_expansion (struct var_to_expand *);
153
 
154
/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
155
void
156
unroll_and_peel_loops (int flags)
157
{
158
  struct loop *loop;
159
  bool check;
160
  loop_iterator li;
161
 
162
  /* First perform complete loop peeling (it is almost surely a win,
163
     and affects parameters for further decision a lot).  */
164
  peel_loops_completely (flags);
165
 
166
  /* Now decide rest of unrolling and peeling.  */
167
  decide_unrolling_and_peeling (flags);
168
 
169
  /* Scan the loops, inner ones first.  */
170
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
171
    {
172
      check = true;
173
      /* And perform the appropriate transformations.  */
174
      switch (loop->lpt_decision.decision)
175
        {
176
        case LPT_PEEL_COMPLETELY:
177
          /* Already done.  */
178
          gcc_unreachable ();
179
        case LPT_PEEL_SIMPLE:
180
          peel_loop_simple (loop);
181
          break;
182
        case LPT_UNROLL_CONSTANT:
183
          unroll_loop_constant_iterations (loop);
184
          break;
185
        case LPT_UNROLL_RUNTIME:
186
          unroll_loop_runtime_iterations (loop);
187
          break;
188
        case LPT_UNROLL_STUPID:
189
          unroll_loop_stupid (loop);
190
          break;
191
        case LPT_NONE:
192
          check = false;
193
          break;
194
        default:
195
          gcc_unreachable ();
196
        }
197
      if (check)
198
        {
199
#ifdef ENABLE_CHECKING
200
          verify_dominators (CDI_DOMINATORS);
201
          verify_loop_structure ();
202
#endif
203
        }
204
    }
205
 
206
  iv_analysis_done ();
207
}
208
 
209
/* Check whether exit of the LOOP is at the end of loop body.  */
210
 
211
static bool
212
loop_exit_at_end_p (struct loop *loop)
213
{
214
  struct niter_desc *desc = get_simple_loop_desc (loop);
215
  rtx insn;
216
 
217
  if (desc->in_edge->dest != loop->latch)
218
    return false;
219
 
220
  /* Check that the latch is empty.  */
221
  FOR_BB_INSNS (loop->latch, insn)
222
    {
223
      if (INSN_P (insn))
224
        return false;
225
    }
226
 
227
  return true;
228
}
229
 
230
/* Depending on FLAGS, check whether to peel loops completely and do so.  */
231
static void
232
peel_loops_completely (int flags)
233
{
234
  struct loop *loop;
235
  loop_iterator li;
236
 
237
  /* Scan the loops, the inner ones first.  */
238
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
239
    {
240
      loop->lpt_decision.decision = LPT_NONE;
241
 
242
      if (dump_file)
243
        fprintf (dump_file,
244
                 "\n;; *** Considering loop %d for complete peeling ***\n",
245
                 loop->num);
246
 
247
      loop->ninsns = num_loop_insns (loop);
248
 
249
      decide_peel_once_rolling (loop, flags);
250
      if (loop->lpt_decision.decision == LPT_NONE)
251
        decide_peel_completely (loop, flags);
252
 
253
      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
254
        {
255
          peel_loop_completely (loop);
256
#ifdef ENABLE_CHECKING
257
          verify_dominators (CDI_DOMINATORS);
258
          verify_loop_structure ();
259
#endif
260
        }
261
    }
262
}
263
 
264
/* Decide whether unroll or peel loops (depending on FLAGS) and how much.  */
265
static void
266
decide_unrolling_and_peeling (int flags)
267
{
268
  struct loop *loop;
269
  loop_iterator li;
270
 
271
  /* Scan the loops, inner ones first.  */
272
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
273
    {
274
      loop->lpt_decision.decision = LPT_NONE;
275
 
276
      if (dump_file)
277
        fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
278
 
279
      /* Do not peel cold areas.  */
280
      if (optimize_loop_for_size_p (loop))
281
        {
282
          if (dump_file)
283
            fprintf (dump_file, ";; Not considering loop, cold area\n");
284
          continue;
285
        }
286
 
287
      /* Can the loop be manipulated?  */
288
      if (!can_duplicate_loop_p (loop))
289
        {
290
          if (dump_file)
291
            fprintf (dump_file,
292
                     ";; Not considering loop, cannot duplicate\n");
293
          continue;
294
        }
295
 
296
      /* Skip non-innermost loops.  */
297
      if (loop->inner)
298
        {
299
          if (dump_file)
300
            fprintf (dump_file, ";; Not considering loop, is not innermost\n");
301
          continue;
302
        }
303
 
304
      loop->ninsns = num_loop_insns (loop);
305
      loop->av_ninsns = average_num_loop_insns (loop);
306
 
307
      /* Try transformations one by one in decreasing order of
308
         priority.  */
309
 
310
      decide_unroll_constant_iterations (loop, flags);
311
      if (loop->lpt_decision.decision == LPT_NONE)
312
        decide_unroll_runtime_iterations (loop, flags);
313
      if (loop->lpt_decision.decision == LPT_NONE)
314
        decide_unroll_stupid (loop, flags);
315
      if (loop->lpt_decision.decision == LPT_NONE)
316
        decide_peel_simple (loop, flags);
317
    }
318
}
319
 
320
/* Decide whether the LOOP is once rolling and suitable for complete
321
   peeling.  */
322
static void
323
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
324
{
325
  struct niter_desc *desc;
326
 
327
  if (dump_file)
328
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
329
 
330
  /* Is the loop small enough?  */
331
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
332
    {
333
      if (dump_file)
334
        fprintf (dump_file, ";; Not considering loop, is too big\n");
335
      return;
336
    }
337
 
338
  /* Check for simple loops.  */
339
  desc = get_simple_loop_desc (loop);
340
 
341
  /* Check number of iterations.  */
342
  if (!desc->simple_p
343
      || desc->assumptions
344
      || desc->infinite
345
      || !desc->const_iter
346
      || desc->niter != 0)
347
    {
348
      if (dump_file)
349
        fprintf (dump_file,
350
                 ";; Unable to prove that the loop rolls exactly once\n");
351
      return;
352
    }
353
 
354
  /* Success.  */
355
  if (dump_file)
356
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
357
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
358
}
359
 
360
/* Decide whether the LOOP is suitable for complete peeling.  */
361
static void
362
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
363
{
364
  unsigned npeel;
365
  struct niter_desc *desc;
366
 
367
  if (dump_file)
368
    fprintf (dump_file, "\n;; Considering peeling completely\n");
369
 
370
  /* Skip non-innermost loops.  */
371
  if (loop->inner)
372
    {
373
      if (dump_file)
374
        fprintf (dump_file, ";; Not considering loop, is not innermost\n");
375
      return;
376
    }
377
 
378
  /* Do not peel cold areas.  */
379
  if (optimize_loop_for_size_p (loop))
380
    {
381
      if (dump_file)
382
        fprintf (dump_file, ";; Not considering loop, cold area\n");
383
      return;
384
    }
385
 
386
  /* Can the loop be manipulated?  */
387
  if (!can_duplicate_loop_p (loop))
388
    {
389
      if (dump_file)
390
        fprintf (dump_file,
391
                 ";; Not considering loop, cannot duplicate\n");
392
      return;
393
    }
394
 
395
  /* npeel = number of iterations to peel.  */
396
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
397
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
398
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
399
 
400
  /* Is the loop small enough?  */
401
  if (!npeel)
402
    {
403
      if (dump_file)
404
        fprintf (dump_file, ";; Not considering loop, is too big\n");
405
      return;
406
    }
407
 
408
  /* Check for simple loops.  */
409
  desc = get_simple_loop_desc (loop);
410
 
411
  /* Check number of iterations.  */
412
  if (!desc->simple_p
413
      || desc->assumptions
414
      || !desc->const_iter
415
      || desc->infinite)
416
    {
417
      if (dump_file)
418
        fprintf (dump_file,
419
                 ";; Unable to prove that the loop iterates constant times\n");
420
      return;
421
    }
422
 
423
  if (desc->niter > npeel - 1)
424
    {
425
      if (dump_file)
426
        {
427
          fprintf (dump_file,
428
                   ";; Not peeling loop completely, rolls too much (");
429
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
430
          fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
431
        }
432
      return;
433
    }
434
 
435
  /* Success.  */
436
  if (dump_file)
437
    fprintf (dump_file, ";; Decided to peel loop completely\n");
438
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
439
}
440
 
441
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
442
   completely.  The transformation done:
443
 
444
   for (i = 0; i < 4; i++)
445
     body;
446
 
447
   ==>
448
 
449
   i = 0;
450
   body; i++;
451
   body; i++;
452
   body; i++;
453
   body; i++;
454
   */
455
static void
456
peel_loop_completely (struct loop *loop)
457
{
458
  sbitmap wont_exit;
459
  unsigned HOST_WIDE_INT npeel;
460
  unsigned i;
461
  VEC (edge, heap) *remove_edges;
462
  edge ein;
463
  struct niter_desc *desc = get_simple_loop_desc (loop);
464
  struct opt_info *opt_info = NULL;
465
 
466
  npeel = desc->niter;
467
 
468
  if (npeel)
469
    {
470
      bool ok;
471
 
472
      wont_exit = sbitmap_alloc (npeel + 1);
473
      sbitmap_ones (wont_exit);
474
      RESET_BIT (wont_exit, 0);
475
      if (desc->noloop_assumptions)
476
        RESET_BIT (wont_exit, 1);
477
 
478
      remove_edges = NULL;
479
 
480
      if (flag_split_ivs_in_unroller)
481
        opt_info = analyze_insns_in_loop (loop);
482
 
483
      opt_info_start_duplication (opt_info);
484
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
485
                                          npeel,
486
                                          wont_exit, desc->out_edge,
487
                                          &remove_edges,
488
                                          DLTHE_FLAG_UPDATE_FREQ
489
                                          | DLTHE_FLAG_COMPLETTE_PEEL
490
                                          | (opt_info
491
                                             ? DLTHE_RECORD_COPY_NUMBER : 0));
492
      gcc_assert (ok);
493
 
494
      free (wont_exit);
495
 
496
      if (opt_info)
497
        {
498
          apply_opt_in_copies (opt_info, npeel, false, true);
499
          free_opt_info (opt_info);
500
        }
501
 
502
      /* Remove the exit edges.  */
503
      for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
504
        remove_path (ein);
505
      VEC_free (edge, heap, remove_edges);
506
    }
507
 
508
  ein = desc->in_edge;
509
  free_simple_loop_desc (loop);
510
 
511
  /* Now remove the unreachable part of the last iteration and cancel
512
     the loop.  */
513
  remove_path (ein);
514
 
515
  if (dump_file)
516
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
517
}
518
 
519
/* Decide whether to unroll LOOP iterating constant number of times
520
   and how much.  */
521
 
522
static void
523
decide_unroll_constant_iterations (struct loop *loop, int flags)
524
{
525
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
526
  struct niter_desc *desc;
527
 
528
  if (!(flags & UAP_UNROLL))
529
    {
530
      /* We were not asked to, just return back silently.  */
531
      return;
532
    }
533
 
534
  if (dump_file)
535
    fprintf (dump_file,
536
             "\n;; Considering unrolling loop with constant "
537
             "number of iterations\n");
538
 
539
  /* nunroll = total number of copies of the original loop body in
540
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
541
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
542
  nunroll_by_av
543
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
544
  if (nunroll > nunroll_by_av)
545
    nunroll = nunroll_by_av;
546
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
547
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
548
 
549
  /* Skip big loops.  */
550
  if (nunroll <= 1)
551
    {
552
      if (dump_file)
553
        fprintf (dump_file, ";; Not considering loop, is too big\n");
554
      return;
555
    }
556
 
557
  /* Check for simple loops.  */
558
  desc = get_simple_loop_desc (loop);
559
 
560
  /* Check number of iterations.  */
561
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
562
    {
563
      if (dump_file)
564
        fprintf (dump_file,
565
                 ";; Unable to prove that the loop iterates constant times\n");
566
      return;
567
    }
568
 
569
  /* Check whether the loop rolls enough to consider.  */
570
  if (desc->niter < 2 * nunroll)
571
    {
572
      if (dump_file)
573
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
574
      return;
575
    }
576
 
577
  /* Success; now compute number of iterations to unroll.  We alter
578
     nunroll so that as few as possible copies of loop body are
579
     necessary, while still not decreasing the number of unrollings
580
     too much (at most by 1).  */
581
  best_copies = 2 * nunroll + 10;
582
 
583
  i = 2 * nunroll + 2;
584
  if (i - 1 >= desc->niter)
585
    i = desc->niter - 2;
586
 
587
  for (; i >= nunroll - 1; i--)
588
    {
589
      unsigned exit_mod = desc->niter % (i + 1);
590
 
591
      if (!loop_exit_at_end_p (loop))
592
        n_copies = exit_mod + i + 1;
593
      else if (exit_mod != (unsigned) i
594
               || desc->noloop_assumptions != NULL_RTX)
595
        n_copies = exit_mod + i + 2;
596
      else
597
        n_copies = i + 1;
598
 
599
      if (n_copies < best_copies)
600
        {
601
          best_copies = n_copies;
602
          best_unroll = i;
603
        }
604
    }
605
 
606
  if (dump_file)
607
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
608
             best_unroll + 1, best_copies, nunroll);
609
 
610
  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
611
  loop->lpt_decision.times = best_unroll;
612
 
613
  if (dump_file)
614
    fprintf (dump_file,
615
             ";; Decided to unroll the constant times rolling loop, %d times.\n",
616
             loop->lpt_decision.times);
617
}
618
 
619
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
620
   times.  The transformation does this:
621
 
622
   for (i = 0; i < 102; i++)
623
     body;
624
 
625
   ==>
626
 
627
   i = 0;
628
   body; i++;
629
   body; i++;
630
   while (i < 102)
631
     {
632
       body; i++;
633
       body; i++;
634
       body; i++;
635
       body; i++;
636
     }
637
  */
638
static void
639
unroll_loop_constant_iterations (struct loop *loop)
640
{
641
  unsigned HOST_WIDE_INT niter;
642
  unsigned exit_mod;
643
  sbitmap wont_exit;
644
  unsigned i;
645
  VEC (edge, heap) *remove_edges;
646
  edge e;
647
  unsigned max_unroll = loop->lpt_decision.times;
648
  struct niter_desc *desc = get_simple_loop_desc (loop);
649
  bool exit_at_end = loop_exit_at_end_p (loop);
650
  struct opt_info *opt_info = NULL;
651
  bool ok;
652
 
653
  niter = desc->niter;
654
 
655
  /* Should not get here (such loop should be peeled instead).  */
656
  gcc_assert (niter > max_unroll + 1);
657
 
658
  exit_mod = niter % (max_unroll + 1);
659
 
660
  wont_exit = sbitmap_alloc (max_unroll + 1);
661
  sbitmap_ones (wont_exit);
662
 
663
  remove_edges = NULL;
664
  if (flag_split_ivs_in_unroller
665
      || flag_variable_expansion_in_unroller)
666
    opt_info = analyze_insns_in_loop (loop);
667
 
668
  if (!exit_at_end)
669
    {
670
      /* The exit is not at the end of the loop; leave exit test
671
         in the first copy, so that the loops that start with test
672
         of exit condition have continuous body after unrolling.  */
673
 
674
      if (dump_file)
675
        fprintf (dump_file, ";; Condition on beginning of loop.\n");
676
 
677
      /* Peel exit_mod iterations.  */
678
      RESET_BIT (wont_exit, 0);
679
      if (desc->noloop_assumptions)
680
        RESET_BIT (wont_exit, 1);
681
 
682
      if (exit_mod)
683
        {
684
          opt_info_start_duplication (opt_info);
685
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
686
                                              exit_mod,
687
                                              wont_exit, desc->out_edge,
688
                                              &remove_edges,
689
                                              DLTHE_FLAG_UPDATE_FREQ
690
                                              | (opt_info && exit_mod > 1
691
                                                 ? DLTHE_RECORD_COPY_NUMBER
692
                                                   : 0));
693
          gcc_assert (ok);
694
 
695
          if (opt_info && exit_mod > 1)
696
            apply_opt_in_copies (opt_info, exit_mod, false, false);
697
 
698
          desc->noloop_assumptions = NULL_RTX;
699
          desc->niter -= exit_mod;
700
          desc->niter_max -= exit_mod;
701
        }
702
 
703
      SET_BIT (wont_exit, 1);
704
    }
705
  else
706
    {
707
      /* Leave exit test in last copy, for the same reason as above if
708
         the loop tests the condition at the end of loop body.  */
709
 
710
      if (dump_file)
711
        fprintf (dump_file, ";; Condition on end of loop.\n");
712
 
713
      /* We know that niter >= max_unroll + 2; so we do not need to care of
714
         case when we would exit before reaching the loop.  So just peel
715
         exit_mod + 1 iterations.  */
716
      if (exit_mod != max_unroll
717
          || desc->noloop_assumptions)
718
        {
719
          RESET_BIT (wont_exit, 0);
720
          if (desc->noloop_assumptions)
721
            RESET_BIT (wont_exit, 1);
722
 
723
          opt_info_start_duplication (opt_info);
724
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
725
                                              exit_mod + 1,
726
                                              wont_exit, desc->out_edge,
727
                                              &remove_edges,
728
                                              DLTHE_FLAG_UPDATE_FREQ
729
                                              | (opt_info && exit_mod > 0
730
                                                 ? DLTHE_RECORD_COPY_NUMBER
731
                                                   : 0));
732
          gcc_assert (ok);
733
 
734
          if (opt_info && exit_mod > 0)
735
            apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
736
 
737
          desc->niter -= exit_mod + 1;
738
          desc->niter_max -= exit_mod + 1;
739
          desc->noloop_assumptions = NULL_RTX;
740
 
741
          SET_BIT (wont_exit, 0);
742
          SET_BIT (wont_exit, 1);
743
        }
744
 
745
      RESET_BIT (wont_exit, max_unroll);
746
    }
747
 
748
  /* Now unroll the loop.  */
749
 
750
  opt_info_start_duplication (opt_info);
751
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
752
                                      max_unroll,
753
                                      wont_exit, desc->out_edge,
754
                                      &remove_edges,
755
                                      DLTHE_FLAG_UPDATE_FREQ
756
                                      | (opt_info
757
                                         ? DLTHE_RECORD_COPY_NUMBER
758
                                           : 0));
759
  gcc_assert (ok);
760
 
761
  if (opt_info)
762
    {
763
      apply_opt_in_copies (opt_info, max_unroll, true, true);
764
      free_opt_info (opt_info);
765
    }
766
 
767
  free (wont_exit);
768
 
769
  if (exit_at_end)
770
    {
771
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
772
      /* Find a new in and out edge; they are in the last copy we have made.  */
773
 
774
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
775
        {
776
          desc->out_edge = EDGE_SUCC (exit_block, 0);
777
          desc->in_edge = EDGE_SUCC (exit_block, 1);
778
        }
779
      else
780
        {
781
          desc->out_edge = EDGE_SUCC (exit_block, 1);
782
          desc->in_edge = EDGE_SUCC (exit_block, 0);
783
        }
784
    }
785
 
786
  desc->niter /= max_unroll + 1;
787
  desc->niter_max /= max_unroll + 1;
788
  desc->niter_expr = GEN_INT (desc->niter);
789
 
790
  /* Remove the edges.  */
791
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
792
    remove_path (e);
793
  VEC_free (edge, heap, remove_edges);
794
 
795
  if (dump_file)
796
    fprintf (dump_file,
797
             ";; Unrolled loop %d times, constant # of iterations %i insns\n",
798
             max_unroll, num_loop_insns (loop));
799
}
800
 
801
/* Decide whether to unroll LOOP iterating runtime computable number of times
802
   and how much.  */
803
static void
804
decide_unroll_runtime_iterations (struct loop *loop, int flags)
805
{
806
  unsigned nunroll, nunroll_by_av, i;
807
  struct niter_desc *desc;
808
 
809
  if (!(flags & UAP_UNROLL))
810
    {
811
      /* We were not asked to, just return back silently.  */
812
      return;
813
    }
814
 
815
  if (dump_file)
816
    fprintf (dump_file,
817
             "\n;; Considering unrolling loop with runtime "
818
             "computable number of iterations\n");
819
 
820
  /* nunroll = total number of copies of the original loop body in
821
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
822
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
823
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
824
  if (nunroll > nunroll_by_av)
825
    nunroll = nunroll_by_av;
826
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
827
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
828
 
829
  /* Skip big loops.  */
830
  if (nunroll <= 1)
831
    {
832
      if (dump_file)
833
        fprintf (dump_file, ";; Not considering loop, is too big\n");
834
      return;
835
    }
836
 
837
  /* Check for simple loops.  */
838
  desc = get_simple_loop_desc (loop);
839
 
840
  /* Check simpleness.  */
841
  if (!desc->simple_p || desc->assumptions)
842
    {
843
      if (dump_file)
844
        fprintf (dump_file,
845
                 ";; Unable to prove that the number of iterations "
846
                 "can be counted in runtime\n");
847
      return;
848
    }
849
 
850
  if (desc->const_iter)
851
    {
852
      if (dump_file)
853
        fprintf (dump_file, ";; Loop iterates constant times\n");
854
      return;
855
    }
856
 
857
  /* If we have profile feedback, check whether the loop rolls.  */
858
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
859
    {
860
      if (dump_file)
861
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
862
      return;
863
    }
864
 
865
  /* Success; now force nunroll to be power of 2, as we are unable to
866
     cope with overflows in computation of number of iterations.  */
867
  for (i = 1; 2 * i <= nunroll; i *= 2)
868
    continue;
869
 
870
  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
871
  loop->lpt_decision.times = i - 1;
872
 
873
  if (dump_file)
874
    fprintf (dump_file,
875
             ";; Decided to unroll the runtime computable "
876
             "times rolling loop, %d times.\n",
877
             loop->lpt_decision.times);
878
}
879
 
880
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
881
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
882
   and NULL is returned instead.  */
883
 
884
basic_block
885
split_edge_and_insert (edge e, rtx insns)
886
{
887
  basic_block bb;
888
 
889
  if (!insns)
890
    return NULL;
891
  bb = split_edge (e);
892
  emit_insn_after (insns, BB_END (bb));
893
 
894
  /* ??? We used to assume that INSNS can contain control flow insns, and
895
     that we had to try to find sub basic blocks in BB to maintain a valid
896
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
897
     and call break_superblocks when going out of cfglayout mode.  But it
898
     turns out that this never happens; and that if it does ever happen,
899
     the verify_flow_info call in loop_optimizer_finalize would fail.
900
 
901
     There are two reasons why we expected we could have control flow insns
902
     in INSNS.  The first is when a comparison has to be done in parts, and
903
     the second is when the number of iterations is computed for loops with
904
     the number of iterations known at runtime.  In both cases, test cases
905
     to get control flow in INSNS appear to be impossible to construct:
906
 
907
      * If do_compare_rtx_and_jump needs several branches to do comparison
908
        in a mode that needs comparison by parts, we cannot analyze the
909
        number of iterations of the loop, and we never get to unrolling it.
910
 
911
      * The code in expand_divmod that was suspected to cause creation of
912
        branching code seems to be only accessed for signed division.  The
913
        divisions used by # of iterations analysis are always unsigned.
914
        Problems might arise on architectures that emits branching code
915
        for some operations that may appear in the unroller (especially
916
        for division), but we have no such architectures.
917
 
918
     Considering all this, it was decided that we should for now assume
919
     that INSNS can in theory contain control flow insns, but in practice
920
     it never does.  So we don't handle the theoretical case, and should
921
     a real failure ever show up, we have a pretty good clue for how to
922
     fix it.  */
923
 
924
  return bb;
925
}
926
 
927
/* Unroll LOOP for that we are able to count number of iterations in runtime
928
   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
929
   extra care for case n < 0):
930
 
931
   for (i = 0; i < n; i++)
932
     body;
933
 
934
   ==>
935
 
936
   i = 0;
937
   mod = n % 4;
938
 
939
   switch (mod)
940
     {
941
       case 3:
942
         body; i++;
943
       case 2:
944
         body; i++;
945
       case 1:
946
         body; i++;
947
       case 0: ;
948
     }
949
 
950
   while (i < n)
951
     {
952
       body; i++;
953
       body; i++;
954
       body; i++;
955
       body; i++;
956
     }
957
   */
958
static void
959
unroll_loop_runtime_iterations (struct loop *loop)
960
{
961
  rtx old_niter, niter, init_code, branch_code, tmp;
962
  unsigned i, j, p;
963
  basic_block preheader, *body, swtch, ezc_swtch;
964
  VEC (basic_block, heap) *dom_bbs;
965
  sbitmap wont_exit;
966
  int may_exit_copy;
967
  unsigned n_peel;
968
  VEC (edge, heap) *remove_edges;
969
  edge e;
970
  bool extra_zero_check, last_may_exit;
971
  unsigned max_unroll = loop->lpt_decision.times;
972
  struct niter_desc *desc = get_simple_loop_desc (loop);
973
  bool exit_at_end = loop_exit_at_end_p (loop);
974
  struct opt_info *opt_info = NULL;
975
  bool ok;
976
 
977
  if (flag_split_ivs_in_unroller
978
      || flag_variable_expansion_in_unroller)
979
    opt_info = analyze_insns_in_loop (loop);
980
 
981
  /* Remember blocks whose dominators will have to be updated.  */
982
  dom_bbs = NULL;
983
 
984
  body = get_loop_body (loop);
985
  for (i = 0; i < loop->num_nodes; i++)
986
    {
987
      VEC (basic_block, heap) *ldom;
988
      basic_block bb;
989
 
990
      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
991
      for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
992
        if (!flow_bb_inside_loop_p (loop, bb))
993
          VEC_safe_push (basic_block, heap, dom_bbs, bb);
994
 
995
      VEC_free (basic_block, heap, ldom);
996
    }
997
  free (body);
998
 
999
  if (!exit_at_end)
1000
    {
1001
      /* Leave exit in first copy (for explanation why see comment in
1002
         unroll_loop_constant_iterations).  */
1003
      may_exit_copy = 0;
1004
      n_peel = max_unroll - 1;
1005
      extra_zero_check = true;
1006
      last_may_exit = false;
1007
    }
1008
  else
1009
    {
1010
      /* Leave exit in last copy (for explanation why see comment in
1011
         unroll_loop_constant_iterations).  */
1012
      may_exit_copy = max_unroll;
1013
      n_peel = max_unroll;
1014
      extra_zero_check = false;
1015
      last_may_exit = true;
1016
    }
1017
 
1018
  /* Get expression for number of iterations.  */
1019
  start_sequence ();
1020
  old_niter = niter = gen_reg_rtx (desc->mode);
1021
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1022
  if (tmp != niter)
1023
    emit_move_insn (niter, tmp);
1024
 
1025
  /* Count modulo by ANDing it with max_unroll; we use the fact that
1026
     the number of unrollings is a power of two, and thus this is correct
1027
     even if there is overflow in the computation.  */
1028
  niter = expand_simple_binop (desc->mode, AND,
1029
                               niter,
1030
                               GEN_INT (max_unroll),
1031
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
1032
 
1033
  init_code = get_insns ();
1034
  end_sequence ();
1035
  unshare_all_rtl_in_chain (init_code);
1036
 
1037
  /* Precondition the loop.  */
1038
  split_edge_and_insert (loop_preheader_edge (loop), init_code);
1039
 
1040
  remove_edges = NULL;
1041
 
1042
  wont_exit = sbitmap_alloc (max_unroll + 2);
1043
 
1044
  /* Peel the first copy of loop body (almost always we must leave exit test
1045
     here; the only exception is when we have extra zero check and the number
1046
     of iterations is reliable.  Also record the place of (possible) extra
1047
     zero check.  */
1048
  sbitmap_zero (wont_exit);
1049
  if (extra_zero_check
1050
      && !desc->noloop_assumptions)
1051
    SET_BIT (wont_exit, 1);
1052
  ezc_swtch = loop_preheader_edge (loop)->src;
1053
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1054
                                      1, wont_exit, desc->out_edge,
1055
                                      &remove_edges,
1056
                                      DLTHE_FLAG_UPDATE_FREQ);
1057
  gcc_assert (ok);
1058
 
1059
  /* Record the place where switch will be built for preconditioning.  */
1060
  swtch = split_edge (loop_preheader_edge (loop));
1061
 
1062
  for (i = 0; i < n_peel; i++)
1063
    {
1064
      /* Peel the copy.  */
1065
      sbitmap_zero (wont_exit);
1066
      if (i != n_peel - 1 || !last_may_exit)
1067
        SET_BIT (wont_exit, 1);
1068
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1069
                                          1, wont_exit, desc->out_edge,
1070
                                          &remove_edges,
1071
                                          DLTHE_FLAG_UPDATE_FREQ);
1072
      gcc_assert (ok);
1073
 
1074
      /* Create item for switch.  */
1075
      j = n_peel - i - (extra_zero_check ? 0 : 1);
1076
      p = REG_BR_PROB_BASE / (i + 2);
1077
 
1078
      preheader = split_edge (loop_preheader_edge (loop));
1079
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1080
                                          block_label (preheader), p,
1081
                                          NULL_RTX);
1082
 
1083
      /* We rely on the fact that the compare and jump cannot be optimized out,
1084
         and hence the cfg we create is correct.  */
1085
      gcc_assert (branch_code != NULL_RTX);
1086
 
1087
      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1088
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1089
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1090
      e = make_edge (swtch, preheader,
1091
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1092
      e->probability = p;
1093
    }
1094
 
1095
  if (extra_zero_check)
1096
    {
1097
      /* Add branch for zero iterations.  */
1098
      p = REG_BR_PROB_BASE / (max_unroll + 1);
1099
      swtch = ezc_swtch;
1100
      preheader = split_edge (loop_preheader_edge (loop));
1101
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1102
                                          block_label (preheader), p,
1103
                                          NULL_RTX);
1104
      gcc_assert (branch_code != NULL_RTX);
1105
 
1106
      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1107
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1108
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1109
      e = make_edge (swtch, preheader,
1110
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1111
      e->probability = p;
1112
    }
1113
 
1114
  /* Recount dominators for outer blocks.  */
1115
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1116
 
1117
  /* And unroll loop.  */
1118
 
1119
  sbitmap_ones (wont_exit);
1120
  RESET_BIT (wont_exit, may_exit_copy);
1121
  opt_info_start_duplication (opt_info);
1122
 
1123
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1124
                                      max_unroll,
1125
                                      wont_exit, desc->out_edge,
1126
                                      &remove_edges,
1127
                                      DLTHE_FLAG_UPDATE_FREQ
1128
                                      | (opt_info
1129
                                         ? DLTHE_RECORD_COPY_NUMBER
1130
                                           : 0));
1131
  gcc_assert (ok);
1132
 
1133
  if (opt_info)
1134
    {
1135
      apply_opt_in_copies (opt_info, max_unroll, true, true);
1136
      free_opt_info (opt_info);
1137
    }
1138
 
1139
  free (wont_exit);
1140
 
1141
  if (exit_at_end)
1142
    {
1143
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1144
      /* Find a new in and out edge; they are in the last copy we have
1145
         made.  */
1146
 
1147
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1148
        {
1149
          desc->out_edge = EDGE_SUCC (exit_block, 0);
1150
          desc->in_edge = EDGE_SUCC (exit_block, 1);
1151
        }
1152
      else
1153
        {
1154
          desc->out_edge = EDGE_SUCC (exit_block, 1);
1155
          desc->in_edge = EDGE_SUCC (exit_block, 0);
1156
        }
1157
    }
1158
 
1159
  /* Remove the edges.  */
1160
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
1161
    remove_path (e);
1162
  VEC_free (edge, heap, remove_edges);
1163
 
1164
  /* We must be careful when updating the number of iterations due to
1165
     preconditioning and the fact that the value must be valid at entry
1166
     of the loop.  After passing through the above code, we see that
1167
     the correct new number of iterations is this:  */
1168
  gcc_assert (!desc->const_iter);
1169
  desc->niter_expr =
1170
    simplify_gen_binary (UDIV, desc->mode, old_niter,
1171
                         GEN_INT (max_unroll + 1));
1172
  desc->niter_max /= max_unroll + 1;
1173
  if (exit_at_end)
1174
    {
1175
      desc->niter_expr =
1176
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1177
      desc->noloop_assumptions = NULL_RTX;
1178
      desc->niter_max--;
1179
    }
1180
 
1181
  if (dump_file)
1182
    fprintf (dump_file,
1183
             ";; Unrolled loop %d times, counting # of iterations "
1184
             "in runtime, %i insns\n",
1185
             max_unroll, num_loop_insns (loop));
1186
 
1187
  VEC_free (basic_block, heap, dom_bbs);
1188
}
1189
 
1190
/* Decide whether to simply peel LOOP and how much.  */
1191
static void
1192
decide_peel_simple (struct loop *loop, int flags)
1193
{
1194
  unsigned npeel;
1195
  struct niter_desc *desc;
1196
 
1197
  if (!(flags & UAP_PEEL))
1198
    {
1199
      /* We were not asked to, just return back silently.  */
1200
      return;
1201
    }
1202
 
1203
  if (dump_file)
1204
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1205
 
1206
  /* npeel = number of iterations to peel.  */
1207
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1208
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1209
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1210
 
1211
  /* Skip big loops.  */
1212
  if (!npeel)
1213
    {
1214
      if (dump_file)
1215
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1216
      return;
1217
    }
1218
 
1219
  /* Check for simple loops.  */
1220
  desc = get_simple_loop_desc (loop);
1221
 
1222
  /* Check number of iterations.  */
1223
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1224
    {
1225
      if (dump_file)
1226
        fprintf (dump_file, ";; Loop iterates constant times\n");
1227
      return;
1228
    }
1229
 
1230
  /* Do not simply peel loops with branches inside -- it increases number
1231
     of mispredicts.  */
1232
  if (num_loop_branches (loop) > 1)
1233
    {
1234
      if (dump_file)
1235
        fprintf (dump_file, ";; Not peeling, contains branches\n");
1236
      return;
1237
    }
1238
 
1239
  if (loop->header->count)
1240
    {
1241
      unsigned niter = expected_loop_iterations (loop);
1242
      if (niter + 1 > npeel)
1243
        {
1244
          if (dump_file)
1245
            {
1246
              fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1247
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1248
                       (HOST_WIDEST_INT) (niter + 1));
1249
              fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1250
                       npeel);
1251
            }
1252
          return;
1253
        }
1254
      npeel = niter + 1;
1255
    }
1256
  else
1257
    {
1258
      /* For now we have no good heuristics to decide whether loop peeling
1259
         will be effective, so disable it.  */
1260
      if (dump_file)
1261
        fprintf (dump_file,
1262
                 ";; Not peeling loop, no evidence it will be profitable\n");
1263
      return;
1264
    }
1265
 
1266
  /* Success.  */
1267
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1268
  loop->lpt_decision.times = npeel;
1269
 
1270
  if (dump_file)
1271
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1272
             loop->lpt_decision.times);
1273
}
1274
 
1275
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1276
   while (cond)
1277
     body;
1278
 
1279
   ==>
1280
 
1281
   if (!cond) goto end;
1282
   body;
1283
   if (!cond) goto end;
1284
   body;
1285
   while (cond)
1286
     body;
1287
   end: ;
1288
   */
1289
static void
1290
peel_loop_simple (struct loop *loop)
1291
{
1292
  sbitmap wont_exit;
1293
  unsigned npeel = loop->lpt_decision.times;
1294
  struct niter_desc *desc = get_simple_loop_desc (loop);
1295
  struct opt_info *opt_info = NULL;
1296
  bool ok;
1297
 
1298
  if (flag_split_ivs_in_unroller && npeel > 1)
1299
    opt_info = analyze_insns_in_loop (loop);
1300
 
1301
  wont_exit = sbitmap_alloc (npeel + 1);
1302
  sbitmap_zero (wont_exit);
1303
 
1304
  opt_info_start_duplication (opt_info);
1305
 
1306
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1307
                                      npeel, wont_exit, NULL,
1308
                                      NULL, DLTHE_FLAG_UPDATE_FREQ
1309
                                      | (opt_info
1310
                                         ? DLTHE_RECORD_COPY_NUMBER
1311
                                           : 0));
1312
  gcc_assert (ok);
1313
 
1314
  free (wont_exit);
1315
 
1316
  if (opt_info)
1317
    {
1318
      apply_opt_in_copies (opt_info, npeel, false, false);
1319
      free_opt_info (opt_info);
1320
    }
1321
 
1322
  if (desc->simple_p)
1323
    {
1324
      if (desc->const_iter)
1325
        {
1326
          desc->niter -= npeel;
1327
          desc->niter_expr = GEN_INT (desc->niter);
1328
          desc->noloop_assumptions = NULL_RTX;
1329
        }
1330
      else
1331
        {
1332
          /* We cannot just update niter_expr, as its value might be clobbered
1333
             inside loop.  We could handle this by counting the number into
1334
             temporary just like we do in runtime unrolling, but it does not
1335
             seem worthwhile.  */
1336
          free_simple_loop_desc (loop);
1337
        }
1338
    }
1339
  if (dump_file)
1340
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1341
}
1342
 
1343
/* Decide whether to unroll LOOP stupidly and how much.  */
1344
static void
1345
decide_unroll_stupid (struct loop *loop, int flags)
1346
{
1347
  unsigned nunroll, nunroll_by_av, i;
1348
  struct niter_desc *desc;
1349
 
1350
  if (!(flags & UAP_UNROLL_ALL))
1351
    {
1352
      /* We were not asked to, just return back silently.  */
1353
      return;
1354
    }
1355
 
1356
  if (dump_file)
1357
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1358
 
1359
  /* nunroll = total number of copies of the original loop body in
1360
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1361
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1362
  nunroll_by_av
1363
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1364
  if (nunroll > nunroll_by_av)
1365
    nunroll = nunroll_by_av;
1366
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1367
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1368
 
1369
  /* Skip big loops.  */
1370
  if (nunroll <= 1)
1371
    {
1372
      if (dump_file)
1373
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1374
      return;
1375
    }
1376
 
1377
  /* Check for simple loops.  */
1378
  desc = get_simple_loop_desc (loop);
1379
 
1380
  /* Check simpleness.  */
1381
  if (desc->simple_p && !desc->assumptions)
1382
    {
1383
      if (dump_file)
1384
        fprintf (dump_file, ";; The loop is simple\n");
1385
      return;
1386
    }
1387
 
1388
  /* Do not unroll loops with branches inside -- it increases number
1389
     of mispredicts.  */
1390
  if (num_loop_branches (loop) > 1)
1391
    {
1392
      if (dump_file)
1393
        fprintf (dump_file, ";; Not unrolling, contains branches\n");
1394
      return;
1395
    }
1396
 
1397
  /* If we have profile feedback, check whether the loop rolls.  */
1398
  if (loop->header->count
1399
      && expected_loop_iterations (loop) < 2 * nunroll)
1400
    {
1401
      if (dump_file)
1402
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1403
      return;
1404
    }
1405
 
1406
  /* Success.  Now force nunroll to be power of 2, as it seems that this
1407
     improves results (partially because of better alignments, partially
1408
     because of some dark magic).  */
1409
  for (i = 1; 2 * i <= nunroll; i *= 2)
1410
    continue;
1411
 
1412
  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1413
  loop->lpt_decision.times = i - 1;
1414
 
1415
  if (dump_file)
1416
    fprintf (dump_file,
1417
             ";; Decided to unroll the loop stupidly, %d times.\n",
1418
             loop->lpt_decision.times);
1419
}
1420
 
1421
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1422
   while (cond)
1423
     body;
1424
 
1425
   ==>
1426
 
1427
   while (cond)
1428
     {
1429
       body;
1430
       if (!cond) break;
1431
       body;
1432
       if (!cond) break;
1433
       body;
1434
       if (!cond) break;
1435
       body;
1436
     }
1437
   */
1438
static void
1439
unroll_loop_stupid (struct loop *loop)
1440
{
1441
  sbitmap wont_exit;
1442
  unsigned nunroll = loop->lpt_decision.times;
1443
  struct niter_desc *desc = get_simple_loop_desc (loop);
1444
  struct opt_info *opt_info = NULL;
1445
  bool ok;
1446
 
1447
  if (flag_split_ivs_in_unroller
1448
      || flag_variable_expansion_in_unroller)
1449
    opt_info = analyze_insns_in_loop (loop);
1450
 
1451
 
1452
  wont_exit = sbitmap_alloc (nunroll + 1);
1453
  sbitmap_zero (wont_exit);
1454
  opt_info_start_duplication (opt_info);
1455
 
1456
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1457
                                      nunroll, wont_exit,
1458
                                      NULL, NULL,
1459
                                      DLTHE_FLAG_UPDATE_FREQ
1460
                                      | (opt_info
1461
                                         ? DLTHE_RECORD_COPY_NUMBER
1462
                                           : 0));
1463
  gcc_assert (ok);
1464
 
1465
  if (opt_info)
1466
    {
1467
      apply_opt_in_copies (opt_info, nunroll, true, true);
1468
      free_opt_info (opt_info);
1469
    }
1470
 
1471
  free (wont_exit);
1472
 
1473
  if (desc->simple_p)
1474
    {
1475
      /* We indeed may get here provided that there are nontrivial assumptions
1476
         for a loop to be really simple.  We could update the counts, but the
1477
         problem is that we are unable to decide which exit will be taken
1478
         (not really true in case the number of iterations is constant,
1479
         but noone will do anything with this information, so we do not
1480
         worry about it).  */
1481
      desc->simple_p = false;
1482
    }
1483
 
1484
  if (dump_file)
1485
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1486
             nunroll, num_loop_insns (loop));
1487
}
1488
 
1489
/* A hash function for information about insns to split.  */
1490
 
1491
static hashval_t
1492
si_info_hash (const void *ivts)
1493
{
1494
  return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
1495
}
1496
 
1497
/* An equality functions for information about insns to split.  */
1498
 
1499
static int
1500
si_info_eq (const void *ivts1, const void *ivts2)
1501
{
1502
  const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
1503
  const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
1504
 
1505
  return i1->insn == i2->insn;
1506
}
1507
 
1508
/* Return a hash for VES, which is really a "var_to_expand *".  */
1509
 
1510
static hashval_t
1511
ve_info_hash (const void *ves)
1512
{
1513
  return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
1514
}
1515
 
1516
/* Return true if IVTS1 and IVTS2 (which are really both of type
1517
   "var_to_expand *") refer to the same instruction.  */
1518
 
1519
static int
1520
ve_info_eq (const void *ivts1, const void *ivts2)
1521
{
1522
  const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
1523
  const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
1524
 
1525
  return i1->insn == i2->insn;
1526
}
1527
 
1528
/* Returns true if REG is referenced in one nondebug insn in LOOP.
1529
   Set *DEBUG_USES to the number of debug insns that reference the
1530
   variable.  */
1531
 
1532
bool
1533
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1534
                                  int *debug_uses)
1535
{
1536
  basic_block *body, bb;
1537
  unsigned i;
1538
  int count_ref = 0;
1539
  rtx insn;
1540
 
1541
  body = get_loop_body (loop);
1542
  for (i = 0; i < loop->num_nodes; i++)
1543
    {
1544
      bb = body[i];
1545
 
1546
      FOR_BB_INSNS (bb, insn)
1547
        if (!rtx_referenced_p (reg, insn))
1548
          continue;
1549
        else if (DEBUG_INSN_P (insn))
1550
          ++*debug_uses;
1551
        else if (++count_ref > 1)
1552
          break;
1553
    }
1554
  free (body);
1555
  return (count_ref  == 1);
1556
}
1557
 
1558
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1559
 
1560
static void
1561
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1562
{
1563
  basic_block *body, bb;
1564
  unsigned i;
1565
  rtx insn;
1566
 
1567
  body = get_loop_body (loop);
1568
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
1569
    {
1570
      bb = body[i];
1571
 
1572
      FOR_BB_INSNS (bb, insn)
1573
        if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1574
          continue;
1575
        else
1576
          {
1577
            validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1578
                             gen_rtx_UNKNOWN_VAR_LOC (), 0);
1579
            if (!--debug_uses)
1580
              break;
1581
          }
1582
    }
1583
  free (body);
1584
}
1585
 
1586
/* Determine whether INSN contains an accumulator
1587
   which can be expanded into separate copies,
1588
   one for each copy of the LOOP body.
1589
 
1590
   for (i = 0 ; i < n; i++)
1591
     sum += a[i];
1592
 
1593
   ==>
1594
 
1595
   sum += a[i]
1596
   ....
1597
   i = i+1;
1598
   sum1 += a[i]
1599
   ....
1600
   i = i+1
1601
   sum2 += a[i];
1602
   ....
1603
 
1604
   Return NULL if INSN contains no opportunity for expansion of accumulator.
1605
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1606
   information and return a pointer to it.
1607
*/
1608
 
1609
static struct var_to_expand *
1610
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1611
{
1612
  rtx set, dest, src, op1, op2, something;
1613
  struct var_to_expand *ves;
1614
  enum machine_mode mode1, mode2;
1615
  unsigned accum_pos;
1616
  int debug_uses = 0;
1617
 
1618
  set = single_set (insn);
1619
  if (!set)
1620
    return NULL;
1621
 
1622
  dest = SET_DEST (set);
1623
  src = SET_SRC (set);
1624
 
1625
  if (GET_CODE (src) != PLUS
1626
      && GET_CODE (src) != MINUS
1627
      && GET_CODE (src) != MULT)
1628
    return NULL;
1629
 
1630
  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1631
     in MD.  But if there is no optab to generate the insn, we can not
1632
     perform the variable expansion.  This can happen if an MD provides
1633
     an insn but not a named pattern to generate it, for example to avoid
1634
     producing code that needs additional mode switches like for x87/mmx.
1635
 
1636
     So we check have_insn_for which looks for an optab for the operation
1637
     in SRC.  If it doesn't exist, we can't perform the expansion even
1638
     though INSN is valid.  */
1639
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1640
    return NULL;
1641
 
1642
  op1 = XEXP (src, 0);
1643
  op2 = XEXP (src, 1);
1644
 
1645
  if (!REG_P (dest)
1646
      && !(GET_CODE (dest) == SUBREG
1647
           && REG_P (SUBREG_REG (dest))))
1648
    return NULL;
1649
 
1650
  if (rtx_equal_p (dest, op1))
1651
    accum_pos = 0;
1652
  else if (rtx_equal_p (dest, op2))
1653
    accum_pos = 1;
1654
  else
1655
    return NULL;
1656
 
1657
  /* The method of expansion that we are using; which includes
1658
     the initialization of the expansions with zero and the summation of
1659
     the expansions at the end of the computation will yield wrong results
1660
     for (x = something - x) thus avoid using it in that case.  */
1661
  if (accum_pos == 1
1662
      && GET_CODE (src) == MINUS)
1663
   return NULL;
1664
 
1665
  something = (accum_pos == 0) ? op2 : op1;
1666
 
1667
  if (rtx_referenced_p (dest, something))
1668
    return NULL;
1669
 
1670
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1671
    return NULL;
1672
 
1673
  mode1 = GET_MODE (dest);
1674
  mode2 = GET_MODE (something);
1675
  if ((FLOAT_MODE_P (mode1)
1676
       || FLOAT_MODE_P (mode2))
1677
      && !flag_associative_math)
1678
    return NULL;
1679
 
1680
  if (dump_file)
1681
  {
1682
    fprintf (dump_file,
1683
    "\n;; Expanding Accumulator ");
1684
    print_rtl (dump_file, dest);
1685
    fprintf (dump_file, "\n");
1686
  }
1687
 
1688
  if (debug_uses)
1689
    /* Instead of resetting the debug insns, we could replace each
1690
       debug use in the loop with the sum or product of all expanded
1691
       accummulators.  Since we'll only know of all expansions at the
1692
       end, we'd have to keep track of which vars_to_expand a debug
1693
       insn in the loop references, take note of each copy of the
1694
       debug insn during unrolling, and when it's all done, compute
1695
       the sum or product of each variable and adjust the original
1696
       debug insn and each copy thereof.  What a pain!  */
1697
    reset_debug_uses_in_loop (loop, dest, debug_uses);
1698
 
1699
  /* Record the accumulator to expand.  */
1700
  ves = XNEW (struct var_to_expand);
1701
  ves->insn = insn;
1702
  ves->reg = copy_rtx (dest);
1703
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
1704
  ves->next = NULL;
1705
  ves->op = GET_CODE (src);
1706
  ves->expansion_count = 0;
1707
  ves->reuse_expansion = 0;
1708
  ves->accum_pos = accum_pos;
1709
  return ves;
1710
}
1711
 
1712
/* Determine whether there is an induction variable in INSN that
1713
   we would like to split during unrolling.
1714
 
1715
   I.e. replace
1716
 
1717
   i = i + 1;
1718
   ...
1719
   i = i + 1;
1720
   ...
1721
   i = i + 1;
1722
   ...
1723
 
1724
   type chains by
1725
 
1726
   i0 = i + 1
1727
   ...
1728
   i = i0 + 1
1729
   ...
1730
   i = i0 + 2
1731
   ...
1732
 
1733
   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1734
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
1735
   pointer to it.  */
1736
 
1737
static struct iv_to_split *
1738
analyze_iv_to_split_insn (rtx insn)
1739
{
1740
  rtx set, dest;
1741
  struct rtx_iv iv;
1742
  struct iv_to_split *ivts;
1743
  bool ok;
1744
 
1745
  /* For now we just split the basic induction variables.  Later this may be
1746
     extended for example by selecting also addresses of memory references.  */
1747
  set = single_set (insn);
1748
  if (!set)
1749
    return NULL;
1750
 
1751
  dest = SET_DEST (set);
1752
  if (!REG_P (dest))
1753
    return NULL;
1754
 
1755
  if (!biv_p (insn, dest))
1756
    return NULL;
1757
 
1758
  ok = iv_analyze_result (insn, dest, &iv);
1759
 
1760
  /* This used to be an assert under the assumption that if biv_p returns
1761
     true that iv_analyze_result must also return true.  However, that
1762
     assumption is not strictly correct as evidenced by pr25569.
1763
 
1764
     Returning NULL when iv_analyze_result returns false is safe and
1765
     avoids the problems in pr25569 until the iv_analyze_* routines
1766
     can be fixed, which is apparently hard and time consuming
1767
     according to their author.  */
1768
  if (! ok)
1769
    return NULL;
1770
 
1771
  if (iv.step == const0_rtx
1772
      || iv.mode != iv.extend_mode)
1773
    return NULL;
1774
 
1775
  /* Record the insn to split.  */
1776
  ivts = XNEW (struct iv_to_split);
1777
  ivts->insn = insn;
1778
  ivts->base_var = NULL_RTX;
1779
  ivts->step = iv.step;
1780
  ivts->next = NULL;
1781
  ivts->n_loc = 1;
1782
  ivts->loc[0] = 1;
1783
 
1784
  return ivts;
1785
}
1786
 
1787
/* Determines which of insns in LOOP can be optimized.
1788
   Return a OPT_INFO struct with the relevant hash tables filled
1789
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
1790
   is undefined for the return value.  */
1791
 
1792
static struct opt_info *
1793
analyze_insns_in_loop (struct loop *loop)
1794
{
1795
  basic_block *body, bb;
1796
  unsigned i;
1797
  struct opt_info *opt_info = XCNEW (struct opt_info);
1798
  rtx insn;
1799
  struct iv_to_split *ivts = NULL;
1800
  struct var_to_expand *ves = NULL;
1801
  PTR *slot1;
1802
  PTR *slot2;
1803
  VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1804
  edge exit;
1805
  bool can_apply = false;
1806
 
1807
  iv_analysis_loop_init (loop);
1808
 
1809
  body = get_loop_body (loop);
1810
 
1811
  if (flag_split_ivs_in_unroller)
1812
    {
1813
      opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1814
                                              si_info_hash, si_info_eq, free);
1815
      opt_info->iv_to_split_head = NULL;
1816
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1817
    }
1818
 
1819
  /* Record the loop exit bb and loop preheader before the unrolling.  */
1820
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1821
 
1822
  if (VEC_length (edge, edges) == 1)
1823
    {
1824
      exit = VEC_index (edge, edges, 0);
1825
      if (!(exit->flags & EDGE_COMPLEX))
1826
        {
1827
          opt_info->loop_exit = split_edge (exit);
1828
          can_apply = true;
1829
        }
1830
    }
1831
 
1832
  if (flag_variable_expansion_in_unroller
1833
      && can_apply)
1834
    {
1835
      opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1836
                                                        ve_info_hash,
1837
                                                        ve_info_eq, free);
1838
      opt_info->var_to_expand_head = NULL;
1839
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1840
    }
1841
 
1842
  for (i = 0; i < loop->num_nodes; i++)
1843
    {
1844
      bb = body[i];
1845
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1846
        continue;
1847
 
1848
      FOR_BB_INSNS (bb, insn)
1849
      {
1850
        if (!INSN_P (insn))
1851
          continue;
1852
 
1853
        if (opt_info->insns_to_split)
1854
          ivts = analyze_iv_to_split_insn (insn);
1855
 
1856
        if (ivts)
1857
          {
1858
            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1859
            gcc_assert (*slot1 == NULL);
1860
            *slot1 = ivts;
1861
            *opt_info->iv_to_split_tail = ivts;
1862
            opt_info->iv_to_split_tail = &ivts->next;
1863
            continue;
1864
          }
1865
 
1866
        if (opt_info->insns_with_var_to_expand)
1867
          ves = analyze_insn_to_expand_var (loop, insn);
1868
 
1869
        if (ves)
1870
          {
1871
            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1872
            gcc_assert (*slot2 == NULL);
1873
            *slot2 = ves;
1874
            *opt_info->var_to_expand_tail = ves;
1875
            opt_info->var_to_expand_tail = &ves->next;
1876
          }
1877
      }
1878
    }
1879
 
1880
  VEC_free (edge, heap, edges);
1881
  free (body);
1882
  return opt_info;
1883
}
1884
 
1885
/* Called just before loop duplication.  Records start of duplicated area
1886
   to OPT_INFO.  */
1887
 
1888
static void
1889
opt_info_start_duplication (struct opt_info *opt_info)
1890
{
1891
  if (opt_info)
1892
    opt_info->first_new_block = last_basic_block;
1893
}
1894
 
1895
/* Determine the number of iterations between initialization of the base
1896
   variable and the current copy (N_COPY).  N_COPIES is the total number
1897
   of newly created copies.  UNROLLING is true if we are unrolling
1898
   (not peeling) the loop.  */
1899
 
1900
static unsigned
1901
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1902
{
1903
  if (unrolling)
1904
    {
1905
      /* If we are unrolling, initialization is done in the original loop
1906
         body (number 0).  */
1907
      return n_copy;
1908
    }
1909
  else
1910
    {
1911
      /* If we are peeling, the copy in that the initialization occurs has
1912
         number 1.  The original loop (number 0) is the last.  */
1913
      if (n_copy)
1914
        return n_copy - 1;
1915
      else
1916
        return n_copies;
1917
    }
1918
}
1919
 
1920
/* Locate in EXPR the expression corresponding to the location recorded
1921
   in IVTS, and return a pointer to the RTX for this location.  */
1922
 
1923
static rtx *
1924
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1925
{
1926
  unsigned i;
1927
  rtx *ret = &expr;
1928
 
1929
  for (i = 0; i < ivts->n_loc; i++)
1930
    ret = &XEXP (*ret, ivts->loc[i]);
1931
 
1932
  return ret;
1933
}
1934
 
1935
/* Allocate basic variable for the induction variable chain.  */
1936
 
1937
static void
1938
allocate_basic_variable (struct iv_to_split *ivts)
1939
{
1940
  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1941
 
1942
  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1943
}
1944
 
1945
/* Insert initialization of basic variable of IVTS before INSN, taking
1946
   the initial value from INSN.  */
1947
 
1948
static void
1949
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1950
{
1951
  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1952
  rtx seq;
1953
 
1954
  start_sequence ();
1955
  expr = force_operand (expr, ivts->base_var);
1956
  if (expr != ivts->base_var)
1957
    emit_move_insn (ivts->base_var, expr);
1958
  seq = get_insns ();
1959
  end_sequence ();
1960
 
1961
  emit_insn_before (seq, insn);
1962
}
1963
 
1964
/* Replace the use of induction variable described in IVTS in INSN
1965
   by base variable + DELTA * step.  */
1966
 
1967
static void
1968
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1969
{
1970
  rtx expr, *loc, seq, incr, var;
1971
  enum machine_mode mode = GET_MODE (ivts->base_var);
1972
  rtx src, dest, set;
1973
 
1974
  /* Construct base + DELTA * step.  */
1975
  if (!delta)
1976
    expr = ivts->base_var;
1977
  else
1978
    {
1979
      incr = simplify_gen_binary (MULT, mode,
1980
                                  ivts->step, gen_int_mode (delta, mode));
1981
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1982
                                  ivts->base_var, incr);
1983
    }
1984
 
1985
  /* Figure out where to do the replacement.  */
1986
  loc = get_ivts_expr (single_set (insn), ivts);
1987
 
1988
  /* If we can make the replacement right away, we're done.  */
1989
  if (validate_change (insn, loc, expr, 0))
1990
    return;
1991
 
1992
  /* Otherwise, force EXPR into a register and try again.  */
1993
  start_sequence ();
1994
  var = gen_reg_rtx (mode);
1995
  expr = force_operand (expr, var);
1996
  if (expr != var)
1997
    emit_move_insn (var, expr);
1998
  seq = get_insns ();
1999
  end_sequence ();
2000
  emit_insn_before (seq, insn);
2001
 
2002
  if (validate_change (insn, loc, var, 0))
2003
    return;
2004
 
2005
  /* The last chance.  Try recreating the assignment in insn
2006
     completely from scratch.  */
2007
  set = single_set (insn);
2008
  gcc_assert (set);
2009
 
2010
  start_sequence ();
2011
  *loc = var;
2012
  src = copy_rtx (SET_SRC (set));
2013
  dest = copy_rtx (SET_DEST (set));
2014
  src = force_operand (src, dest);
2015
  if (src != dest)
2016
    emit_move_insn (dest, src);
2017
  seq = get_insns ();
2018
  end_sequence ();
2019
 
2020
  emit_insn_before (seq, insn);
2021
  delete_insn (insn);
2022
}
2023
 
2024
 
2025
/* Return one expansion of the accumulator recorded in struct VE.  */
2026
 
2027
static rtx
2028
get_expansion (struct var_to_expand *ve)
2029
{
2030
  rtx reg;
2031
 
2032
  if (ve->reuse_expansion == 0)
2033
    reg = ve->reg;
2034
  else
2035
    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
2036
 
2037
  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
2038
    ve->reuse_expansion = 0;
2039
  else
2040
    ve->reuse_expansion++;
2041
 
2042
  return reg;
2043
}
2044
 
2045
 
2046
/* Given INSN replace the uses of the accumulator recorded in VE
2047
   with a new register.  */
2048
 
2049
static void
2050
expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
2051
{
2052
  rtx new_reg, set;
2053
  bool really_new_expansion = false;
2054
 
2055
  set = single_set (insn);
2056
  gcc_assert (set);
2057
 
2058
  /* Generate a new register only if the expansion limit has not been
2059
     reached.  Else reuse an already existing expansion.  */
2060
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
2061
    {
2062
      really_new_expansion = true;
2063
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2064
    }
2065
  else
2066
    new_reg = get_expansion (ve);
2067
 
2068
  validate_change (insn, &SET_DEST (set), new_reg, 1);
2069
  validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2070
 
2071
  if (apply_change_group ())
2072
    if (really_new_expansion)
2073
      {
2074
        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2075
        ve->expansion_count++;
2076
      }
2077
}
2078
 
2079
/* Initialize the variable expansions in loop preheader.  PLACE is the
2080
   loop-preheader basic block where the initialization of the
2081
   expansions should take place.  The expansions are initialized with
2082
   (-0) when the operation is plus or minus to honor sign zero.  This
2083
   way we can prevent cases where the sign of the final result is
2084
   effected by the sign of the expansion.  Here is an example to
2085
   demonstrate this:
2086
 
2087
   for (i = 0 ; i < n; i++)
2088
     sum += something;
2089
 
2090
   ==>
2091
 
2092
   sum += something
2093
   ....
2094
   i = i+1;
2095
   sum1 += something
2096
   ....
2097
   i = i+1
2098
   sum2 += something;
2099
   ....
2100
 
2101
   When SUM is initialized with -zero and SOMETHING is also -zero; the
2102
   final result of sum should be -zero thus the expansions sum1 and sum2
2103
   should be initialized with -zero as well (otherwise we will get +zero
2104
   as the final result).  */
2105
 
2106
static void
2107
insert_var_expansion_initialization (struct var_to_expand *ve,
2108
                                     basic_block place)
2109
{
2110
  rtx seq, var, zero_init, insn;
2111
  unsigned i;
2112
  enum machine_mode mode = GET_MODE (ve->reg);
2113
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2114
 
2115
  if (VEC_length (rtx, ve->var_expansions) == 0)
2116
    return;
2117
 
2118
  start_sequence ();
2119
  if (ve->op == PLUS || ve->op == MINUS)
2120
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2121
      {
2122
        if (honor_signed_zero_p)
2123
          zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2124
        else
2125
          zero_init = CONST0_RTX (mode);
2126
 
2127
        emit_move_insn (var, zero_init);
2128
      }
2129
  else if (ve->op == MULT)
2130
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2131
      {
2132
        zero_init =  CONST1_RTX (GET_MODE (var));
2133
        emit_move_insn (var, zero_init);
2134
      }
2135
 
2136
  seq = get_insns ();
2137
  end_sequence ();
2138
 
2139
  insn = BB_HEAD (place);
2140
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2141
    insn = NEXT_INSN (insn);
2142
 
2143
  emit_insn_after (seq, insn);
2144
}
2145
 
2146
/* Combine the variable expansions at the loop exit.  PLACE is the
2147
   loop exit basic block where the summation of the expansions should
2148
   take place.  */
2149
 
2150
static void
2151
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2152
{
2153
  rtx sum = ve->reg;
2154
  rtx expr, seq, var, insn;
2155
  unsigned i;
2156
 
2157
  if (VEC_length (rtx, ve->var_expansions) == 0)
2158
    return;
2159
 
2160
  start_sequence ();
2161
  if (ve->op == PLUS || ve->op == MINUS)
2162
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2163
      {
2164
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2165
                                   var, sum);
2166
      }
2167
  else if (ve->op == MULT)
2168
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2169
      {
2170
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2171
                                   var, sum);
2172
      }
2173
 
2174
  expr = force_operand (sum, ve->reg);
2175
  if (expr != ve->reg)
2176
    emit_move_insn (ve->reg, expr);
2177
  seq = get_insns ();
2178
  end_sequence ();
2179
 
2180
  insn = BB_HEAD (place);
2181
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2182
    insn = NEXT_INSN (insn);
2183
 
2184
  emit_insn_after (seq, insn);
2185
}
2186
 
2187
/* Apply loop optimizations in loop copies using the
2188
   data which gathered during the unrolling.  Structure
2189
   OPT_INFO record that data.
2190
 
2191
   UNROLLING is true if we unrolled (not peeled) the loop.
2192
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2193
   the loop (as it should happen in complete unrolling, but not in ordinary
2194
   peeling of the loop).  */
2195
 
2196
static void
2197
apply_opt_in_copies (struct opt_info *opt_info,
2198
                     unsigned n_copies, bool unrolling,
2199
                     bool rewrite_original_loop)
2200
{
2201
  unsigned i, delta;
2202
  basic_block bb, orig_bb;
2203
  rtx insn, orig_insn, next;
2204
  struct iv_to_split ivts_templ, *ivts;
2205
  struct var_to_expand ve_templ, *ves;
2206
 
2207
  /* Sanity check -- we need to put initialization in the original loop
2208
     body.  */
2209
  gcc_assert (!unrolling || rewrite_original_loop);
2210
 
2211
  /* Allocate the basic variables (i0).  */
2212
  if (opt_info->insns_to_split)
2213
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2214
      allocate_basic_variable (ivts);
2215
 
2216
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2217
    {
2218
      bb = BASIC_BLOCK (i);
2219
      orig_bb = get_bb_original (bb);
2220
 
2221
      /* bb->aux holds position in copy sequence initialized by
2222
         duplicate_loop_to_header_edge.  */
2223
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2224
                                        unrolling);
2225
      bb->aux = 0;
2226
      orig_insn = BB_HEAD (orig_bb);
2227
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2228
        {
2229
          next = NEXT_INSN (insn);
2230
          if (!INSN_P (insn))
2231
            continue;
2232
 
2233
          while (!INSN_P (orig_insn))
2234
            orig_insn = NEXT_INSN (orig_insn);
2235
 
2236
          ivts_templ.insn = orig_insn;
2237
          ve_templ.insn = orig_insn;
2238
 
2239
          /* Apply splitting iv optimization.  */
2240
          if (opt_info->insns_to_split)
2241
            {
2242
              ivts = (struct iv_to_split *)
2243
                htab_find (opt_info->insns_to_split, &ivts_templ);
2244
 
2245
              if (ivts)
2246
                {
2247
                  gcc_assert (GET_CODE (PATTERN (insn))
2248
                              == GET_CODE (PATTERN (orig_insn)));
2249
 
2250
                  if (!delta)
2251
                    insert_base_initialization (ivts, insn);
2252
                  split_iv (ivts, insn, delta);
2253
                }
2254
            }
2255
          /* Apply variable expansion optimization.  */
2256
          if (unrolling && opt_info->insns_with_var_to_expand)
2257
            {
2258
              ves = (struct var_to_expand *)
2259
                htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2260
              if (ves)
2261
                {
2262
                  gcc_assert (GET_CODE (PATTERN (insn))
2263
                              == GET_CODE (PATTERN (orig_insn)));
2264
                  expand_var_during_unrolling (ves, insn);
2265
                }
2266
            }
2267
          orig_insn = NEXT_INSN (orig_insn);
2268
        }
2269
    }
2270
 
2271
  if (!rewrite_original_loop)
2272
    return;
2273
 
2274
  /* Initialize the variable expansions in the loop preheader
2275
     and take care of combining them at the loop exit.  */
2276
  if (opt_info->insns_with_var_to_expand)
2277
    {
2278
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2279
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2280
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2281
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2282
    }
2283
 
2284
  /* Rewrite also the original loop body.  Find them as originals of the blocks
2285
     in the last copied iteration, i.e. those that have
2286
     get_bb_copy (get_bb_original (bb)) == bb.  */
2287
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2288
    {
2289
      bb = BASIC_BLOCK (i);
2290
      orig_bb = get_bb_original (bb);
2291
      if (get_bb_copy (orig_bb) != bb)
2292
        continue;
2293
 
2294
      delta = determine_split_iv_delta (0, n_copies, unrolling);
2295
      for (orig_insn = BB_HEAD (orig_bb);
2296
           orig_insn != NEXT_INSN (BB_END (bb));
2297
           orig_insn = next)
2298
        {
2299
          next = NEXT_INSN (orig_insn);
2300
 
2301
          if (!INSN_P (orig_insn))
2302
            continue;
2303
 
2304
          ivts_templ.insn = orig_insn;
2305
          if (opt_info->insns_to_split)
2306
            {
2307
              ivts = (struct iv_to_split *)
2308
                htab_find (opt_info->insns_to_split, &ivts_templ);
2309
              if (ivts)
2310
                {
2311
                  if (!delta)
2312
                    insert_base_initialization (ivts, orig_insn);
2313
                  split_iv (ivts, orig_insn, delta);
2314
                  continue;
2315
                }
2316
            }
2317
 
2318
        }
2319
    }
2320
}
2321
 
2322
/* Release OPT_INFO.  */
2323
 
2324
static void
2325
free_opt_info (struct opt_info *opt_info)
2326
{
2327
  if (opt_info->insns_to_split)
2328
    htab_delete (opt_info->insns_to_split);
2329
  if (opt_info->insns_with_var_to_expand)
2330
    {
2331
      struct var_to_expand *ves;
2332
 
2333
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2334
        VEC_free (rtx, heap, ves->var_expansions);
2335
      htab_delete (opt_info->insns_with_var_to_expand);
2336
    }
2337
  free (opt_info);
2338
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.