OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [loop-unroll.c] - Blame information for rev 318

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Loop unrolling and peeling.
2
   Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it under
7
the terms of the GNU General Public License as published by the Free
8
Software Foundation; either version 3, or (at your option) any later
9
version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12
WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING3.  If not see
18
<http://www.gnu.org/licenses/>.  */
19
 
20
#include "config.h"
21
#include "system.h"
22
#include "coretypes.h"
23
#include "tm.h"
24
#include "rtl.h"
25
#include "hard-reg-set.h"
26
#include "obstack.h"
27
#include "basic-block.h"
28
#include "cfgloop.h"
29
#include "cfglayout.h"
30
#include "params.h"
31
#include "output.h"
32
#include "expr.h"
33
#include "hashtab.h"
34
#include "recog.h"    
35
 
36
/* This pass performs loop unrolling and peeling.  We only perform these
37
   optimizations on innermost loops (with single exception) because
38
   the impact on performance is greatest here, and we want to avoid
39
   unnecessary code size growth.  The gain is caused by greater sequentiality
40
   of code, better code to optimize for further passes and in some cases
41
   by fewer testings of exit conditions.  The main problem is code growth,
42
   that impacts performance negatively due to effect of caches.
43
 
44
   What we do:
45
 
46
   -- complete peeling of once-rolling loops; this is the above mentioned
47
      exception, as this causes loop to be cancelled completely and
48
      does not cause code growth
49
   -- complete peeling of loops that roll (small) constant times.
50
   -- simple peeling of first iterations of loops that do not roll much
51
      (according to profile feedback)
52
   -- unrolling of loops that roll constant times; this is almost always
53
      win, as we get rid of exit condition tests.
54
   -- unrolling of loops that roll number of times that we can compute
55
      in runtime; we also get rid of exit condition tests here, but there
56
      is the extra expense for calculating the number of iterations
57
   -- simple unrolling of remaining loops; this is performed only if we
58
      are asked to, as the gain is questionable in this case and often
59
      it may even slow down the code
60
   For more detailed descriptions of each of those, see comments at
61
   appropriate function below.
62
 
63
   There is a lot of parameters (defined and described in params.def) that
64
   control how much we unroll/peel.
65
 
66
   ??? A great problem is that we don't have a good way how to determine
67
   how many times we should unroll the loop; the experiments I have made
68
   showed that this choice may affect performance in order of several %.
69
   */
70
 
71
/* Information about induction variables to split.  */
72
 
73
struct iv_to_split
74
{
75
  rtx insn;             /* The insn in that the induction variable occurs.  */
76
  rtx base_var;         /* The variable on that the values in the further
77
                           iterations are based.  */
78
  rtx step;             /* Step of the induction variable.  */
79
  unsigned n_loc;
80
  unsigned loc[3];      /* Location where the definition of the induction
81
                           variable occurs in the insn.  For example if
82
                           N_LOC is 2, the expression is located at
83
                           XEXP (XEXP (single_set, loc[0]), loc[1]).  */
84
};
85
 
86
/* Information about accumulators to expand.  */
87
 
88
struct var_to_expand
89
{
90
  rtx insn;                        /* The insn in that the variable expansion occurs.  */
91
  rtx reg;                         /* The accumulator which is expanded.  */
92
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
93
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
94
                                      or multiplication.  */
95
  int expansion_count;             /* Count the number of expansions generated so far.  */
96
  int reuse_expansion;             /* The expansion we intend to reuse to expand
97
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
98
                                      the original accumulator.  Else use
99
                                      var_expansions[REUSE_EXPANSION - 1].  */
100
};
101
 
102
/* Information about optimization applied in
103
   the unrolled loop.  */
104
 
105
struct opt_info
106
{
107
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
108
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
109
                                      to expand.  */
110
  unsigned first_new_block;        /* The first basic block that was
111
                                      duplicated.  */
112
  basic_block loop_exit;           /* The loop exit basic block.  */
113
  basic_block loop_preheader;      /* The loop preheader basic block.  */
114
};
115
 
116
static void decide_unrolling_and_peeling (struct loops *, int);
117
static void peel_loops_completely (struct loops *, int);
118
static void decide_peel_simple (struct loop *, int);
119
static void decide_peel_once_rolling (struct loop *, int);
120
static void decide_peel_completely (struct loop *, int);
121
static void decide_unroll_stupid (struct loop *, int);
122
static void decide_unroll_constant_iterations (struct loop *, int);
123
static void decide_unroll_runtime_iterations (struct loop *, int);
124
static void peel_loop_simple (struct loops *, struct loop *);
125
static void peel_loop_completely (struct loops *, struct loop *);
126
static void unroll_loop_stupid (struct loops *, struct loop *);
127
static void unroll_loop_constant_iterations (struct loops *, struct loop *);
128
static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
129
static struct opt_info *analyze_insns_in_loop (struct loop *);
130
static void opt_info_start_duplication (struct opt_info *);
131
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
132
static void free_opt_info (struct opt_info *);
133
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
134
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
135
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
136
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
137
static int insert_var_expansion_initialization (void **, void *);
138
static int combine_var_copies_in_loop_exit (void **, void *);
139
static int release_var_copies (void **, void *);
140
static rtx get_expansion (struct var_to_expand *);
141
 
142
/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
143
void
144
unroll_and_peel_loops (struct loops *loops, int flags)
145
{
146
  struct loop *loop, *next;
147
  bool check;
148
 
149
  /* First perform complete loop peeling (it is almost surely a win,
150
     and affects parameters for further decision a lot).  */
151
  peel_loops_completely (loops, flags);
152
 
153
  /* Now decide rest of unrolling and peeling.  */
154
  decide_unrolling_and_peeling (loops, flags);
155
 
156
  loop = loops->tree_root;
157
  while (loop->inner)
158
    loop = loop->inner;
159
 
160
  /* Scan the loops, inner ones first.  */
161
  while (loop != loops->tree_root)
162
    {
163
      if (loop->next)
164
        {
165
          next = loop->next;
166
          while (next->inner)
167
            next = next->inner;
168
        }
169
      else
170
        next = loop->outer;
171
 
172
      check = true;
173
      /* And perform the appropriate transformations.  */
174
      switch (loop->lpt_decision.decision)
175
        {
176
        case LPT_PEEL_COMPLETELY:
177
          /* Already done.  */
178
          gcc_unreachable ();
179
        case LPT_PEEL_SIMPLE:
180
          peel_loop_simple (loops, loop);
181
          break;
182
        case LPT_UNROLL_CONSTANT:
183
          unroll_loop_constant_iterations (loops, loop);
184
          break;
185
        case LPT_UNROLL_RUNTIME:
186
          unroll_loop_runtime_iterations (loops, loop);
187
          break;
188
        case LPT_UNROLL_STUPID:
189
          unroll_loop_stupid (loops, loop);
190
          break;
191
        case LPT_NONE:
192
          check = false;
193
          break;
194
        default:
195
          gcc_unreachable ();
196
        }
197
      if (check)
198
        {
199
#ifdef ENABLE_CHECKING
200
          verify_dominators (CDI_DOMINATORS);
201
          verify_loop_structure (loops);
202
#endif
203
        }
204
      loop = next;
205
    }
206
 
207
  iv_analysis_done ();
208
}
209
 
210
/* Check whether exit of the LOOP is at the end of loop body.  */
211
 
212
static bool
213
loop_exit_at_end_p (struct loop *loop)
214
{
215
  struct niter_desc *desc = get_simple_loop_desc (loop);
216
  rtx insn;
217
 
218
  if (desc->in_edge->dest != loop->latch)
219
    return false;
220
 
221
  /* Check that the latch is empty.  */
222
  FOR_BB_INSNS (loop->latch, insn)
223
    {
224
      if (INSN_P (insn))
225
        return false;
226
    }
227
 
228
  return true;
229
}
230
 
231
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
232
static void
233
peel_loops_completely (struct loops *loops, int flags)
234
{
235
  struct loop *loop;
236
  unsigned i;
237
 
238
  /* Scan the loops, the inner ones first.  */
239
  for (i = loops->num - 1; i > 0; i--)
240
    {
241
      loop = loops->parray[i];
242
      if (!loop)
243
        continue;
244
 
245
      loop->lpt_decision.decision = LPT_NONE;
246
 
247
      if (dump_file)
248
        fprintf (dump_file,
249
                 "\n;; *** Considering loop %d for complete peeling ***\n",
250
                 loop->num);
251
 
252
      loop->ninsns = num_loop_insns (loop);
253
 
254
      decide_peel_once_rolling (loop, flags);
255
      if (loop->lpt_decision.decision == LPT_NONE)
256
        decide_peel_completely (loop, flags);
257
 
258
      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
259
        {
260
          peel_loop_completely (loops, loop);
261
#ifdef ENABLE_CHECKING
262
          verify_dominators (CDI_DOMINATORS);
263
          verify_loop_structure (loops);
264
#endif
265
        }
266
    }
267
}
268
 
269
/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
270
static void
271
decide_unrolling_and_peeling (struct loops *loops, int flags)
272
{
273
  struct loop *loop = loops->tree_root, *next;
274
 
275
  while (loop->inner)
276
    loop = loop->inner;
277
 
278
  /* Scan the loops, inner ones first.  */
279
  while (loop != loops->tree_root)
280
    {
281
      if (loop->next)
282
        {
283
          next = loop->next;
284
          while (next->inner)
285
            next = next->inner;
286
        }
287
      else
288
        next = loop->outer;
289
 
290
      loop->lpt_decision.decision = LPT_NONE;
291
 
292
      if (dump_file)
293
        fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
294
 
295
      /* Do not peel cold areas.  */
296
      if (!maybe_hot_bb_p (loop->header))
297
        {
298
          if (dump_file)
299
            fprintf (dump_file, ";; Not considering loop, cold area\n");
300
          loop = next;
301
          continue;
302
        }
303
 
304
      /* Can the loop be manipulated?  */
305
      if (!can_duplicate_loop_p (loop))
306
        {
307
          if (dump_file)
308
            fprintf (dump_file,
309
                     ";; Not considering loop, cannot duplicate\n");
310
          loop = next;
311
          continue;
312
        }
313
 
314
      /* Skip non-innermost loops.  */
315
      if (loop->inner)
316
        {
317
          if (dump_file)
318
            fprintf (dump_file, ";; Not considering loop, is not innermost\n");
319
          loop = next;
320
          continue;
321
        }
322
 
323
      loop->ninsns = num_loop_insns (loop);
324
      loop->av_ninsns = average_num_loop_insns (loop);
325
 
326
      /* Try transformations one by one in decreasing order of
327
         priority.  */
328
 
329
      decide_unroll_constant_iterations (loop, flags);
330
      if (loop->lpt_decision.decision == LPT_NONE)
331
        decide_unroll_runtime_iterations (loop, flags);
332
      if (loop->lpt_decision.decision == LPT_NONE)
333
        decide_unroll_stupid (loop, flags);
334
      if (loop->lpt_decision.decision == LPT_NONE)
335
        decide_peel_simple (loop, flags);
336
 
337
      loop = next;
338
    }
339
}
340
 
341
/* Decide whether the LOOP is once rolling and suitable for complete
342
   peeling.  */
343
static void
344
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
345
{
346
  struct niter_desc *desc;
347
 
348
  if (dump_file)
349
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
350
 
351
  /* Is the loop small enough?  */
352
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
353
    {
354
      if (dump_file)
355
        fprintf (dump_file, ";; Not considering loop, is too big\n");
356
      return;
357
    }
358
 
359
  /* Check for simple loops.  */
360
  desc = get_simple_loop_desc (loop);
361
 
362
  /* Check number of iterations.  */
363
  if (!desc->simple_p
364
      || desc->assumptions
365
      || desc->infinite
366
      || !desc->const_iter
367
      || desc->niter != 0)
368
    {
369
      if (dump_file)
370
        fprintf (dump_file,
371
                 ";; Unable to prove that the loop rolls exactly once\n");
372
      return;
373
    }
374
 
375
  /* Success.  */
376
  if (dump_file)
377
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
378
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
379
}
380
 
381
/* Decide whether the LOOP is suitable for complete peeling.  */
382
static void
383
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
384
{
385
  unsigned npeel;
386
  struct niter_desc *desc;
387
 
388
  if (dump_file)
389
    fprintf (dump_file, "\n;; Considering peeling completely\n");
390
 
391
  /* Skip non-innermost loops.  */
392
  if (loop->inner)
393
    {
394
      if (dump_file)
395
        fprintf (dump_file, ";; Not considering loop, is not innermost\n");
396
      return;
397
    }
398
 
399
  /* Do not peel cold areas.  */
400
  if (!maybe_hot_bb_p (loop->header))
401
    {
402
      if (dump_file)
403
        fprintf (dump_file, ";; Not considering loop, cold area\n");
404
      return;
405
    }
406
 
407
  /* Can the loop be manipulated?  */
408
  if (!can_duplicate_loop_p (loop))
409
    {
410
      if (dump_file)
411
        fprintf (dump_file,
412
                 ";; Not considering loop, cannot duplicate\n");
413
      return;
414
    }
415
 
416
  /* npeel = number of iterations to peel.  */
417
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
418
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
419
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
420
 
421
  /* Is the loop small enough?  */
422
  if (!npeel)
423
    {
424
      if (dump_file)
425
        fprintf (dump_file, ";; Not considering loop, is too big\n");
426
      return;
427
    }
428
 
429
  /* Check for simple loops.  */
430
  desc = get_simple_loop_desc (loop);
431
 
432
  /* Check number of iterations.  */
433
  if (!desc->simple_p
434
      || desc->assumptions
435
      || !desc->const_iter
436
      || desc->infinite)
437
    {
438
      if (dump_file)
439
        fprintf (dump_file,
440
                 ";; Unable to prove that the loop iterates constant times\n");
441
      return;
442
    }
443
 
444
  if (desc->niter > npeel - 1)
445
    {
446
      if (dump_file)
447
        {
448
          fprintf (dump_file,
449
                   ";; Not peeling loop completely, rolls too much (");
450
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
451
          fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
452
        }
453
      return;
454
    }
455
 
456
  /* Success.  */
457
  if (dump_file)
458
    fprintf (dump_file, ";; Decided to peel loop completely\n");
459
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
460
}
461
 
462
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
463
   completely.  The transformation done:
464
 
465
   for (i = 0; i < 4; i++)
466
     body;
467
 
468
   ==>
469
 
470
   i = 0;
471
   body; i++;
472
   body; i++;
473
   body; i++;
474
   body; i++;
475
   */
476
static void
477
peel_loop_completely (struct loops *loops, struct loop *loop)
478
{
479
  sbitmap wont_exit;
480
  unsigned HOST_WIDE_INT npeel;
481
  unsigned n_remove_edges, i;
482
  edge *remove_edges, ein;
483
  struct niter_desc *desc = get_simple_loop_desc (loop);
484
  struct opt_info *opt_info = NULL;
485
 
486
  npeel = desc->niter;
487
 
488
  if (npeel)
489
    {
490
      bool ok;
491
 
492
      wont_exit = sbitmap_alloc (npeel + 1);
493
      sbitmap_ones (wont_exit);
494
      RESET_BIT (wont_exit, 0);
495
      if (desc->noloop_assumptions)
496
        RESET_BIT (wont_exit, 1);
497
 
498
      remove_edges = XCNEWVEC (edge, npeel);
499
      n_remove_edges = 0;
500
 
501
      if (flag_split_ivs_in_unroller)
502
        opt_info = analyze_insns_in_loop (loop);
503
 
504
      opt_info_start_duplication (opt_info);
505
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
506
                                          loops, npeel,
507
                                          wont_exit, desc->out_edge,
508
                                          remove_edges, &n_remove_edges,
509
                                          DLTHE_FLAG_UPDATE_FREQ
510
                                          | DLTHE_FLAG_COMPLETTE_PEEL
511
                                          | (opt_info
512
                                             ? DLTHE_RECORD_COPY_NUMBER : 0));
513
      gcc_assert (ok);
514
 
515
      free (wont_exit);
516
 
517
      if (opt_info)
518
        {
519
          apply_opt_in_copies (opt_info, npeel, false, true);
520
          free_opt_info (opt_info);
521
        }
522
 
523
      /* Remove the exit edges.  */
524
      for (i = 0; i < n_remove_edges; i++)
525
        remove_path (loops, remove_edges[i]);
526
      free (remove_edges);
527
    }
528
 
529
  ein = desc->in_edge;
530
  free_simple_loop_desc (loop);
531
 
532
  /* Now remove the unreachable part of the last iteration and cancel
533
     the loop.  */
534
  remove_path (loops, ein);
535
 
536
  if (dump_file)
537
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
538
}
539
 
540
/* Decide whether to unroll LOOP iterating constant number of times
541
   and how much.  */
542
 
543
static void
544
decide_unroll_constant_iterations (struct loop *loop, int flags)
545
{
546
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
547
  struct niter_desc *desc;
548
 
549
  if (!(flags & UAP_UNROLL))
550
    {
551
      /* We were not asked to, just return back silently.  */
552
      return;
553
    }
554
 
555
  if (dump_file)
556
    fprintf (dump_file,
557
             "\n;; Considering unrolling loop with constant "
558
             "number of iterations\n");
559
 
560
  /* nunroll = total number of copies of the original loop body in
561
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
562
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
563
  nunroll_by_av
564
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
565
  if (nunroll > nunroll_by_av)
566
    nunroll = nunroll_by_av;
567
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
568
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
569
 
570
  /* Skip big loops.  */
571
  if (nunroll <= 1)
572
    {
573
      if (dump_file)
574
        fprintf (dump_file, ";; Not considering loop, is too big\n");
575
      return;
576
    }
577
 
578
  /* Check for simple loops.  */
579
  desc = get_simple_loop_desc (loop);
580
 
581
  /* Check number of iterations.  */
582
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
583
    {
584
      if (dump_file)
585
        fprintf (dump_file,
586
                 ";; Unable to prove that the loop iterates constant times\n");
587
      return;
588
    }
589
 
590
  /* Check whether the loop rolls enough to consider.  */
591
  if (desc->niter < 2 * nunroll)
592
    {
593
      if (dump_file)
594
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
595
      return;
596
    }
597
 
598
  /* Success; now compute number of iterations to unroll.  We alter
599
     nunroll so that as few as possible copies of loop body are
600
     necessary, while still not decreasing the number of unrollings
601
     too much (at most by 1).  */
602
  best_copies = 2 * nunroll + 10;
603
 
604
  i = 2 * nunroll + 2;
605
  if (i - 1 >= desc->niter)
606
    i = desc->niter - 2;
607
 
608
  for (; i >= nunroll - 1; i--)
609
    {
610
      unsigned exit_mod = desc->niter % (i + 1);
611
 
612
      if (!loop_exit_at_end_p (loop))
613
        n_copies = exit_mod + i + 1;
614
      else if (exit_mod != (unsigned) i
615
               || desc->noloop_assumptions != NULL_RTX)
616
        n_copies = exit_mod + i + 2;
617
      else
618
        n_copies = i + 1;
619
 
620
      if (n_copies < best_copies)
621
        {
622
          best_copies = n_copies;
623
          best_unroll = i;
624
        }
625
    }
626
 
627
  if (dump_file)
628
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
629
             best_unroll + 1, best_copies, nunroll);
630
 
631
  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
632
  loop->lpt_decision.times = best_unroll;
633
 
634
  if (dump_file)
635
    fprintf (dump_file,
636
             ";; Decided to unroll the constant times rolling loop, %d times.\n",
637
             loop->lpt_decision.times);
638
}
639
 
640
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
641
   times.  The transformation does this:
642
 
643
   for (i = 0; i < 102; i++)
644
     body;
645
 
646
   ==>
647
 
648
   i = 0;
649
   body; i++;
650
   body; i++;
651
   while (i < 102)
652
     {
653
       body; i++;
654
       body; i++;
655
       body; i++;
656
       body; i++;
657
     }
658
  */
659
static void
660
unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
661
{
662
  unsigned HOST_WIDE_INT niter;
663
  unsigned exit_mod;
664
  sbitmap wont_exit;
665
  unsigned n_remove_edges, i;
666
  edge *remove_edges;
667
  unsigned max_unroll = loop->lpt_decision.times;
668
  struct niter_desc *desc = get_simple_loop_desc (loop);
669
  bool exit_at_end = loop_exit_at_end_p (loop);
670
  struct opt_info *opt_info = NULL;
671
  bool ok;
672
 
673
  niter = desc->niter;
674
 
675
  /* Should not get here (such loop should be peeled instead).  */
676
  gcc_assert (niter > max_unroll + 1);
677
 
678
  exit_mod = niter % (max_unroll + 1);
679
 
680
  wont_exit = sbitmap_alloc (max_unroll + 1);
681
  sbitmap_ones (wont_exit);
682
 
683
  remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
684
  n_remove_edges = 0;
685
  if (flag_split_ivs_in_unroller
686
      || flag_variable_expansion_in_unroller)
687
    opt_info = analyze_insns_in_loop (loop);
688
 
689
  if (!exit_at_end)
690
    {
691
      /* The exit is not at the end of the loop; leave exit test
692
         in the first copy, so that the loops that start with test
693
         of exit condition have continuous body after unrolling.  */
694
 
695
      if (dump_file)
696
        fprintf (dump_file, ";; Condition on beginning of loop.\n");
697
 
698
      /* Peel exit_mod iterations.  */
699
      RESET_BIT (wont_exit, 0);
700
      if (desc->noloop_assumptions)
701
        RESET_BIT (wont_exit, 1);
702
 
703
      if (exit_mod)
704
        {
705
          opt_info_start_duplication (opt_info);
706
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
707
                                              loops, exit_mod,
708
                                              wont_exit, desc->out_edge,
709
                                              remove_edges, &n_remove_edges,
710
                                              DLTHE_FLAG_UPDATE_FREQ
711
                                              | (opt_info && exit_mod > 1
712
                                                 ? DLTHE_RECORD_COPY_NUMBER
713
                                                   : 0));
714
          gcc_assert (ok);
715
 
716
          if (opt_info && exit_mod > 1)
717
            apply_opt_in_copies (opt_info, exit_mod, false, false);
718
 
719
          desc->noloop_assumptions = NULL_RTX;
720
          desc->niter -= exit_mod;
721
          desc->niter_max -= exit_mod;
722
        }
723
 
724
      SET_BIT (wont_exit, 1);
725
    }
726
  else
727
    {
728
      /* Leave exit test in last copy, for the same reason as above if
729
         the loop tests the condition at the end of loop body.  */
730
 
731
      if (dump_file)
732
        fprintf (dump_file, ";; Condition on end of loop.\n");
733
 
734
      /* We know that niter >= max_unroll + 2; so we do not need to care of
735
         case when we would exit before reaching the loop.  So just peel
736
         exit_mod + 1 iterations.  */
737
      if (exit_mod != max_unroll
738
          || desc->noloop_assumptions)
739
        {
740
          RESET_BIT (wont_exit, 0);
741
          if (desc->noloop_assumptions)
742
            RESET_BIT (wont_exit, 1);
743
 
744
          opt_info_start_duplication (opt_info);
745
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
746
                                              loops, exit_mod + 1,
747
                                              wont_exit, desc->out_edge,
748
                                              remove_edges, &n_remove_edges,
749
                                              DLTHE_FLAG_UPDATE_FREQ
750
                                              | (opt_info && exit_mod > 0
751
                                                 ? DLTHE_RECORD_COPY_NUMBER
752
                                                   : 0));
753
          gcc_assert (ok);
754
 
755
          if (opt_info && exit_mod > 0)
756
            apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
757
 
758
          desc->niter -= exit_mod + 1;
759
          desc->niter_max -= exit_mod + 1;
760
          desc->noloop_assumptions = NULL_RTX;
761
 
762
          SET_BIT (wont_exit, 0);
763
          SET_BIT (wont_exit, 1);
764
        }
765
 
766
      RESET_BIT (wont_exit, max_unroll);
767
    }
768
 
769
  /* Now unroll the loop.  */
770
 
771
  opt_info_start_duplication (opt_info);
772
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
773
                                      loops, max_unroll,
774
                                      wont_exit, desc->out_edge,
775
                                      remove_edges, &n_remove_edges,
776
                                      DLTHE_FLAG_UPDATE_FREQ
777
                                      | (opt_info
778
                                         ? DLTHE_RECORD_COPY_NUMBER
779
                                           : 0));
780
  gcc_assert (ok);
781
 
782
  if (opt_info)
783
    {
784
      apply_opt_in_copies (opt_info, max_unroll, true, true);
785
      free_opt_info (opt_info);
786
    }
787
 
788
  free (wont_exit);
789
 
790
  if (exit_at_end)
791
    {
792
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
793
      /* Find a new in and out edge; they are in the last copy we have made.  */
794
 
795
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
796
        {
797
          desc->out_edge = EDGE_SUCC (exit_block, 0);
798
          desc->in_edge = EDGE_SUCC (exit_block, 1);
799
        }
800
      else
801
        {
802
          desc->out_edge = EDGE_SUCC (exit_block, 1);
803
          desc->in_edge = EDGE_SUCC (exit_block, 0);
804
        }
805
    }
806
 
807
  desc->niter /= max_unroll + 1;
808
  desc->niter_max /= max_unroll + 1;
809
  desc->niter_expr = GEN_INT (desc->niter);
810
 
811
  /* Remove the edges.  */
812
  for (i = 0; i < n_remove_edges; i++)
813
    remove_path (loops, remove_edges[i]);
814
  free (remove_edges);
815
 
816
  if (dump_file)
817
    fprintf (dump_file,
818
             ";; Unrolled loop %d times, constant # of iterations %i insns\n",
819
             max_unroll, num_loop_insns (loop));
820
}
821
 
822
/* Decide whether to unroll LOOP iterating runtime computable number of times
823
   and how much.  */
824
static void
825
decide_unroll_runtime_iterations (struct loop *loop, int flags)
826
{
827
  unsigned nunroll, nunroll_by_av, i;
828
  struct niter_desc *desc;
829
 
830
  if (!(flags & UAP_UNROLL))
831
    {
832
      /* We were not asked to, just return back silently.  */
833
      return;
834
    }
835
 
836
  if (dump_file)
837
    fprintf (dump_file,
838
             "\n;; Considering unrolling loop with runtime "
839
             "computable number of iterations\n");
840
 
841
  /* nunroll = total number of copies of the original loop body in
842
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
843
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
844
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
845
  if (nunroll > nunroll_by_av)
846
    nunroll = nunroll_by_av;
847
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
848
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
849
 
850
  /* Skip big loops.  */
851
  if (nunroll <= 1)
852
    {
853
      if (dump_file)
854
        fprintf (dump_file, ";; Not considering loop, is too big\n");
855
      return;
856
    }
857
 
858
  /* Check for simple loops.  */
859
  desc = get_simple_loop_desc (loop);
860
 
861
  /* Check simpleness.  */
862
  if (!desc->simple_p || desc->assumptions)
863
    {
864
      if (dump_file)
865
        fprintf (dump_file,
866
                 ";; Unable to prove that the number of iterations "
867
                 "can be counted in runtime\n");
868
      return;
869
    }
870
 
871
  if (desc->const_iter)
872
    {
873
      if (dump_file)
874
        fprintf (dump_file, ";; Loop iterates constant times\n");
875
      return;
876
    }
877
 
878
  /* If we have profile feedback, check whether the loop rolls.  */
879
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
880
    {
881
      if (dump_file)
882
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
883
      return;
884
    }
885
 
886
  /* Success; now force nunroll to be power of 2, as we are unable to
887
     cope with overflows in computation of number of iterations.  */
888
  for (i = 1; 2 * i <= nunroll; i *= 2)
889
    continue;
890
 
891
  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
892
  loop->lpt_decision.times = i - 1;
893
 
894
  if (dump_file)
895
    fprintf (dump_file,
896
             ";; Decided to unroll the runtime computable "
897
             "times rolling loop, %d times.\n",
898
             loop->lpt_decision.times);
899
}
900
 
901
/* Unroll LOOP for that we are able to count number of iterations in runtime
902
   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
903
   extra care for case n < 0):
904
 
905
   for (i = 0; i < n; i++)
906
     body;
907
 
908
   ==>
909
 
910
   i = 0;
911
   mod = n % 4;
912
 
913
   switch (mod)
914
     {
915
       case 3:
916
         body; i++;
917
       case 2:
918
         body; i++;
919
       case 1:
920
         body; i++;
921
       case 0: ;
922
     }
923
 
924
   while (i < n)
925
     {
926
       body; i++;
927
       body; i++;
928
       body; i++;
929
       body; i++;
930
     }
931
   */
932
static void
933
unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
934
{
935
  rtx old_niter, niter, init_code, branch_code, tmp;
936
  unsigned i, j, p;
937
  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
938
  unsigned n_dom_bbs;
939
  sbitmap wont_exit;
940
  int may_exit_copy;
941
  unsigned n_peel, n_remove_edges;
942
  edge *remove_edges, e;
943
  bool extra_zero_check, last_may_exit;
944
  unsigned max_unroll = loop->lpt_decision.times;
945
  struct niter_desc *desc = get_simple_loop_desc (loop);
946
  bool exit_at_end = loop_exit_at_end_p (loop);
947
  struct opt_info *opt_info = NULL;
948
  bool ok;
949
 
950
  if (flag_split_ivs_in_unroller
951
      || flag_variable_expansion_in_unroller)
952
    opt_info = analyze_insns_in_loop (loop);
953
 
954
  /* Remember blocks whose dominators will have to be updated.  */
955
  dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
956
  n_dom_bbs = 0;
957
 
958
  body = get_loop_body (loop);
959
  for (i = 0; i < loop->num_nodes; i++)
960
    {
961
      unsigned nldom;
962
      basic_block *ldom;
963
 
964
      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
965
      for (j = 0; j < nldom; j++)
966
        if (!flow_bb_inside_loop_p (loop, ldom[j]))
967
          dom_bbs[n_dom_bbs++] = ldom[j];
968
 
969
      free (ldom);
970
    }
971
  free (body);
972
 
973
  if (!exit_at_end)
974
    {
975
      /* Leave exit in first copy (for explanation why see comment in
976
         unroll_loop_constant_iterations).  */
977
      may_exit_copy = 0;
978
      n_peel = max_unroll - 1;
979
      extra_zero_check = true;
980
      last_may_exit = false;
981
    }
982
  else
983
    {
984
      /* Leave exit in last copy (for explanation why see comment in
985
         unroll_loop_constant_iterations).  */
986
      may_exit_copy = max_unroll;
987
      n_peel = max_unroll;
988
      extra_zero_check = false;
989
      last_may_exit = true;
990
    }
991
 
992
  /* Get expression for number of iterations.  */
993
  start_sequence ();
994
  old_niter = niter = gen_reg_rtx (desc->mode);
995
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
996
  if (tmp != niter)
997
    emit_move_insn (niter, tmp);
998
 
999
  /* Count modulo by ANDing it with max_unroll; we use the fact that
1000
     the number of unrollings is a power of two, and thus this is correct
1001
     even if there is overflow in the computation.  */
1002
  niter = expand_simple_binop (desc->mode, AND,
1003
                               niter,
1004
                               GEN_INT (max_unroll),
1005
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
1006
 
1007
  init_code = get_insns ();
1008
  end_sequence ();
1009
 
1010
  /* Precondition the loop.  */
1011
  loop_split_edge_with (loop_preheader_edge (loop), init_code);
1012
 
1013
  remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1014
  n_remove_edges = 0;
1015
 
1016
  wont_exit = sbitmap_alloc (max_unroll + 2);
1017
 
1018
  /* Peel the first copy of loop body (almost always we must leave exit test
1019
     here; the only exception is when we have extra zero check and the number
1020
     of iterations is reliable.  Also record the place of (possible) extra
1021
     zero check.  */
1022
  sbitmap_zero (wont_exit);
1023
  if (extra_zero_check
1024
      && !desc->noloop_assumptions)
1025
    SET_BIT (wont_exit, 1);
1026
  ezc_swtch = loop_preheader_edge (loop)->src;
1027
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1028
                                      loops, 1,
1029
                                      wont_exit, desc->out_edge,
1030
                                      remove_edges, &n_remove_edges,
1031
                                      DLTHE_FLAG_UPDATE_FREQ);
1032
  gcc_assert (ok);
1033
 
1034
  /* Record the place where switch will be built for preconditioning.  */
1035
  swtch = loop_split_edge_with (loop_preheader_edge (loop),
1036
                                NULL_RTX);
1037
 
1038
  for (i = 0; i < n_peel; i++)
1039
    {
1040
      /* Peel the copy.  */
1041
      sbitmap_zero (wont_exit);
1042
      if (i != n_peel - 1 || !last_may_exit)
1043
        SET_BIT (wont_exit, 1);
1044
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1045
                                          loops, 1,
1046
                                          wont_exit, desc->out_edge,
1047
                                          remove_edges, &n_remove_edges,
1048
                                          DLTHE_FLAG_UPDATE_FREQ);
1049
      gcc_assert (ok);
1050
 
1051
      /* Create item for switch.  */
1052
      j = n_peel - i - (extra_zero_check ? 0 : 1);
1053
      p = REG_BR_PROB_BASE / (i + 2);
1054
 
1055
      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1056
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1057
                                          block_label (preheader), p,
1058
                                          NULL_RTX);
1059
 
1060
      swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1061
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1062
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1063
      e = make_edge (swtch, preheader,
1064
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1065
      e->probability = p;
1066
    }
1067
 
1068
  if (extra_zero_check)
1069
    {
1070
      /* Add branch for zero iterations.  */
1071
      p = REG_BR_PROB_BASE / (max_unroll + 1);
1072
      swtch = ezc_swtch;
1073
      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1074
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1075
                                          block_label (preheader), p,
1076
                                          NULL_RTX);
1077
 
1078
      swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1079
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1080
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1081
      e = make_edge (swtch, preheader,
1082
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1083
      e->probability = p;
1084
    }
1085
 
1086
  /* Recount dominators for outer blocks.  */
1087
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1088
 
1089
  /* And unroll loop.  */
1090
 
1091
  sbitmap_ones (wont_exit);
1092
  RESET_BIT (wont_exit, may_exit_copy);
1093
  opt_info_start_duplication (opt_info);
1094
 
1095
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1096
                                      loops, max_unroll,
1097
                                      wont_exit, desc->out_edge,
1098
                                      remove_edges, &n_remove_edges,
1099
                                      DLTHE_FLAG_UPDATE_FREQ
1100
                                      | (opt_info
1101
                                         ? DLTHE_RECORD_COPY_NUMBER
1102
                                           : 0));
1103
  gcc_assert (ok);
1104
 
1105
  if (opt_info)
1106
    {
1107
      apply_opt_in_copies (opt_info, max_unroll, true, true);
1108
      free_opt_info (opt_info);
1109
    }
1110
 
1111
  free (wont_exit);
1112
 
1113
  if (exit_at_end)
1114
    {
1115
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1116
      /* Find a new in and out edge; they are in the last copy we have
1117
         made.  */
1118
 
1119
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1120
        {
1121
          desc->out_edge = EDGE_SUCC (exit_block, 0);
1122
          desc->in_edge = EDGE_SUCC (exit_block, 1);
1123
        }
1124
      else
1125
        {
1126
          desc->out_edge = EDGE_SUCC (exit_block, 1);
1127
          desc->in_edge = EDGE_SUCC (exit_block, 0);
1128
        }
1129
    }
1130
 
1131
  /* Remove the edges.  */
1132
  for (i = 0; i < n_remove_edges; i++)
1133
    remove_path (loops, remove_edges[i]);
1134
  free (remove_edges);
1135
 
1136
  /* We must be careful when updating the number of iterations due to
1137
     preconditioning and the fact that the value must be valid at entry
1138
     of the loop.  After passing through the above code, we see that
1139
     the correct new number of iterations is this:  */
1140
  gcc_assert (!desc->const_iter);
1141
  desc->niter_expr =
1142
    simplify_gen_binary (UDIV, desc->mode, old_niter,
1143
                         GEN_INT (max_unroll + 1));
1144
  desc->niter_max /= max_unroll + 1;
1145
  if (exit_at_end)
1146
    {
1147
      desc->niter_expr =
1148
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1149
      desc->noloop_assumptions = NULL_RTX;
1150
      desc->niter_max--;
1151
    }
1152
 
1153
  if (dump_file)
1154
    fprintf (dump_file,
1155
             ";; Unrolled loop %d times, counting # of iterations "
1156
             "in runtime, %i insns\n",
1157
             max_unroll, num_loop_insns (loop));
1158
 
1159
  if (dom_bbs)
1160
    free (dom_bbs);
1161
}
1162
 
1163
/* Decide whether to simply peel LOOP and how much.  */
1164
static void
1165
decide_peel_simple (struct loop *loop, int flags)
1166
{
1167
  unsigned npeel;
1168
  struct niter_desc *desc;
1169
 
1170
  if (!(flags & UAP_PEEL))
1171
    {
1172
      /* We were not asked to, just return back silently.  */
1173
      return;
1174
    }
1175
 
1176
  if (dump_file)
1177
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1178
 
1179
  /* npeel = number of iterations to peel.  */
1180
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1181
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1182
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1183
 
1184
  /* Skip big loops.  */
1185
  if (!npeel)
1186
    {
1187
      if (dump_file)
1188
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1189
      return;
1190
    }
1191
 
1192
  /* Check for simple loops.  */
1193
  desc = get_simple_loop_desc (loop);
1194
 
1195
  /* Check number of iterations.  */
1196
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1197
    {
1198
      if (dump_file)
1199
        fprintf (dump_file, ";; Loop iterates constant times\n");
1200
      return;
1201
    }
1202
 
1203
  /* Do not simply peel loops with branches inside -- it increases number
1204
     of mispredicts.  */
1205
  if (num_loop_branches (loop) > 1)
1206
    {
1207
      if (dump_file)
1208
        fprintf (dump_file, ";; Not peeling, contains branches\n");
1209
      return;
1210
    }
1211
 
1212
  if (loop->header->count)
1213
    {
1214
      unsigned niter = expected_loop_iterations (loop);
1215
      if (niter + 1 > npeel)
1216
        {
1217
          if (dump_file)
1218
            {
1219
              fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1220
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1221
                       (HOST_WIDEST_INT) (niter + 1));
1222
              fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1223
                       npeel);
1224
            }
1225
          return;
1226
        }
1227
      npeel = niter + 1;
1228
    }
1229
  else
1230
    {
1231
      /* For now we have no good heuristics to decide whether loop peeling
1232
         will be effective, so disable it.  */
1233
      if (dump_file)
1234
        fprintf (dump_file,
1235
                 ";; Not peeling loop, no evidence it will be profitable\n");
1236
      return;
1237
    }
1238
 
1239
  /* Success.  */
1240
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1241
  loop->lpt_decision.times = npeel;
1242
 
1243
  if (dump_file)
1244
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1245
             loop->lpt_decision.times);
1246
}
1247
 
1248
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1249
   while (cond)
1250
     body;
1251
 
1252
   ==>
1253
 
1254
   if (!cond) goto end;
1255
   body;
1256
   if (!cond) goto end;
1257
   body;
1258
   while (cond)
1259
     body;
1260
   end: ;
1261
   */
1262
static void
1263
peel_loop_simple (struct loops *loops, struct loop *loop)
1264
{
1265
  sbitmap wont_exit;
1266
  unsigned npeel = loop->lpt_decision.times;
1267
  struct niter_desc *desc = get_simple_loop_desc (loop);
1268
  struct opt_info *opt_info = NULL;
1269
  bool ok;
1270
 
1271
  if (flag_split_ivs_in_unroller && npeel > 1)
1272
    opt_info = analyze_insns_in_loop (loop);
1273
 
1274
  wont_exit = sbitmap_alloc (npeel + 1);
1275
  sbitmap_zero (wont_exit);
1276
 
1277
  opt_info_start_duplication (opt_info);
1278
 
1279
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1280
                                      loops, npeel, wont_exit,
1281
                                      NULL, NULL,
1282
                                      NULL, DLTHE_FLAG_UPDATE_FREQ
1283
                                      | (opt_info
1284
                                         ? DLTHE_RECORD_COPY_NUMBER
1285
                                           : 0));
1286
  gcc_assert (ok);
1287
 
1288
  free (wont_exit);
1289
 
1290
  if (opt_info)
1291
    {
1292
      apply_opt_in_copies (opt_info, npeel, false, false);
1293
      free_opt_info (opt_info);
1294
    }
1295
 
1296
  if (desc->simple_p)
1297
    {
1298
      if (desc->const_iter)
1299
        {
1300
          desc->niter -= npeel;
1301
          desc->niter_expr = GEN_INT (desc->niter);
1302
          desc->noloop_assumptions = NULL_RTX;
1303
        }
1304
      else
1305
        {
1306
          /* We cannot just update niter_expr, as its value might be clobbered
1307
             inside loop.  We could handle this by counting the number into
1308
             temporary just like we do in runtime unrolling, but it does not
1309
             seem worthwhile.  */
1310
          free_simple_loop_desc (loop);
1311
        }
1312
    }
1313
  if (dump_file)
1314
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1315
}
1316
 
1317
/* Decide whether to unroll LOOP stupidly and how much.  */
1318
static void
1319
decide_unroll_stupid (struct loop *loop, int flags)
1320
{
1321
  unsigned nunroll, nunroll_by_av, i;
1322
  struct niter_desc *desc;
1323
 
1324
  if (!(flags & UAP_UNROLL_ALL))
1325
    {
1326
      /* We were not asked to, just return back silently.  */
1327
      return;
1328
    }
1329
 
1330
  if (dump_file)
1331
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1332
 
1333
  /* nunroll = total number of copies of the original loop body in
1334
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1335
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1336
  nunroll_by_av
1337
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1338
  if (nunroll > nunroll_by_av)
1339
    nunroll = nunroll_by_av;
1340
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1341
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1342
 
1343
  /* Skip big loops.  */
1344
  if (nunroll <= 1)
1345
    {
1346
      if (dump_file)
1347
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1348
      return;
1349
    }
1350
 
1351
  /* Check for simple loops.  */
1352
  desc = get_simple_loop_desc (loop);
1353
 
1354
  /* Check simpleness.  */
1355
  if (desc->simple_p && !desc->assumptions)
1356
    {
1357
      if (dump_file)
1358
        fprintf (dump_file, ";; The loop is simple\n");
1359
      return;
1360
    }
1361
 
1362
  /* Do not unroll loops with branches inside -- it increases number
1363
     of mispredicts.  */
1364
  if (num_loop_branches (loop) > 1)
1365
    {
1366
      if (dump_file)
1367
        fprintf (dump_file, ";; Not unrolling, contains branches\n");
1368
      return;
1369
    }
1370
 
1371
  /* If we have profile feedback, check whether the loop rolls.  */
1372
  if (loop->header->count
1373
      && expected_loop_iterations (loop) < 2 * nunroll)
1374
    {
1375
      if (dump_file)
1376
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1377
      return;
1378
    }
1379
 
1380
  /* Success.  Now force nunroll to be power of 2, as it seems that this
1381
     improves results (partially because of better alignments, partially
1382
     because of some dark magic).  */
1383
  for (i = 1; 2 * i <= nunroll; i *= 2)
1384
    continue;
1385
 
1386
  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1387
  loop->lpt_decision.times = i - 1;
1388
 
1389
  if (dump_file)
1390
    fprintf (dump_file,
1391
             ";; Decided to unroll the loop stupidly, %d times.\n",
1392
             loop->lpt_decision.times);
1393
}
1394
 
1395
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1396
   while (cond)
1397
     body;
1398
 
1399
   ==>
1400
 
1401
   while (cond)
1402
     {
1403
       body;
1404
       if (!cond) break;
1405
       body;
1406
       if (!cond) break;
1407
       body;
1408
       if (!cond) break;
1409
       body;
1410
     }
1411
   */
1412
static void
1413
unroll_loop_stupid (struct loops *loops, struct loop *loop)
1414
{
1415
  sbitmap wont_exit;
1416
  unsigned nunroll = loop->lpt_decision.times;
1417
  struct niter_desc *desc = get_simple_loop_desc (loop);
1418
  struct opt_info *opt_info = NULL;
1419
  bool ok;
1420
 
1421
  if (flag_split_ivs_in_unroller
1422
      || flag_variable_expansion_in_unroller)
1423
    opt_info = analyze_insns_in_loop (loop);
1424
 
1425
 
1426
  wont_exit = sbitmap_alloc (nunroll + 1);
1427
  sbitmap_zero (wont_exit);
1428
  opt_info_start_duplication (opt_info);
1429
 
1430
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1431
                                      loops, nunroll, wont_exit,
1432
                                      NULL, NULL, NULL,
1433
                                      DLTHE_FLAG_UPDATE_FREQ
1434
                                      | (opt_info
1435
                                         ? DLTHE_RECORD_COPY_NUMBER
1436
                                           : 0));
1437
  gcc_assert (ok);
1438
 
1439
  if (opt_info)
1440
    {
1441
      apply_opt_in_copies (opt_info, nunroll, true, true);
1442
      free_opt_info (opt_info);
1443
    }
1444
 
1445
  free (wont_exit);
1446
 
1447
  if (desc->simple_p)
1448
    {
1449
      /* We indeed may get here provided that there are nontrivial assumptions
1450
         for a loop to be really simple.  We could update the counts, but the
1451
         problem is that we are unable to decide which exit will be taken
1452
         (not really true in case the number of iterations is constant,
1453
         but noone will do anything with this information, so we do not
1454
         worry about it).  */
1455
      desc->simple_p = false;
1456
    }
1457
 
1458
  if (dump_file)
1459
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1460
             nunroll, num_loop_insns (loop));
1461
}
1462
 
1463
/* A hash function for information about insns to split.  */
1464
 
1465
static hashval_t
1466
si_info_hash (const void *ivts)
1467
{
1468
  return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1469
}
1470
 
1471
/* An equality functions for information about insns to split.  */
1472
 
1473
static int
1474
si_info_eq (const void *ivts1, const void *ivts2)
1475
{
1476
  const struct iv_to_split *i1 = ivts1;
1477
  const struct iv_to_split *i2 = ivts2;
1478
 
1479
  return i1->insn == i2->insn;
1480
}
1481
 
1482
/* Return a hash for VES, which is really a "var_to_expand *".  */
1483
 
1484
static hashval_t
1485
ve_info_hash (const void *ves)
1486
{
1487
  return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1488
}
1489
 
1490
/* Return true if IVTS1 and IVTS2 (which are really both of type
1491
   "var_to_expand *") refer to the same instruction.  */
1492
 
1493
static int
1494
ve_info_eq (const void *ivts1, const void *ivts2)
1495
{
1496
  const struct var_to_expand *i1 = ivts1;
1497
  const struct var_to_expand *i2 = ivts2;
1498
 
1499
  return i1->insn == i2->insn;
1500
}
1501
 
1502
/* Returns true if REG is referenced in one insn in LOOP.  */
1503
 
1504
bool
1505
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1506
{
1507
  basic_block *body, bb;
1508
  unsigned i;
1509
  int count_ref = 0;
1510
  rtx insn;
1511
 
1512
  body = get_loop_body (loop);
1513
  for (i = 0; i < loop->num_nodes; i++)
1514
    {
1515
      bb = body[i];
1516
 
1517
      FOR_BB_INSNS (bb, insn)
1518
      {
1519
        if (rtx_referenced_p (reg, insn))
1520
          count_ref++;
1521
      }
1522
    }
1523
  return (count_ref  == 1);
1524
}
1525
 
1526
/* Determine whether INSN contains an accumulator
1527
   which can be expanded into separate copies,
1528
   one for each copy of the LOOP body.
1529
 
1530
   for (i = 0 ; i < n; i++)
1531
     sum += a[i];
1532
 
1533
   ==>
1534
 
1535
   sum += a[i]
1536
   ....
1537
   i = i+1;
1538
   sum1 += a[i]
1539
   ....
1540
   i = i+1
1541
   sum2 += a[i];
1542
   ....
1543
 
1544
   Return NULL if INSN contains no opportunity for expansion of accumulator.
1545
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1546
   information and return a pointer to it.
1547
*/
1548
 
1549
static struct var_to_expand *
1550
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1551
{
1552
  rtx set, dest, src, op1;
1553
  struct var_to_expand *ves;
1554
  enum machine_mode mode1, mode2;
1555
 
1556
  set = single_set (insn);
1557
  if (!set)
1558
    return NULL;
1559
 
1560
  dest = SET_DEST (set);
1561
  src = SET_SRC (set);
1562
 
1563
  if (GET_CODE (src) != PLUS
1564
      && GET_CODE (src) != MINUS
1565
      && GET_CODE (src) != MULT)
1566
    return NULL;
1567
 
1568
  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1569
     in MD.  But if there is no optab to generate the insn, we can not
1570
     perform the variable expansion.  This can happen if an MD provides
1571
     an insn but not a named pattern to generate it, for example to avoid
1572
     producing code that needs additional mode switches like for x87/mmx.
1573
 
1574
     So we check have_insn_for which looks for an optab for the operation
1575
     in SRC.  If it doesn't exist, we can't perform the expansion even
1576
     though INSN is valid.  */
1577
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1578
    return NULL;
1579
 
1580
  if (!XEXP (src, 0))
1581
    return NULL;
1582
 
1583
  op1 = XEXP (src, 0);
1584
 
1585
  if (!REG_P (dest)
1586
      && !(GET_CODE (dest) == SUBREG
1587
           && REG_P (SUBREG_REG (dest))))
1588
    return NULL;
1589
 
1590
  if (!rtx_equal_p (dest, op1))
1591
    return NULL;
1592
 
1593
  if (!referenced_in_one_insn_in_loop_p (loop, dest))
1594
    return NULL;
1595
 
1596
  if (rtx_referenced_p (dest, XEXP (src, 1)))
1597
    return NULL;
1598
 
1599
  mode1 = GET_MODE (dest);
1600
  mode2 = GET_MODE (XEXP (src, 1));
1601
  if ((FLOAT_MODE_P (mode1)
1602
       || FLOAT_MODE_P (mode2))
1603
      && !flag_unsafe_math_optimizations)
1604
    return NULL;
1605
 
1606
  /* Record the accumulator to expand.  */
1607
  ves = XNEW (struct var_to_expand);
1608
  ves->insn = insn;
1609
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
1610
  ves->reg = copy_rtx (dest);
1611
  ves->op = GET_CODE (src);
1612
  ves->expansion_count = 0;
1613
  ves->reuse_expansion = 0;
1614
  return ves;
1615
}
1616
 
1617
/* Determine whether there is an induction variable in INSN that
1618
   we would like to split during unrolling.
1619
 
1620
   I.e. replace
1621
 
1622
   i = i + 1;
1623
   ...
1624
   i = i + 1;
1625
   ...
1626
   i = i + 1;
1627
   ...
1628
 
1629
   type chains by
1630
 
1631
   i0 = i + 1
1632
   ...
1633
   i = i0 + 1
1634
   ...
1635
   i = i0 + 2
1636
   ...
1637
 
1638
   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1639
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
1640
   pointer to it.  */
1641
 
1642
static struct iv_to_split *
1643
analyze_iv_to_split_insn (rtx insn)
1644
{
1645
  rtx set, dest;
1646
  struct rtx_iv iv;
1647
  struct iv_to_split *ivts;
1648
  bool ok;
1649
 
1650
  /* For now we just split the basic induction variables.  Later this may be
1651
     extended for example by selecting also addresses of memory references.  */
1652
  set = single_set (insn);
1653
  if (!set)
1654
    return NULL;
1655
 
1656
  dest = SET_DEST (set);
1657
  if (!REG_P (dest))
1658
    return NULL;
1659
 
1660
  if (!biv_p (insn, dest))
1661
    return NULL;
1662
 
1663
  ok = iv_analyze_result (insn, dest, &iv);
1664
 
1665
  /* This used to be an assert under the assumption that if biv_p returns
1666
     true that iv_analyze_result must also return true.  However, that
1667
     assumption is not strictly correct as evidenced by pr25569.
1668
 
1669
     Returning NULL when iv_analyze_result returns false is safe and
1670
     avoids the problems in pr25569 until the iv_analyze_* routines
1671
     can be fixed, which is apparently hard and time consuming
1672
     according to their author.  */
1673
  if (! ok)
1674
    return NULL;
1675
 
1676
  if (iv.step == const0_rtx
1677
      || iv.mode != iv.extend_mode)
1678
    return NULL;
1679
 
1680
  /* Record the insn to split.  */
1681
  ivts = XNEW (struct iv_to_split);
1682
  ivts->insn = insn;
1683
  ivts->base_var = NULL_RTX;
1684
  ivts->step = iv.step;
1685
  ivts->n_loc = 1;
1686
  ivts->loc[0] = 1;
1687
 
1688
  return ivts;
1689
}
1690
 
1691
/* Determines which of insns in LOOP can be optimized.
1692
   Return a OPT_INFO struct with the relevant hash tables filled
1693
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
1694
   is undefined for the return value.  */
1695
 
1696
static struct opt_info *
1697
analyze_insns_in_loop (struct loop *loop)
1698
{
1699
  basic_block *body, bb;
1700
  unsigned i, num_edges = 0;
1701
  struct opt_info *opt_info = XCNEW (struct opt_info);
1702
  rtx insn;
1703
  struct iv_to_split *ivts = NULL;
1704
  struct var_to_expand *ves = NULL;
1705
  PTR *slot1;
1706
  PTR *slot2;
1707
  edge *edges = get_loop_exit_edges (loop, &num_edges);
1708
  bool can_apply = false;
1709
 
1710
  iv_analysis_loop_init (loop);
1711
 
1712
  body = get_loop_body (loop);
1713
 
1714
  if (flag_split_ivs_in_unroller)
1715
    opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1716
                                            si_info_hash, si_info_eq, free);
1717
 
1718
  /* Record the loop exit bb and loop preheader before the unrolling.  */
1719
  if (!loop_preheader_edge (loop)->src)
1720
    {
1721
      loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1722
      opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1723
    }
1724
  else
1725
    opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1726
 
1727
  if (num_edges == 1
1728
      && !(edges[0]->flags & EDGE_COMPLEX))
1729
    {
1730
      opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1731
      can_apply = true;
1732
    }
1733
 
1734
  if (flag_variable_expansion_in_unroller
1735
      && can_apply)
1736
    opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1737
                                                      ve_info_hash, ve_info_eq, free);
1738
 
1739
  for (i = 0; i < loop->num_nodes; i++)
1740
    {
1741
      bb = body[i];
1742
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1743
        continue;
1744
 
1745
      FOR_BB_INSNS (bb, insn)
1746
      {
1747
        if (!INSN_P (insn))
1748
          continue;
1749
 
1750
        if (opt_info->insns_to_split)
1751
          ivts = analyze_iv_to_split_insn (insn);
1752
 
1753
        if (ivts)
1754
          {
1755
            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1756
            *slot1 = ivts;
1757
            continue;
1758
          }
1759
 
1760
        if (opt_info->insns_with_var_to_expand)
1761
          ves = analyze_insn_to_expand_var (loop, insn);
1762
 
1763
        if (ves)
1764
          {
1765
            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1766
            *slot2 = ves;
1767
          }
1768
      }
1769
    }
1770
 
1771
  free (edges);
1772
  free (body);
1773
  return opt_info;
1774
}
1775
 
1776
/* Called just before loop duplication.  Records start of duplicated area
1777
   to OPT_INFO.  */
1778
 
1779
static void
1780
opt_info_start_duplication (struct opt_info *opt_info)
1781
{
1782
  if (opt_info)
1783
    opt_info->first_new_block = last_basic_block;
1784
}
1785
 
1786
/* Determine the number of iterations between initialization of the base
1787
   variable and the current copy (N_COPY).  N_COPIES is the total number
1788
   of newly created copies.  UNROLLING is true if we are unrolling
1789
   (not peeling) the loop.  */
1790
 
1791
static unsigned
1792
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1793
{
1794
  if (unrolling)
1795
    {
1796
      /* If we are unrolling, initialization is done in the original loop
1797
         body (number 0).  */
1798
      return n_copy;
1799
    }
1800
  else
1801
    {
1802
      /* If we are peeling, the copy in that the initialization occurs has
1803
         number 1.  The original loop (number 0) is the last.  */
1804
      if (n_copy)
1805
        return n_copy - 1;
1806
      else
1807
        return n_copies;
1808
    }
1809
}
1810
 
1811
/* Locate in EXPR the expression corresponding to the location recorded
1812
   in IVTS, and return a pointer to the RTX for this location.  */
1813
 
1814
static rtx *
1815
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1816
{
1817
  unsigned i;
1818
  rtx *ret = &expr;
1819
 
1820
  for (i = 0; i < ivts->n_loc; i++)
1821
    ret = &XEXP (*ret, ivts->loc[i]);
1822
 
1823
  return ret;
1824
}
1825
 
1826
/* Allocate basic variable for the induction variable chain.  Callback for
1827
   htab_traverse.  */
1828
 
1829
static int
1830
allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1831
{
1832
  struct iv_to_split *ivts = *slot;
1833
  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1834
 
1835
  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1836
 
1837
  return 1;
1838
}
1839
 
1840
/* Insert initialization of basic variable of IVTS before INSN, taking
1841
   the initial value from INSN.  */
1842
 
1843
static void
1844
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1845
{
1846
  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1847
  rtx seq;
1848
 
1849
  start_sequence ();
1850
  expr = force_operand (expr, ivts->base_var);
1851
  if (expr != ivts->base_var)
1852
    emit_move_insn (ivts->base_var, expr);
1853
  seq = get_insns ();
1854
  end_sequence ();
1855
 
1856
  emit_insn_before (seq, insn);
1857
}
1858
 
1859
/* Replace the use of induction variable described in IVTS in INSN
1860
   by base variable + DELTA * step.  */
1861
 
1862
static void
1863
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1864
{
1865
  rtx expr, *loc, seq, incr, var;
1866
  enum machine_mode mode = GET_MODE (ivts->base_var);
1867
  rtx src, dest, set;
1868
 
1869
  /* Construct base + DELTA * step.  */
1870
  if (!delta)
1871
    expr = ivts->base_var;
1872
  else
1873
    {
1874
      incr = simplify_gen_binary (MULT, mode,
1875
                                  ivts->step, gen_int_mode (delta, mode));
1876
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1877
                                  ivts->base_var, incr);
1878
    }
1879
 
1880
  /* Figure out where to do the replacement.  */
1881
  loc = get_ivts_expr (single_set (insn), ivts);
1882
 
1883
  /* If we can make the replacement right away, we're done.  */
1884
  if (validate_change (insn, loc, expr, 0))
1885
    return;
1886
 
1887
  /* Otherwise, force EXPR into a register and try again.  */
1888
  start_sequence ();
1889
  var = gen_reg_rtx (mode);
1890
  expr = force_operand (expr, var);
1891
  if (expr != var)
1892
    emit_move_insn (var, expr);
1893
  seq = get_insns ();
1894
  end_sequence ();
1895
  emit_insn_before (seq, insn);
1896
 
1897
  if (validate_change (insn, loc, var, 0))
1898
    return;
1899
 
1900
  /* The last chance.  Try recreating the assignment in insn
1901
     completely from scratch.  */
1902
  set = single_set (insn);
1903
  gcc_assert (set);
1904
 
1905
  start_sequence ();
1906
  *loc = var;
1907
  src = copy_rtx (SET_SRC (set));
1908
  dest = copy_rtx (SET_DEST (set));
1909
  src = force_operand (src, dest);
1910
  if (src != dest)
1911
    emit_move_insn (dest, src);
1912
  seq = get_insns ();
1913
  end_sequence ();
1914
 
1915
  emit_insn_before (seq, insn);
1916
  delete_insn (insn);
1917
}
1918
 
1919
 
1920
/* Return one expansion of the accumulator recorded in struct VE.  */
1921
 
1922
static rtx
1923
get_expansion (struct var_to_expand *ve)
1924
{
1925
  rtx reg;
1926
 
1927
  if (ve->reuse_expansion == 0)
1928
    reg = ve->reg;
1929
  else
1930
    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1931
 
1932
  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1933
    ve->reuse_expansion = 0;
1934
  else
1935
    ve->reuse_expansion++;
1936
 
1937
  return reg;
1938
}
1939
 
1940
 
1941
/* Given INSN replace the uses of the accumulator recorded in VE
1942
   with a new register.  */
1943
 
1944
static void
1945
expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1946
{
1947
  rtx new_reg, set;
1948
  bool really_new_expansion = false;
1949
 
1950
  set = single_set (insn);
1951
  gcc_assert (set);
1952
 
1953
  /* Generate a new register only if the expansion limit has not been
1954
     reached.  Else reuse an already existing expansion.  */
1955
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1956
    {
1957
      really_new_expansion = true;
1958
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1959
    }
1960
  else
1961
    new_reg = get_expansion (ve);
1962
 
1963
  validate_change (insn, &SET_DEST (set), new_reg, 1);
1964
  validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1965
 
1966
  if (apply_change_group ())
1967
    if (really_new_expansion)
1968
      {
1969
        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1970
        ve->expansion_count++;
1971
      }
1972
}
1973
 
1974
/* Initialize the variable expansions in loop preheader.
1975
   Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1976
   basic block where the initialization of the expansions
1977
   should take place.  */
1978
 
1979
static int
1980
insert_var_expansion_initialization (void **slot, void *place_p)
1981
{
1982
  struct var_to_expand *ve = *slot;
1983
  basic_block place = (basic_block)place_p;
1984
  rtx seq, var, zero_init, insn;
1985
  unsigned i;
1986
 
1987
  if (VEC_length (rtx, ve->var_expansions) == 0)
1988
    return 1;
1989
 
1990
  start_sequence ();
1991
  if (ve->op == PLUS || ve->op == MINUS)
1992
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1993
      {
1994
        zero_init =  CONST0_RTX (GET_MODE (var));
1995
        emit_move_insn (var, zero_init);
1996
      }
1997
  else if (ve->op == MULT)
1998
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1999
      {
2000
        zero_init =  CONST1_RTX (GET_MODE (var));
2001
        emit_move_insn (var, zero_init);
2002
      }
2003
 
2004
  seq = get_insns ();
2005
  end_sequence ();
2006
 
2007
  insn = BB_HEAD (place);
2008
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2009
    insn = NEXT_INSN (insn);
2010
 
2011
  emit_insn_after (seq, insn);
2012
  /* Continue traversing the hash table.  */
2013
  return 1;
2014
}
2015
 
2016
/*  Combine the variable expansions at the loop exit.
2017
    Callbacks for htab_traverse.  PLACE_P is the loop exit
2018
    basic block where the summation of the expansions should
2019
    take place.  */
2020
 
2021
static int
2022
combine_var_copies_in_loop_exit (void **slot, void *place_p)
2023
{
2024
  struct var_to_expand *ve = *slot;
2025
  basic_block place = (basic_block)place_p;
2026
  rtx sum = ve->reg;
2027
  rtx expr, seq, var, insn;
2028
  unsigned i;
2029
 
2030
  if (VEC_length (rtx, ve->var_expansions) == 0)
2031
    return 1;
2032
 
2033
  start_sequence ();
2034
  if (ve->op == PLUS || ve->op == MINUS)
2035
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2036
      {
2037
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2038
                                   var, sum);
2039
      }
2040
  else if (ve->op == MULT)
2041
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2042
      {
2043
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2044
                                   var, sum);
2045
      }
2046
 
2047
  expr = force_operand (sum, ve->reg);
2048
  if (expr != ve->reg)
2049
    emit_move_insn (ve->reg, expr);
2050
  seq = get_insns ();
2051
  end_sequence ();
2052
 
2053
  insn = BB_HEAD (place);
2054
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2055
    insn = NEXT_INSN (insn);
2056
 
2057
  emit_insn_after (seq, insn);
2058
 
2059
  /* Continue traversing the hash table.  */
2060
  return 1;
2061
}
2062
 
2063
/* Apply loop optimizations in loop copies using the
2064
   data which gathered during the unrolling.  Structure
2065
   OPT_INFO record that data.
2066
 
2067
   UNROLLING is true if we unrolled (not peeled) the loop.
2068
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2069
   the loop (as it should happen in complete unrolling, but not in ordinary
2070
   peeling of the loop).  */
2071
 
2072
static void
2073
apply_opt_in_copies (struct opt_info *opt_info,
2074
                     unsigned n_copies, bool unrolling,
2075
                     bool rewrite_original_loop)
2076
{
2077
  unsigned i, delta;
2078
  basic_block bb, orig_bb;
2079
  rtx insn, orig_insn, next;
2080
  struct iv_to_split ivts_templ, *ivts;
2081
  struct var_to_expand ve_templ, *ves;
2082
 
2083
  /* Sanity check -- we need to put initialization in the original loop
2084
     body.  */
2085
  gcc_assert (!unrolling || rewrite_original_loop);
2086
 
2087
  /* Allocate the basic variables (i0).  */
2088
  if (opt_info->insns_to_split)
2089
    htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2090
 
2091
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2092
    {
2093
      bb = BASIC_BLOCK (i);
2094
      orig_bb = get_bb_original (bb);
2095
 
2096
      /* bb->aux holds position in copy sequence initialized by
2097
         duplicate_loop_to_header_edge.  */
2098
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2099
                                        unrolling);
2100
      bb->aux = 0;
2101
      orig_insn = BB_HEAD (orig_bb);
2102
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2103
        {
2104
          next = NEXT_INSN (insn);
2105
          if (!INSN_P (insn))
2106
            continue;
2107
 
2108
          while (!INSN_P (orig_insn))
2109
            orig_insn = NEXT_INSN (orig_insn);
2110
 
2111
          ivts_templ.insn = orig_insn;
2112
          ve_templ.insn = orig_insn;
2113
 
2114
          /* Apply splitting iv optimization.  */
2115
          if (opt_info->insns_to_split)
2116
            {
2117
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2118
 
2119
              if (ivts)
2120
                {
2121
                  gcc_assert (GET_CODE (PATTERN (insn))
2122
                              == GET_CODE (PATTERN (orig_insn)));
2123
 
2124
                  if (!delta)
2125
                    insert_base_initialization (ivts, insn);
2126
                  split_iv (ivts, insn, delta);
2127
                }
2128
            }
2129
          /* Apply variable expansion optimization.  */
2130
          if (unrolling && opt_info->insns_with_var_to_expand)
2131
            {
2132
              ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2133
              if (ves)
2134
                {
2135
                  gcc_assert (GET_CODE (PATTERN (insn))
2136
                              == GET_CODE (PATTERN (orig_insn)));
2137
                  expand_var_during_unrolling (ves, insn);
2138
                }
2139
            }
2140
          orig_insn = NEXT_INSN (orig_insn);
2141
        }
2142
    }
2143
 
2144
  if (!rewrite_original_loop)
2145
    return;
2146
 
2147
  /* Initialize the variable expansions in the loop preheader
2148
     and take care of combining them at the loop exit.  */
2149
  if (opt_info->insns_with_var_to_expand)
2150
    {
2151
      htab_traverse (opt_info->insns_with_var_to_expand,
2152
                     insert_var_expansion_initialization,
2153
                     opt_info->loop_preheader);
2154
      htab_traverse (opt_info->insns_with_var_to_expand,
2155
                     combine_var_copies_in_loop_exit,
2156
                     opt_info->loop_exit);
2157
    }
2158
 
2159
  /* Rewrite also the original loop body.  Find them as originals of the blocks
2160
     in the last copied iteration, i.e. those that have
2161
     get_bb_copy (get_bb_original (bb)) == bb.  */
2162
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2163
    {
2164
      bb = BASIC_BLOCK (i);
2165
      orig_bb = get_bb_original (bb);
2166
      if (get_bb_copy (orig_bb) != bb)
2167
        continue;
2168
 
2169
      delta = determine_split_iv_delta (0, n_copies, unrolling);
2170
      for (orig_insn = BB_HEAD (orig_bb);
2171
           orig_insn != NEXT_INSN (BB_END (bb));
2172
           orig_insn = next)
2173
        {
2174
          next = NEXT_INSN (orig_insn);
2175
 
2176
          if (!INSN_P (orig_insn))
2177
            continue;
2178
 
2179
          ivts_templ.insn = orig_insn;
2180
          if (opt_info->insns_to_split)
2181
            {
2182
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2183
              if (ivts)
2184
                {
2185
                  if (!delta)
2186
                    insert_base_initialization (ivts, orig_insn);
2187
                  split_iv (ivts, orig_insn, delta);
2188
                  continue;
2189
                }
2190
            }
2191
 
2192
        }
2193
    }
2194
}
2195
 
2196
/*  Release the data structures used for the variable expansion
2197
    optimization.  Callbacks for htab_traverse.  */
2198
 
2199
static int
2200
release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2201
{
2202
  struct var_to_expand *ve = *slot;
2203
 
2204
  VEC_free (rtx, heap, ve->var_expansions);
2205
 
2206
  /* Continue traversing the hash table.  */
2207
  return 1;
2208
}
2209
 
2210
/* Release OPT_INFO.  */
2211
 
2212
static void
2213
free_opt_info (struct opt_info *opt_info)
2214
{
2215
  if (opt_info->insns_to_split)
2216
    htab_delete (opt_info->insns_to_split);
2217
  if (opt_info->insns_with_var_to_expand)
2218
    {
2219
      htab_traverse (opt_info->insns_with_var_to_expand,
2220
                     release_var_copies, NULL);
2221
      htab_delete (opt_info->insns_with_var_to_expand);
2222
    }
2223
  free (opt_info);
2224
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.