OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [loop-unroll.c] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Loop unrolling and peeling.
2
   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it under
7
the terms of the GNU General Public License as published by the Free
8
Software Foundation; either version 2, or (at your option) any later
9
version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12
WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING.  If not, write to the Free
18
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301, USA.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tm.h"
25
#include "rtl.h"
26
#include "hard-reg-set.h"
27
#include "obstack.h"
28
#include "basic-block.h"
29
#include "cfgloop.h"
30
#include "cfglayout.h"
31
#include "params.h"
32
#include "output.h"
33
#include "expr.h"
34
#include "hashtab.h"
35
#include "recog.h"    
36
#include "varray.h"                        
37
 
38
/* This pass performs loop unrolling and peeling.  We only perform these
39
   optimizations on innermost loops (with single exception) because
40
   the impact on performance is greatest here, and we want to avoid
41
   unnecessary code size growth.  The gain is caused by greater sequentiality
42
   of code, better code to optimize for further passes and in some cases
43
   by fewer testings of exit conditions.  The main problem is code growth,
44
   that impacts performance negatively due to effect of caches.
45
 
46
   What we do:
47
 
48
   -- complete peeling of once-rolling loops; this is the above mentioned
49
      exception, as this causes loop to be cancelled completely and
50
      does not cause code growth
51
   -- complete peeling of loops that roll (small) constant times.
52
   -- simple peeling of first iterations of loops that do not roll much
53
      (according to profile feedback)
54
   -- unrolling of loops that roll constant times; this is almost always
55
      win, as we get rid of exit condition tests.
56
   -- unrolling of loops that roll number of times that we can compute
57
      in runtime; we also get rid of exit condition tests here, but there
58
      is the extra expense for calculating the number of iterations
59
   -- simple unrolling of remaining loops; this is performed only if we
60
      are asked to, as the gain is questionable in this case and often
61
      it may even slow down the code
62
   For more detailed descriptions of each of those, see comments at
63
   appropriate function below.
64
 
65
   There is a lot of parameters (defined and described in params.def) that
66
   control how much we unroll/peel.
67
 
68
   ??? A great problem is that we don't have a good way how to determine
69
   how many times we should unroll the loop; the experiments I have made
70
   showed that this choice may affect performance in order of several %.
71
   */
72
 
73
/* Information about induction variables to split.  */
74
 
75
struct iv_to_split
76
{
77
  rtx insn;             /* The insn in that the induction variable occurs.  */
78
  rtx base_var;         /* The variable on that the values in the further
79
                           iterations are based.  */
80
  rtx step;             /* Step of the induction variable.  */
81
  unsigned n_loc;
82
  unsigned loc[3];      /* Location where the definition of the induction
83
                           variable occurs in the insn.  For example if
84
                           N_LOC is 2, the expression is located at
85
                           XEXP (XEXP (single_set, loc[0]), loc[1]).  */
86
};
87
 
88
DEF_VEC_P(rtx);
89
DEF_VEC_ALLOC_P(rtx,heap);
90
 
91
/* Information about accumulators to expand.  */
92
 
93
struct var_to_expand
94
{
95
  rtx insn;                        /* The insn in that the variable expansion occurs.  */
96
  rtx reg;                         /* The accumulator which is expanded.  */
97
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
98
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
99
                                      or multiplication.  */
100
  int expansion_count;             /* Count the number of expansions generated so far.  */
101
  int reuse_expansion;             /* The expansion we intend to reuse to expand
102
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
103
                                      the original accumulator.  Else use
104
                                      var_expansions[REUSE_EXPANSION - 1].  */
105
};
106
 
107
/* Information about optimization applied in
108
   the unrolled loop.  */
109
 
110
struct opt_info
111
{
112
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
113
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
114
                                      to expand.  */
115
  unsigned first_new_block;        /* The first basic block that was
116
                                      duplicated.  */
117
  basic_block loop_exit;           /* The loop exit basic block.  */
118
  basic_block loop_preheader;      /* The loop preheader basic block.  */
119
};
120
 
121
static void decide_unrolling_and_peeling (struct loops *, int);
122
static void peel_loops_completely (struct loops *, int);
123
static void decide_peel_simple (struct loop *, int);
124
static void decide_peel_once_rolling (struct loop *, int);
125
static void decide_peel_completely (struct loop *, int);
126
static void decide_unroll_stupid (struct loop *, int);
127
static void decide_unroll_constant_iterations (struct loop *, int);
128
static void decide_unroll_runtime_iterations (struct loop *, int);
129
static void peel_loop_simple (struct loops *, struct loop *);
130
static void peel_loop_completely (struct loops *, struct loop *);
131
static void unroll_loop_stupid (struct loops *, struct loop *);
132
static void unroll_loop_constant_iterations (struct loops *, struct loop *);
133
static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
134
static struct opt_info *analyze_insns_in_loop (struct loop *);
135
static void opt_info_start_duplication (struct opt_info *);
136
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
137
static void free_opt_info (struct opt_info *);
138
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
139
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
140
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
141
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
142
static int insert_var_expansion_initialization (void **, void *);
143
static int combine_var_copies_in_loop_exit (void **, void *);
144
static int release_var_copies (void **, void *);
145
static rtx get_expansion (struct var_to_expand *);
146
 
147
/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
148
void
149
unroll_and_peel_loops (struct loops *loops, int flags)
150
{
151
  struct loop *loop, *next;
152
  bool check;
153
 
154
  /* First perform complete loop peeling (it is almost surely a win,
155
     and affects parameters for further decision a lot).  */
156
  peel_loops_completely (loops, flags);
157
 
158
  /* Now decide rest of unrolling and peeling.  */
159
  decide_unrolling_and_peeling (loops, flags);
160
 
161
  loop = loops->tree_root;
162
  while (loop->inner)
163
    loop = loop->inner;
164
 
165
  /* Scan the loops, inner ones first.  */
166
  while (loop != loops->tree_root)
167
    {
168
      if (loop->next)
169
        {
170
          next = loop->next;
171
          while (next->inner)
172
            next = next->inner;
173
        }
174
      else
175
        next = loop->outer;
176
 
177
      check = true;
178
      /* And perform the appropriate transformations.  */
179
      switch (loop->lpt_decision.decision)
180
        {
181
        case LPT_PEEL_COMPLETELY:
182
          /* Already done.  */
183
          gcc_unreachable ();
184
        case LPT_PEEL_SIMPLE:
185
          peel_loop_simple (loops, loop);
186
          break;
187
        case LPT_UNROLL_CONSTANT:
188
          unroll_loop_constant_iterations (loops, loop);
189
          break;
190
        case LPT_UNROLL_RUNTIME:
191
          unroll_loop_runtime_iterations (loops, loop);
192
          break;
193
        case LPT_UNROLL_STUPID:
194
          unroll_loop_stupid (loops, loop);
195
          break;
196
        case LPT_NONE:
197
          check = false;
198
          break;
199
        default:
200
          gcc_unreachable ();
201
        }
202
      if (check)
203
        {
204
#ifdef ENABLE_CHECKING
205
          verify_dominators (CDI_DOMINATORS);
206
          verify_loop_structure (loops);
207
#endif
208
        }
209
      loop = next;
210
    }
211
 
212
  iv_analysis_done ();
213
}
214
 
215
/* Check whether exit of the LOOP is at the end of loop body.  */
216
 
217
static bool
218
loop_exit_at_end_p (struct loop *loop)
219
{
220
  struct niter_desc *desc = get_simple_loop_desc (loop);
221
  rtx insn;
222
 
223
  if (desc->in_edge->dest != loop->latch)
224
    return false;
225
 
226
  /* Check that the latch is empty.  */
227
  FOR_BB_INSNS (loop->latch, insn)
228
    {
229
      if (INSN_P (insn))
230
        return false;
231
    }
232
 
233
  return true;
234
}
235
 
236
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
237
static void
238
peel_loops_completely (struct loops *loops, int flags)
239
{
240
  struct loop *loop;
241
  unsigned i;
242
 
243
  /* Scan the loops, the inner ones first.  */
244
  for (i = loops->num - 1; i > 0; i--)
245
    {
246
      loop = loops->parray[i];
247
      if (!loop)
248
        continue;
249
 
250
      loop->lpt_decision.decision = LPT_NONE;
251
 
252
      if (dump_file)
253
        fprintf (dump_file,
254
                 "\n;; *** Considering loop %d for complete peeling ***\n",
255
                 loop->num);
256
 
257
      loop->ninsns = num_loop_insns (loop);
258
 
259
      decide_peel_once_rolling (loop, flags);
260
      if (loop->lpt_decision.decision == LPT_NONE)
261
        decide_peel_completely (loop, flags);
262
 
263
      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
264
        {
265
          peel_loop_completely (loops, loop);
266
#ifdef ENABLE_CHECKING
267
          verify_dominators (CDI_DOMINATORS);
268
          verify_loop_structure (loops);
269
#endif
270
        }
271
    }
272
}
273
 
274
/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
275
static void
276
decide_unrolling_and_peeling (struct loops *loops, int flags)
277
{
278
  struct loop *loop = loops->tree_root, *next;
279
 
280
  while (loop->inner)
281
    loop = loop->inner;
282
 
283
  /* Scan the loops, inner ones first.  */
284
  while (loop != loops->tree_root)
285
    {
286
      if (loop->next)
287
        {
288
          next = loop->next;
289
          while (next->inner)
290
            next = next->inner;
291
        }
292
      else
293
        next = loop->outer;
294
 
295
      loop->lpt_decision.decision = LPT_NONE;
296
 
297
      if (dump_file)
298
        fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
299
 
300
      /* Do not peel cold areas.  */
301
      if (!maybe_hot_bb_p (loop->header))
302
        {
303
          if (dump_file)
304
            fprintf (dump_file, ";; Not considering loop, cold area\n");
305
          loop = next;
306
          continue;
307
        }
308
 
309
      /* Can the loop be manipulated?  */
310
      if (!can_duplicate_loop_p (loop))
311
        {
312
          if (dump_file)
313
            fprintf (dump_file,
314
                     ";; Not considering loop, cannot duplicate\n");
315
          loop = next;
316
          continue;
317
        }
318
 
319
      /* Skip non-innermost loops.  */
320
      if (loop->inner)
321
        {
322
          if (dump_file)
323
            fprintf (dump_file, ";; Not considering loop, is not innermost\n");
324
          loop = next;
325
          continue;
326
        }
327
 
328
      loop->ninsns = num_loop_insns (loop);
329
      loop->av_ninsns = average_num_loop_insns (loop);
330
 
331
      /* Try transformations one by one in decreasing order of
332
         priority.  */
333
 
334
      decide_unroll_constant_iterations (loop, flags);
335
      if (loop->lpt_decision.decision == LPT_NONE)
336
        decide_unroll_runtime_iterations (loop, flags);
337
      if (loop->lpt_decision.decision == LPT_NONE)
338
        decide_unroll_stupid (loop, flags);
339
      if (loop->lpt_decision.decision == LPT_NONE)
340
        decide_peel_simple (loop, flags);
341
 
342
      loop = next;
343
    }
344
}
345
 
346
/* Decide whether the LOOP is once rolling and suitable for complete
347
   peeling.  */
348
static void
349
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
350
{
351
  struct niter_desc *desc;
352
 
353
  if (dump_file)
354
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
355
 
356
  /* Is the loop small enough?  */
357
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
358
    {
359
      if (dump_file)
360
        fprintf (dump_file, ";; Not considering loop, is too big\n");
361
      return;
362
    }
363
 
364
  /* Check for simple loops.  */
365
  desc = get_simple_loop_desc (loop);
366
 
367
  /* Check number of iterations.  */
368
  if (!desc->simple_p
369
      || desc->assumptions
370
      || desc->infinite
371
      || !desc->const_iter
372
      || desc->niter != 0)
373
    {
374
      if (dump_file)
375
        fprintf (dump_file,
376
                 ";; Unable to prove that the loop rolls exactly once\n");
377
      return;
378
    }
379
 
380
  /* Success.  */
381
  if (dump_file)
382
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
383
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
384
}
385
 
386
/* Decide whether the LOOP is suitable for complete peeling.  */
387
static void
388
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
389
{
390
  unsigned npeel;
391
  struct niter_desc *desc;
392
 
393
  if (dump_file)
394
    fprintf (dump_file, "\n;; Considering peeling completely\n");
395
 
396
  /* Skip non-innermost loops.  */
397
  if (loop->inner)
398
    {
399
      if (dump_file)
400
        fprintf (dump_file, ";; Not considering loop, is not innermost\n");
401
      return;
402
    }
403
 
404
  /* Do not peel cold areas.  */
405
  if (!maybe_hot_bb_p (loop->header))
406
    {
407
      if (dump_file)
408
        fprintf (dump_file, ";; Not considering loop, cold area\n");
409
      return;
410
    }
411
 
412
  /* Can the loop be manipulated?  */
413
  if (!can_duplicate_loop_p (loop))
414
    {
415
      if (dump_file)
416
        fprintf (dump_file,
417
                 ";; Not considering loop, cannot duplicate\n");
418
      return;
419
    }
420
 
421
  /* npeel = number of iterations to peel.  */
422
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
423
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
424
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
425
 
426
  /* Is the loop small enough?  */
427
  if (!npeel)
428
    {
429
      if (dump_file)
430
        fprintf (dump_file, ";; Not considering loop, is too big\n");
431
      return;
432
    }
433
 
434
  /* Check for simple loops.  */
435
  desc = get_simple_loop_desc (loop);
436
 
437
  /* Check number of iterations.  */
438
  if (!desc->simple_p
439
      || desc->assumptions
440
      || !desc->const_iter
441
      || desc->infinite)
442
    {
443
      if (dump_file)
444
        fprintf (dump_file,
445
                 ";; Unable to prove that the loop iterates constant times\n");
446
      return;
447
    }
448
 
449
  if (desc->niter > npeel - 1)
450
    {
451
      if (dump_file)
452
        {
453
          fprintf (dump_file,
454
                   ";; Not peeling loop completely, rolls too much (");
455
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
456
          fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
457
        }
458
      return;
459
    }
460
 
461
  /* Success.  */
462
  if (dump_file)
463
    fprintf (dump_file, ";; Decided to peel loop completely\n");
464
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
465
}
466
 
467
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
468
   completely.  The transformation done:
469
 
470
   for (i = 0; i < 4; i++)
471
     body;
472
 
473
   ==>
474
 
475
   i = 0;
476
   body; i++;
477
   body; i++;
478
   body; i++;
479
   body; i++;
480
   */
481
static void
482
peel_loop_completely (struct loops *loops, struct loop *loop)
483
{
484
  sbitmap wont_exit;
485
  unsigned HOST_WIDE_INT npeel;
486
  unsigned n_remove_edges, i;
487
  edge *remove_edges, ein;
488
  struct niter_desc *desc = get_simple_loop_desc (loop);
489
  struct opt_info *opt_info = NULL;
490
 
491
  npeel = desc->niter;
492
 
493
  if (npeel)
494
    {
495
      bool ok;
496
 
497
      wont_exit = sbitmap_alloc (npeel + 1);
498
      sbitmap_ones (wont_exit);
499
      RESET_BIT (wont_exit, 0);
500
      if (desc->noloop_assumptions)
501
        RESET_BIT (wont_exit, 1);
502
 
503
      remove_edges = xcalloc (npeel, sizeof (edge));
504
      n_remove_edges = 0;
505
 
506
      if (flag_split_ivs_in_unroller)
507
        opt_info = analyze_insns_in_loop (loop);
508
 
509
      opt_info_start_duplication (opt_info);
510
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
511
                                          loops, npeel,
512
                                          wont_exit, desc->out_edge,
513
                                          remove_edges, &n_remove_edges,
514
                                          DLTHE_FLAG_UPDATE_FREQ
515
                                          | DLTHE_FLAG_COMPLETTE_PEEL
516
                                          | (opt_info
517
                                             ? DLTHE_RECORD_COPY_NUMBER : 0));
518
      gcc_assert (ok);
519
 
520
      free (wont_exit);
521
 
522
      if (opt_info)
523
        {
524
          apply_opt_in_copies (opt_info, npeel, false, true);
525
          free_opt_info (opt_info);
526
        }
527
 
528
      /* Remove the exit edges.  */
529
      for (i = 0; i < n_remove_edges; i++)
530
        remove_path (loops, remove_edges[i]);
531
      free (remove_edges);
532
    }
533
 
534
  ein = desc->in_edge;
535
  free_simple_loop_desc (loop);
536
 
537
  /* Now remove the unreachable part of the last iteration and cancel
538
     the loop.  */
539
  remove_path (loops, ein);
540
 
541
  if (dump_file)
542
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
543
}
544
 
545
/* Decide whether to unroll LOOP iterating constant number of times
546
   and how much.  */
547
 
548
static void
549
decide_unroll_constant_iterations (struct loop *loop, int flags)
550
{
551
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
552
  struct niter_desc *desc;
553
 
554
  if (!(flags & UAP_UNROLL))
555
    {
556
      /* We were not asked to, just return back silently.  */
557
      return;
558
    }
559
 
560
  if (dump_file)
561
    fprintf (dump_file,
562
             "\n;; Considering unrolling loop with constant "
563
             "number of iterations\n");
564
 
565
  /* nunroll = total number of copies of the original loop body in
566
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
567
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
568
  nunroll_by_av
569
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
570
  if (nunroll > nunroll_by_av)
571
    nunroll = nunroll_by_av;
572
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
573
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
574
 
575
  /* Skip big loops.  */
576
  if (nunroll <= 1)
577
    {
578
      if (dump_file)
579
        fprintf (dump_file, ";; Not considering loop, is too big\n");
580
      return;
581
    }
582
 
583
  /* Check for simple loops.  */
584
  desc = get_simple_loop_desc (loop);
585
 
586
  /* Check number of iterations.  */
587
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
588
    {
589
      if (dump_file)
590
        fprintf (dump_file,
591
                 ";; Unable to prove that the loop iterates constant times\n");
592
      return;
593
    }
594
 
595
  /* Check whether the loop rolls enough to consider.  */
596
  if (desc->niter < 2 * nunroll)
597
    {
598
      if (dump_file)
599
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
600
      return;
601
    }
602
 
603
  /* Success; now compute number of iterations to unroll.  We alter
604
     nunroll so that as few as possible copies of loop body are
605
     necessary, while still not decreasing the number of unrollings
606
     too much (at most by 1).  */
607
  best_copies = 2 * nunroll + 10;
608
 
609
  i = 2 * nunroll + 2;
610
  if (i - 1 >= desc->niter)
611
    i = desc->niter - 2;
612
 
613
  for (; i >= nunroll - 1; i--)
614
    {
615
      unsigned exit_mod = desc->niter % (i + 1);
616
 
617
      if (!loop_exit_at_end_p (loop))
618
        n_copies = exit_mod + i + 1;
619
      else if (exit_mod != (unsigned) i
620
               || desc->noloop_assumptions != NULL_RTX)
621
        n_copies = exit_mod + i + 2;
622
      else
623
        n_copies = i + 1;
624
 
625
      if (n_copies < best_copies)
626
        {
627
          best_copies = n_copies;
628
          best_unroll = i;
629
        }
630
    }
631
 
632
  if (dump_file)
633
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
634
             best_unroll + 1, best_copies, nunroll);
635
 
636
  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
637
  loop->lpt_decision.times = best_unroll;
638
 
639
  if (dump_file)
640
    fprintf (dump_file,
641
             ";; Decided to unroll the constant times rolling loop, %d times.\n",
642
             loop->lpt_decision.times);
643
}
644
 
645
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
646
   times.  The transformation does this:
647
 
648
   for (i = 0; i < 102; i++)
649
     body;
650
 
651
   ==>
652
 
653
   i = 0;
654
   body; i++;
655
   body; i++;
656
   while (i < 102)
657
     {
658
       body; i++;
659
       body; i++;
660
       body; i++;
661
       body; i++;
662
     }
663
  */
664
static void
665
unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
666
{
667
  unsigned HOST_WIDE_INT niter;
668
  unsigned exit_mod;
669
  sbitmap wont_exit;
670
  unsigned n_remove_edges, i;
671
  edge *remove_edges;
672
  unsigned max_unroll = loop->lpt_decision.times;
673
  struct niter_desc *desc = get_simple_loop_desc (loop);
674
  bool exit_at_end = loop_exit_at_end_p (loop);
675
  struct opt_info *opt_info = NULL;
676
  bool ok;
677
 
678
  niter = desc->niter;
679
 
680
  /* Should not get here (such loop should be peeled instead).  */
681
  gcc_assert (niter > max_unroll + 1);
682
 
683
  exit_mod = niter % (max_unroll + 1);
684
 
685
  wont_exit = sbitmap_alloc (max_unroll + 1);
686
  sbitmap_ones (wont_exit);
687
 
688
  remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
689
  n_remove_edges = 0;
690
  if (flag_split_ivs_in_unroller
691
      || flag_variable_expansion_in_unroller)
692
    opt_info = analyze_insns_in_loop (loop);
693
 
694
  if (!exit_at_end)
695
    {
696
      /* The exit is not at the end of the loop; leave exit test
697
         in the first copy, so that the loops that start with test
698
         of exit condition have continuous body after unrolling.  */
699
 
700
      if (dump_file)
701
        fprintf (dump_file, ";; Condition on beginning of loop.\n");
702
 
703
      /* Peel exit_mod iterations.  */
704
      RESET_BIT (wont_exit, 0);
705
      if (desc->noloop_assumptions)
706
        RESET_BIT (wont_exit, 1);
707
 
708
      if (exit_mod)
709
        {
710
          opt_info_start_duplication (opt_info);
711
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
712
                                              loops, exit_mod,
713
                                              wont_exit, desc->out_edge,
714
                                              remove_edges, &n_remove_edges,
715
                                              DLTHE_FLAG_UPDATE_FREQ
716
                                              | (opt_info && exit_mod > 1
717
                                                 ? DLTHE_RECORD_COPY_NUMBER
718
                                                   : 0));
719
          gcc_assert (ok);
720
 
721
          if (opt_info && exit_mod > 1)
722
            apply_opt_in_copies (opt_info, exit_mod, false, false);
723
 
724
          desc->noloop_assumptions = NULL_RTX;
725
          desc->niter -= exit_mod;
726
          desc->niter_max -= exit_mod;
727
        }
728
 
729
      SET_BIT (wont_exit, 1);
730
    }
731
  else
732
    {
733
      /* Leave exit test in last copy, for the same reason as above if
734
         the loop tests the condition at the end of loop body.  */
735
 
736
      if (dump_file)
737
        fprintf (dump_file, ";; Condition on end of loop.\n");
738
 
739
      /* We know that niter >= max_unroll + 2; so we do not need to care of
740
         case when we would exit before reaching the loop.  So just peel
741
         exit_mod + 1 iterations.  */
742
      if (exit_mod != max_unroll
743
          || desc->noloop_assumptions)
744
        {
745
          RESET_BIT (wont_exit, 0);
746
          if (desc->noloop_assumptions)
747
            RESET_BIT (wont_exit, 1);
748
 
749
          opt_info_start_duplication (opt_info);
750
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
751
                                              loops, exit_mod + 1,
752
                                              wont_exit, desc->out_edge,
753
                                              remove_edges, &n_remove_edges,
754
                                              DLTHE_FLAG_UPDATE_FREQ
755
                                              | (opt_info && exit_mod > 0
756
                                                 ? DLTHE_RECORD_COPY_NUMBER
757
                                                   : 0));
758
          gcc_assert (ok);
759
 
760
          if (opt_info && exit_mod > 0)
761
            apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
762
 
763
          desc->niter -= exit_mod + 1;
764
          desc->niter_max -= exit_mod + 1;
765
          desc->noloop_assumptions = NULL_RTX;
766
 
767
          SET_BIT (wont_exit, 0);
768
          SET_BIT (wont_exit, 1);
769
        }
770
 
771
      RESET_BIT (wont_exit, max_unroll);
772
    }
773
 
774
  /* Now unroll the loop.  */
775
 
776
  opt_info_start_duplication (opt_info);
777
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
778
                                      loops, max_unroll,
779
                                      wont_exit, desc->out_edge,
780
                                      remove_edges, &n_remove_edges,
781
                                      DLTHE_FLAG_UPDATE_FREQ
782
                                      | (opt_info
783
                                         ? DLTHE_RECORD_COPY_NUMBER
784
                                           : 0));
785
  gcc_assert (ok);
786
 
787
  if (opt_info)
788
    {
789
      apply_opt_in_copies (opt_info, max_unroll, true, true);
790
      free_opt_info (opt_info);
791
    }
792
 
793
  free (wont_exit);
794
 
795
  if (exit_at_end)
796
    {
797
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
798
      /* Find a new in and out edge; they are in the last copy we have made.  */
799
 
800
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
801
        {
802
          desc->out_edge = EDGE_SUCC (exit_block, 0);
803
          desc->in_edge = EDGE_SUCC (exit_block, 1);
804
        }
805
      else
806
        {
807
          desc->out_edge = EDGE_SUCC (exit_block, 1);
808
          desc->in_edge = EDGE_SUCC (exit_block, 0);
809
        }
810
    }
811
 
812
  desc->niter /= max_unroll + 1;
813
  desc->niter_max /= max_unroll + 1;
814
  desc->niter_expr = GEN_INT (desc->niter);
815
 
816
  /* Remove the edges.  */
817
  for (i = 0; i < n_remove_edges; i++)
818
    remove_path (loops, remove_edges[i]);
819
  free (remove_edges);
820
 
821
  if (dump_file)
822
    fprintf (dump_file,
823
             ";; Unrolled loop %d times, constant # of iterations %i insns\n",
824
             max_unroll, num_loop_insns (loop));
825
}
826
 
827
/* Decide whether to unroll LOOP iterating runtime computable number of times
828
   and how much.  */
829
static void
830
decide_unroll_runtime_iterations (struct loop *loop, int flags)
831
{
832
  unsigned nunroll, nunroll_by_av, i;
833
  struct niter_desc *desc;
834
 
835
  if (!(flags & UAP_UNROLL))
836
    {
837
      /* We were not asked to, just return back silently.  */
838
      return;
839
    }
840
 
841
  if (dump_file)
842
    fprintf (dump_file,
843
             "\n;; Considering unrolling loop with runtime "
844
             "computable number of iterations\n");
845
 
846
  /* nunroll = total number of copies of the original loop body in
847
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
848
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
849
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
850
  if (nunroll > nunroll_by_av)
851
    nunroll = nunroll_by_av;
852
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
853
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
854
 
855
  /* Skip big loops.  */
856
  if (nunroll <= 1)
857
    {
858
      if (dump_file)
859
        fprintf (dump_file, ";; Not considering loop, is too big\n");
860
      return;
861
    }
862
 
863
  /* Check for simple loops.  */
864
  desc = get_simple_loop_desc (loop);
865
 
866
  /* Check simpleness.  */
867
  if (!desc->simple_p || desc->assumptions)
868
    {
869
      if (dump_file)
870
        fprintf (dump_file,
871
                 ";; Unable to prove that the number of iterations "
872
                 "can be counted in runtime\n");
873
      return;
874
    }
875
 
876
  if (desc->const_iter)
877
    {
878
      if (dump_file)
879
        fprintf (dump_file, ";; Loop iterates constant times\n");
880
      return;
881
    }
882
 
883
  /* If we have profile feedback, check whether the loop rolls.  */
884
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
885
    {
886
      if (dump_file)
887
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
888
      return;
889
    }
890
 
891
  /* Success; now force nunroll to be power of 2, as we are unable to
892
     cope with overflows in computation of number of iterations.  */
893
  for (i = 1; 2 * i <= nunroll; i *= 2)
894
    continue;
895
 
896
  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
897
  loop->lpt_decision.times = i - 1;
898
 
899
  if (dump_file)
900
    fprintf (dump_file,
901
             ";; Decided to unroll the runtime computable "
902
             "times rolling loop, %d times.\n",
903
             loop->lpt_decision.times);
904
}
905
 
906
/* Unroll LOOP for that we are able to count number of iterations in runtime
907
   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
908
   extra care for case n < 0):
909
 
910
   for (i = 0; i < n; i++)
911
     body;
912
 
913
   ==>
914
 
915
   i = 0;
916
   mod = n % 4;
917
 
918
   switch (mod)
919
     {
920
       case 3:
921
         body; i++;
922
       case 2:
923
         body; i++;
924
       case 1:
925
         body; i++;
926
       case 0: ;
927
     }
928
 
929
   while (i < n)
930
     {
931
       body; i++;
932
       body; i++;
933
       body; i++;
934
       body; i++;
935
     }
936
   */
937
static void
938
unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
939
{
940
  rtx old_niter, niter, init_code, branch_code, tmp;
941
  unsigned i, j, p;
942
  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
943
  unsigned n_dom_bbs;
944
  sbitmap wont_exit;
945
  int may_exit_copy;
946
  unsigned n_peel, n_remove_edges;
947
  edge *remove_edges, e;
948
  bool extra_zero_check, last_may_exit;
949
  unsigned max_unroll = loop->lpt_decision.times;
950
  struct niter_desc *desc = get_simple_loop_desc (loop);
951
  bool exit_at_end = loop_exit_at_end_p (loop);
952
  struct opt_info *opt_info = NULL;
953
  bool ok;
954
 
955
  if (flag_split_ivs_in_unroller
956
      || flag_variable_expansion_in_unroller)
957
    opt_info = analyze_insns_in_loop (loop);
958
 
959
  /* Remember blocks whose dominators will have to be updated.  */
960
  dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
961
  n_dom_bbs = 0;
962
 
963
  body = get_loop_body (loop);
964
  for (i = 0; i < loop->num_nodes; i++)
965
    {
966
      unsigned nldom;
967
      basic_block *ldom;
968
 
969
      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
970
      for (j = 0; j < nldom; j++)
971
        if (!flow_bb_inside_loop_p (loop, ldom[j]))
972
          dom_bbs[n_dom_bbs++] = ldom[j];
973
 
974
      free (ldom);
975
    }
976
  free (body);
977
 
978
  if (!exit_at_end)
979
    {
980
      /* Leave exit in first copy (for explanation why see comment in
981
         unroll_loop_constant_iterations).  */
982
      may_exit_copy = 0;
983
      n_peel = max_unroll - 1;
984
      extra_zero_check = true;
985
      last_may_exit = false;
986
    }
987
  else
988
    {
989
      /* Leave exit in last copy (for explanation why see comment in
990
         unroll_loop_constant_iterations).  */
991
      may_exit_copy = max_unroll;
992
      n_peel = max_unroll;
993
      extra_zero_check = false;
994
      last_may_exit = true;
995
    }
996
 
997
  /* Get expression for number of iterations.  */
998
  start_sequence ();
999
  old_niter = niter = gen_reg_rtx (desc->mode);
1000
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1001
  if (tmp != niter)
1002
    emit_move_insn (niter, tmp);
1003
 
1004
  /* Count modulo by ANDing it with max_unroll; we use the fact that
1005
     the number of unrollings is a power of two, and thus this is correct
1006
     even if there is overflow in the computation.  */
1007
  niter = expand_simple_binop (desc->mode, AND,
1008
                               niter,
1009
                               GEN_INT (max_unroll),
1010
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
1011
 
1012
  init_code = get_insns ();
1013
  end_sequence ();
1014
 
1015
  /* Precondition the loop.  */
1016
  loop_split_edge_with (loop_preheader_edge (loop), init_code);
1017
 
1018
  remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1019
  n_remove_edges = 0;
1020
 
1021
  wont_exit = sbitmap_alloc (max_unroll + 2);
1022
 
1023
  /* Peel the first copy of loop body (almost always we must leave exit test
1024
     here; the only exception is when we have extra zero check and the number
1025
     of iterations is reliable.  Also record the place of (possible) extra
1026
     zero check.  */
1027
  sbitmap_zero (wont_exit);
1028
  if (extra_zero_check
1029
      && !desc->noloop_assumptions)
1030
    SET_BIT (wont_exit, 1);
1031
  ezc_swtch = loop_preheader_edge (loop)->src;
1032
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1033
                                      loops, 1,
1034
                                      wont_exit, desc->out_edge,
1035
                                      remove_edges, &n_remove_edges,
1036
                                      DLTHE_FLAG_UPDATE_FREQ);
1037
  gcc_assert (ok);
1038
 
1039
  /* Record the place where switch will be built for preconditioning.  */
1040
  swtch = loop_split_edge_with (loop_preheader_edge (loop),
1041
                                NULL_RTX);
1042
 
1043
  for (i = 0; i < n_peel; i++)
1044
    {
1045
      /* Peel the copy.  */
1046
      sbitmap_zero (wont_exit);
1047
      if (i != n_peel - 1 || !last_may_exit)
1048
        SET_BIT (wont_exit, 1);
1049
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1050
                                          loops, 1,
1051
                                          wont_exit, desc->out_edge,
1052
                                          remove_edges, &n_remove_edges,
1053
                                          DLTHE_FLAG_UPDATE_FREQ);
1054
      gcc_assert (ok);
1055
 
1056
      /* Create item for switch.  */
1057
      j = n_peel - i - (extra_zero_check ? 0 : 1);
1058
      p = REG_BR_PROB_BASE / (i + 2);
1059
 
1060
      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1061
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1062
                                          block_label (preheader), p,
1063
                                          NULL_RTX);
1064
 
1065
      swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1066
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1067
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1068
      e = make_edge (swtch, preheader,
1069
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1070
      e->probability = p;
1071
    }
1072
 
1073
  if (extra_zero_check)
1074
    {
1075
      /* Add branch for zero iterations.  */
1076
      p = REG_BR_PROB_BASE / (max_unroll + 1);
1077
      swtch = ezc_swtch;
1078
      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1079
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1080
                                          block_label (preheader), p,
1081
                                          NULL_RTX);
1082
 
1083
      swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1084
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1085
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1086
      e = make_edge (swtch, preheader,
1087
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1088
      e->probability = p;
1089
    }
1090
 
1091
  /* Recount dominators for outer blocks.  */
1092
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1093
 
1094
  /* And unroll loop.  */
1095
 
1096
  sbitmap_ones (wont_exit);
1097
  RESET_BIT (wont_exit, may_exit_copy);
1098
  opt_info_start_duplication (opt_info);
1099
 
1100
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1101
                                      loops, max_unroll,
1102
                                      wont_exit, desc->out_edge,
1103
                                      remove_edges, &n_remove_edges,
1104
                                      DLTHE_FLAG_UPDATE_FREQ
1105
                                      | (opt_info
1106
                                         ? DLTHE_RECORD_COPY_NUMBER
1107
                                           : 0));
1108
  gcc_assert (ok);
1109
 
1110
  if (opt_info)
1111
    {
1112
      apply_opt_in_copies (opt_info, max_unroll, true, true);
1113
      free_opt_info (opt_info);
1114
    }
1115
 
1116
  free (wont_exit);
1117
 
1118
  if (exit_at_end)
1119
    {
1120
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1121
      /* Find a new in and out edge; they are in the last copy we have
1122
         made.  */
1123
 
1124
      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1125
        {
1126
          desc->out_edge = EDGE_SUCC (exit_block, 0);
1127
          desc->in_edge = EDGE_SUCC (exit_block, 1);
1128
        }
1129
      else
1130
        {
1131
          desc->out_edge = EDGE_SUCC (exit_block, 1);
1132
          desc->in_edge = EDGE_SUCC (exit_block, 0);
1133
        }
1134
    }
1135
 
1136
  /* Remove the edges.  */
1137
  for (i = 0; i < n_remove_edges; i++)
1138
    remove_path (loops, remove_edges[i]);
1139
  free (remove_edges);
1140
 
1141
  /* We must be careful when updating the number of iterations due to
1142
     preconditioning and the fact that the value must be valid at entry
1143
     of the loop.  After passing through the above code, we see that
1144
     the correct new number of iterations is this:  */
1145
  gcc_assert (!desc->const_iter);
1146
  desc->niter_expr =
1147
    simplify_gen_binary (UDIV, desc->mode, old_niter,
1148
                         GEN_INT (max_unroll + 1));
1149
  desc->niter_max /= max_unroll + 1;
1150
  if (exit_at_end)
1151
    {
1152
      desc->niter_expr =
1153
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1154
      desc->noloop_assumptions = NULL_RTX;
1155
      desc->niter_max--;
1156
    }
1157
 
1158
  if (dump_file)
1159
    fprintf (dump_file,
1160
             ";; Unrolled loop %d times, counting # of iterations "
1161
             "in runtime, %i insns\n",
1162
             max_unroll, num_loop_insns (loop));
1163
}
1164
 
1165
/* Decide whether to simply peel LOOP and how much.  */
1166
static void
1167
decide_peel_simple (struct loop *loop, int flags)
1168
{
1169
  unsigned npeel;
1170
  struct niter_desc *desc;
1171
 
1172
  if (!(flags & UAP_PEEL))
1173
    {
1174
      /* We were not asked to, just return back silently.  */
1175
      return;
1176
    }
1177
 
1178
  if (dump_file)
1179
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1180
 
1181
  /* npeel = number of iterations to peel.  */
1182
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1183
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1184
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1185
 
1186
  /* Skip big loops.  */
1187
  if (!npeel)
1188
    {
1189
      if (dump_file)
1190
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1191
      return;
1192
    }
1193
 
1194
  /* Check for simple loops.  */
1195
  desc = get_simple_loop_desc (loop);
1196
 
1197
  /* Check number of iterations.  */
1198
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1199
    {
1200
      if (dump_file)
1201
        fprintf (dump_file, ";; Loop iterates constant times\n");
1202
      return;
1203
    }
1204
 
1205
  /* Do not simply peel loops with branches inside -- it increases number
1206
     of mispredicts.  */
1207
  if (num_loop_branches (loop) > 1)
1208
    {
1209
      if (dump_file)
1210
        fprintf (dump_file, ";; Not peeling, contains branches\n");
1211
      return;
1212
    }
1213
 
1214
  if (loop->header->count)
1215
    {
1216
      unsigned niter = expected_loop_iterations (loop);
1217
      if (niter + 1 > npeel)
1218
        {
1219
          if (dump_file)
1220
            {
1221
              fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1222
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1223
                       (HOST_WIDEST_INT) (niter + 1));
1224
              fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1225
                       npeel);
1226
            }
1227
          return;
1228
        }
1229
      npeel = niter + 1;
1230
    }
1231
  else
1232
    {
1233
      /* For now we have no good heuristics to decide whether loop peeling
1234
         will be effective, so disable it.  */
1235
      if (dump_file)
1236
        fprintf (dump_file,
1237
                 ";; Not peeling loop, no evidence it will be profitable\n");
1238
      return;
1239
    }
1240
 
1241
  /* Success.  */
1242
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1243
  loop->lpt_decision.times = npeel;
1244
 
1245
  if (dump_file)
1246
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1247
             loop->lpt_decision.times);
1248
}
1249
 
1250
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1251
   while (cond)
1252
     body;
1253
 
1254
   ==>
1255
 
1256
   if (!cond) goto end;
1257
   body;
1258
   if (!cond) goto end;
1259
   body;
1260
   while (cond)
1261
     body;
1262
   end: ;
1263
   */
1264
static void
1265
peel_loop_simple (struct loops *loops, struct loop *loop)
1266
{
1267
  sbitmap wont_exit;
1268
  unsigned npeel = loop->lpt_decision.times;
1269
  struct niter_desc *desc = get_simple_loop_desc (loop);
1270
  struct opt_info *opt_info = NULL;
1271
  bool ok;
1272
 
1273
  if (flag_split_ivs_in_unroller && npeel > 1)
1274
    opt_info = analyze_insns_in_loop (loop);
1275
 
1276
  wont_exit = sbitmap_alloc (npeel + 1);
1277
  sbitmap_zero (wont_exit);
1278
 
1279
  opt_info_start_duplication (opt_info);
1280
 
1281
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1282
                                      loops, npeel, wont_exit,
1283
                                      NULL, NULL,
1284
                                      NULL, DLTHE_FLAG_UPDATE_FREQ
1285
                                      | (opt_info
1286
                                         ? DLTHE_RECORD_COPY_NUMBER
1287
                                           : 0));
1288
  gcc_assert (ok);
1289
 
1290
  free (wont_exit);
1291
 
1292
  if (opt_info)
1293
    {
1294
      apply_opt_in_copies (opt_info, npeel, false, false);
1295
      free_opt_info (opt_info);
1296
    }
1297
 
1298
  if (desc->simple_p)
1299
    {
1300
      if (desc->const_iter)
1301
        {
1302
          desc->niter -= npeel;
1303
          desc->niter_expr = GEN_INT (desc->niter);
1304
          desc->noloop_assumptions = NULL_RTX;
1305
        }
1306
      else
1307
        {
1308
          /* We cannot just update niter_expr, as its value might be clobbered
1309
             inside loop.  We could handle this by counting the number into
1310
             temporary just like we do in runtime unrolling, but it does not
1311
             seem worthwhile.  */
1312
          free_simple_loop_desc (loop);
1313
        }
1314
    }
1315
  if (dump_file)
1316
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1317
}
1318
 
1319
/* Decide whether to unroll LOOP stupidly and how much.  */
1320
static void
1321
decide_unroll_stupid (struct loop *loop, int flags)
1322
{
1323
  unsigned nunroll, nunroll_by_av, i;
1324
  struct niter_desc *desc;
1325
 
1326
  if (!(flags & UAP_UNROLL_ALL))
1327
    {
1328
      /* We were not asked to, just return back silently.  */
1329
      return;
1330
    }
1331
 
1332
  if (dump_file)
1333
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1334
 
1335
  /* nunroll = total number of copies of the original loop body in
1336
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1337
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1338
  nunroll_by_av
1339
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1340
  if (nunroll > nunroll_by_av)
1341
    nunroll = nunroll_by_av;
1342
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1343
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1344
 
1345
  /* Skip big loops.  */
1346
  if (nunroll <= 1)
1347
    {
1348
      if (dump_file)
1349
        fprintf (dump_file, ";; Not considering loop, is too big\n");
1350
      return;
1351
    }
1352
 
1353
  /* Check for simple loops.  */
1354
  desc = get_simple_loop_desc (loop);
1355
 
1356
  /* Check simpleness.  */
1357
  if (desc->simple_p && !desc->assumptions)
1358
    {
1359
      if (dump_file)
1360
        fprintf (dump_file, ";; The loop is simple\n");
1361
      return;
1362
    }
1363
 
1364
  /* Do not unroll loops with branches inside -- it increases number
1365
     of mispredicts.  */
1366
  if (num_loop_branches (loop) > 1)
1367
    {
1368
      if (dump_file)
1369
        fprintf (dump_file, ";; Not unrolling, contains branches\n");
1370
      return;
1371
    }
1372
 
1373
  /* If we have profile feedback, check whether the loop rolls.  */
1374
  if (loop->header->count
1375
      && expected_loop_iterations (loop) < 2 * nunroll)
1376
    {
1377
      if (dump_file)
1378
        fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1379
      return;
1380
    }
1381
 
1382
  /* Success.  Now force nunroll to be power of 2, as it seems that this
1383
     improves results (partially because of better alignments, partially
1384
     because of some dark magic).  */
1385
  for (i = 1; 2 * i <= nunroll; i *= 2)
1386
    continue;
1387
 
1388
  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1389
  loop->lpt_decision.times = i - 1;
1390
 
1391
  if (dump_file)
1392
    fprintf (dump_file,
1393
             ";; Decided to unroll the loop stupidly, %d times.\n",
1394
             loop->lpt_decision.times);
1395
}
1396
 
1397
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1398
   while (cond)
1399
     body;
1400
 
1401
   ==>
1402
 
1403
   while (cond)
1404
     {
1405
       body;
1406
       if (!cond) break;
1407
       body;
1408
       if (!cond) break;
1409
       body;
1410
       if (!cond) break;
1411
       body;
1412
     }
1413
   */
1414
static void
1415
unroll_loop_stupid (struct loops *loops, struct loop *loop)
1416
{
1417
  sbitmap wont_exit;
1418
  unsigned nunroll = loop->lpt_decision.times;
1419
  struct niter_desc *desc = get_simple_loop_desc (loop);
1420
  struct opt_info *opt_info = NULL;
1421
  bool ok;
1422
 
1423
  if (flag_split_ivs_in_unroller
1424
      || flag_variable_expansion_in_unroller)
1425
    opt_info = analyze_insns_in_loop (loop);
1426
 
1427
 
1428
  wont_exit = sbitmap_alloc (nunroll + 1);
1429
  sbitmap_zero (wont_exit);
1430
  opt_info_start_duplication (opt_info);
1431
 
1432
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1433
                                      loops, nunroll, wont_exit,
1434
                                      NULL, NULL, NULL,
1435
                                      DLTHE_FLAG_UPDATE_FREQ
1436
                                      | (opt_info
1437
                                         ? DLTHE_RECORD_COPY_NUMBER
1438
                                           : 0));
1439
  gcc_assert (ok);
1440
 
1441
  if (opt_info)
1442
    {
1443
      apply_opt_in_copies (opt_info, nunroll, true, true);
1444
      free_opt_info (opt_info);
1445
    }
1446
 
1447
  free (wont_exit);
1448
 
1449
  if (desc->simple_p)
1450
    {
1451
      /* We indeed may get here provided that there are nontrivial assumptions
1452
         for a loop to be really simple.  We could update the counts, but the
1453
         problem is that we are unable to decide which exit will be taken
1454
         (not really true in case the number of iterations is constant,
1455
         but noone will do anything with this information, so we do not
1456
         worry about it).  */
1457
      desc->simple_p = false;
1458
    }
1459
 
1460
  if (dump_file)
1461
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1462
             nunroll, num_loop_insns (loop));
1463
}
1464
 
1465
/* A hash function for information about insns to split.  */
1466
 
1467
static hashval_t
1468
si_info_hash (const void *ivts)
1469
{
1470
  return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1471
}
1472
 
1473
/* An equality functions for information about insns to split.  */
1474
 
1475
static int
1476
si_info_eq (const void *ivts1, const void *ivts2)
1477
{
1478
  const struct iv_to_split *i1 = ivts1;
1479
  const struct iv_to_split *i2 = ivts2;
1480
 
1481
  return i1->insn == i2->insn;
1482
}
1483
 
1484
/* Return a hash for VES, which is really a "var_to_expand *".  */
1485
 
1486
static hashval_t
1487
ve_info_hash (const void *ves)
1488
{
1489
  return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1490
}
1491
 
1492
/* Return true if IVTS1 and IVTS2 (which are really both of type
1493
   "var_to_expand *") refer to the same instruction.  */
1494
 
1495
static int
1496
ve_info_eq (const void *ivts1, const void *ivts2)
1497
{
1498
  const struct var_to_expand *i1 = ivts1;
1499
  const struct var_to_expand *i2 = ivts2;
1500
 
1501
  return i1->insn == i2->insn;
1502
}
1503
 
1504
/* Returns true if REG is referenced in one insn in LOOP.  */
1505
 
1506
bool
1507
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1508
{
1509
  basic_block *body, bb;
1510
  unsigned i;
1511
  int count_ref = 0;
1512
  rtx insn;
1513
 
1514
  body = get_loop_body (loop);
1515
  for (i = 0; i < loop->num_nodes; i++)
1516
    {
1517
      bb = body[i];
1518
 
1519
      FOR_BB_INSNS (bb, insn)
1520
      {
1521
        if (rtx_referenced_p (reg, insn))
1522
          count_ref++;
1523
      }
1524
    }
1525
  return (count_ref  == 1);
1526
}
1527
 
1528
/* Determine whether INSN contains an accumulator
1529
   which can be expanded into separate copies,
1530
   one for each copy of the LOOP body.
1531
 
1532
   for (i = 0 ; i < n; i++)
1533
     sum += a[i];
1534
 
1535
   ==>
1536
 
1537
   sum += a[i]
1538
   ....
1539
   i = i+1;
1540
   sum1 += a[i]
1541
   ....
1542
   i = i+1
1543
   sum2 += a[i];
1544
   ....
1545
 
1546
   Return NULL if INSN contains no opportunity for expansion of accumulator.
1547
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1548
   information and return a pointer to it.
1549
*/
1550
 
1551
static struct var_to_expand *
1552
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1553
{
1554
  rtx set, dest, src, op1;
1555
  struct var_to_expand *ves;
1556
  enum machine_mode mode1, mode2;
1557
 
1558
  set = single_set (insn);
1559
  if (!set)
1560
    return NULL;
1561
 
1562
  dest = SET_DEST (set);
1563
  src = SET_SRC (set);
1564
 
1565
  if (GET_CODE (src) != PLUS
1566
      && GET_CODE (src) != MINUS
1567
      && GET_CODE (src) != MULT)
1568
    return NULL;
1569
 
1570
  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1571
     in MD.  But if there is no optab to generate the insn, we can not
1572
     perform the variable expansion.  This can happen if an MD provides
1573
     an insn but not a named pattern to generate it, for example to avoid
1574
     producing code that needs additional mode switches like for x87/mmx.
1575
 
1576
     So we check have_insn_for which looks for an optab for the operation
1577
     in SRC.  If it doesn't exist, we can't perform the expansion even
1578
     though INSN is valid.  */
1579
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1580
    return NULL;
1581
 
1582
  if (!XEXP (src, 0))
1583
    return NULL;
1584
 
1585
  op1 = XEXP (src, 0);
1586
 
1587
  if (!REG_P (dest)
1588
      && !(GET_CODE (dest) == SUBREG
1589
           && REG_P (SUBREG_REG (dest))))
1590
    return NULL;
1591
 
1592
  if (!rtx_equal_p (dest, op1))
1593
    return NULL;
1594
 
1595
  if (!referenced_in_one_insn_in_loop_p (loop, dest))
1596
    return NULL;
1597
 
1598
  if (rtx_referenced_p (dest, XEXP (src, 1)))
1599
    return NULL;
1600
 
1601
  mode1 = GET_MODE (dest);
1602
  mode2 = GET_MODE (XEXP (src, 1));
1603
  if ((FLOAT_MODE_P (mode1)
1604
       || FLOAT_MODE_P (mode2))
1605
      && !flag_unsafe_math_optimizations)
1606
    return NULL;
1607
 
1608
  /* Record the accumulator to expand.  */
1609
  ves = xmalloc (sizeof (struct var_to_expand));
1610
  ves->insn = insn;
1611
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
1612
  ves->reg = copy_rtx (dest);
1613
  ves->op = GET_CODE (src);
1614
  ves->expansion_count = 0;
1615
  ves->reuse_expansion = 0;
1616
  return ves;
1617
}
1618
 
1619
/* Determine whether there is an induction variable in INSN that
1620
   we would like to split during unrolling.
1621
 
1622
   I.e. replace
1623
 
1624
   i = i + 1;
1625
   ...
1626
   i = i + 1;
1627
   ...
1628
   i = i + 1;
1629
   ...
1630
 
1631
   type chains by
1632
 
1633
   i0 = i + 1
1634
   ...
1635
   i = i0 + 1
1636
   ...
1637
   i = i0 + 2
1638
   ...
1639
 
1640
   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1641
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
1642
   pointer to it.  */
1643
 
1644
static struct iv_to_split *
1645
analyze_iv_to_split_insn (rtx insn)
1646
{
1647
  rtx set, dest;
1648
  struct rtx_iv iv;
1649
  struct iv_to_split *ivts;
1650
  bool ok;
1651
 
1652
  /* For now we just split the basic induction variables.  Later this may be
1653
     extended for example by selecting also addresses of memory references.  */
1654
  set = single_set (insn);
1655
  if (!set)
1656
    return NULL;
1657
 
1658
  dest = SET_DEST (set);
1659
  if (!REG_P (dest))
1660
    return NULL;
1661
 
1662
  if (!biv_p (insn, dest))
1663
    return NULL;
1664
 
1665
  ok = iv_analyze (insn, dest, &iv);
1666
  gcc_assert (ok);
1667
 
1668
  if (iv.step == const0_rtx
1669
      || iv.mode != iv.extend_mode)
1670
    return NULL;
1671
 
1672
  /* Record the insn to split.  */
1673
  ivts = xmalloc (sizeof (struct iv_to_split));
1674
  ivts->insn = insn;
1675
  ivts->base_var = NULL_RTX;
1676
  ivts->step = iv.step;
1677
  ivts->n_loc = 1;
1678
  ivts->loc[0] = 1;
1679
 
1680
  return ivts;
1681
}
1682
 
1683
/* Determines which of insns in LOOP can be optimized.
1684
   Return a OPT_INFO struct with the relevant hash tables filled
1685
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
1686
   is undefined for the return value.  */
1687
 
1688
static struct opt_info *
1689
analyze_insns_in_loop (struct loop *loop)
1690
{
1691
  basic_block *body, bb;
1692
  unsigned i, num_edges = 0;
1693
  struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1694
  rtx insn;
1695
  struct iv_to_split *ivts = NULL;
1696
  struct var_to_expand *ves = NULL;
1697
  PTR *slot1;
1698
  PTR *slot2;
1699
  edge *edges = get_loop_exit_edges (loop, &num_edges);
1700
  bool can_apply = false;
1701
 
1702
  iv_analysis_loop_init (loop);
1703
 
1704
  body = get_loop_body (loop);
1705
 
1706
  if (flag_split_ivs_in_unroller)
1707
    opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1708
                                            si_info_hash, si_info_eq, free);
1709
 
1710
  /* Record the loop exit bb and loop preheader before the unrolling.  */
1711
  if (!loop_preheader_edge (loop)->src)
1712
    {
1713
      loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1714
      opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1715
    }
1716
  else
1717
    opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1718
 
1719
  if (num_edges == 1
1720
      && !(edges[0]->flags & EDGE_COMPLEX))
1721
    {
1722
      opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1723
      can_apply = true;
1724
    }
1725
 
1726
  if (flag_variable_expansion_in_unroller
1727
      && can_apply)
1728
    opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1729
                                                      ve_info_hash, ve_info_eq, free);
1730
 
1731
  for (i = 0; i < loop->num_nodes; i++)
1732
    {
1733
      bb = body[i];
1734
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1735
        continue;
1736
 
1737
      FOR_BB_INSNS (bb, insn)
1738
      {
1739
        if (!INSN_P (insn))
1740
          continue;
1741
 
1742
        if (opt_info->insns_to_split)
1743
          ivts = analyze_iv_to_split_insn (insn);
1744
 
1745
        if (ivts)
1746
          {
1747
            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1748
            *slot1 = ivts;
1749
            continue;
1750
          }
1751
 
1752
        if (opt_info->insns_with_var_to_expand)
1753
          ves = analyze_insn_to_expand_var (loop, insn);
1754
 
1755
        if (ves)
1756
          {
1757
            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1758
            *slot2 = ves;
1759
          }
1760
      }
1761
    }
1762
 
1763
  free (edges);
1764
  free (body);
1765
  return opt_info;
1766
}
1767
 
1768
/* Called just before loop duplication.  Records start of duplicated area
1769
   to OPT_INFO.  */
1770
 
1771
static void
1772
opt_info_start_duplication (struct opt_info *opt_info)
1773
{
1774
  if (opt_info)
1775
    opt_info->first_new_block = last_basic_block;
1776
}
1777
 
1778
/* Determine the number of iterations between initialization of the base
1779
   variable and the current copy (N_COPY).  N_COPIES is the total number
1780
   of newly created copies.  UNROLLING is true if we are unrolling
1781
   (not peeling) the loop.  */
1782
 
1783
static unsigned
1784
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1785
{
1786
  if (unrolling)
1787
    {
1788
      /* If we are unrolling, initialization is done in the original loop
1789
         body (number 0).  */
1790
      return n_copy;
1791
    }
1792
  else
1793
    {
1794
      /* If we are peeling, the copy in that the initialization occurs has
1795
         number 1.  The original loop (number 0) is the last.  */
1796
      if (n_copy)
1797
        return n_copy - 1;
1798
      else
1799
        return n_copies;
1800
    }
1801
}
1802
 
1803
/* Locate in EXPR the expression corresponding to the location recorded
1804
   in IVTS, and return a pointer to the RTX for this location.  */
1805
 
1806
static rtx *
1807
get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1808
{
1809
  unsigned i;
1810
  rtx *ret = &expr;
1811
 
1812
  for (i = 0; i < ivts->n_loc; i++)
1813
    ret = &XEXP (*ret, ivts->loc[i]);
1814
 
1815
  return ret;
1816
}
1817
 
1818
/* Allocate basic variable for the induction variable chain.  Callback for
1819
   htab_traverse.  */
1820
 
1821
static int
1822
allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1823
{
1824
  struct iv_to_split *ivts = *slot;
1825
  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1826
 
1827
  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1828
 
1829
  return 1;
1830
}
1831
 
1832
/* Insert initialization of basic variable of IVTS before INSN, taking
1833
   the initial value from INSN.  */
1834
 
1835
static void
1836
insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1837
{
1838
  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1839
  rtx seq;
1840
 
1841
  start_sequence ();
1842
  expr = force_operand (expr, ivts->base_var);
1843
  if (expr != ivts->base_var)
1844
    emit_move_insn (ivts->base_var, expr);
1845
  seq = get_insns ();
1846
  end_sequence ();
1847
 
1848
  emit_insn_before (seq, insn);
1849
}
1850
 
1851
/* Replace the use of induction variable described in IVTS in INSN
1852
   by base variable + DELTA * step.  */
1853
 
1854
static void
1855
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1856
{
1857
  rtx expr, *loc, seq, incr, var;
1858
  enum machine_mode mode = GET_MODE (ivts->base_var);
1859
  rtx src, dest, set;
1860
 
1861
  /* Construct base + DELTA * step.  */
1862
  if (!delta)
1863
    expr = ivts->base_var;
1864
  else
1865
    {
1866
      incr = simplify_gen_binary (MULT, mode,
1867
                                  ivts->step, gen_int_mode (delta, mode));
1868
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1869
                                  ivts->base_var, incr);
1870
    }
1871
 
1872
  /* Figure out where to do the replacement.  */
1873
  loc = get_ivts_expr (single_set (insn), ivts);
1874
 
1875
  /* If we can make the replacement right away, we're done.  */
1876
  if (validate_change (insn, loc, expr, 0))
1877
    return;
1878
 
1879
  /* Otherwise, force EXPR into a register and try again.  */
1880
  start_sequence ();
1881
  var = gen_reg_rtx (mode);
1882
  expr = force_operand (expr, var);
1883
  if (expr != var)
1884
    emit_move_insn (var, expr);
1885
  seq = get_insns ();
1886
  end_sequence ();
1887
  emit_insn_before (seq, insn);
1888
 
1889
  if (validate_change (insn, loc, var, 0))
1890
    return;
1891
 
1892
  /* The last chance.  Try recreating the assignment in insn
1893
     completely from scratch.  */
1894
  set = single_set (insn);
1895
  gcc_assert (set);
1896
 
1897
  start_sequence ();
1898
  *loc = var;
1899
  src = copy_rtx (SET_SRC (set));
1900
  dest = copy_rtx (SET_DEST (set));
1901
  src = force_operand (src, dest);
1902
  if (src != dest)
1903
    emit_move_insn (dest, src);
1904
  seq = get_insns ();
1905
  end_sequence ();
1906
 
1907
  emit_insn_before (seq, insn);
1908
  delete_insn (insn);
1909
}
1910
 
1911
 
1912
/* Return one expansion of the accumulator recorded in struct VE.  */
1913
 
1914
static rtx
1915
get_expansion (struct var_to_expand *ve)
1916
{
1917
  rtx reg;
1918
 
1919
  if (ve->reuse_expansion == 0)
1920
    reg = ve->reg;
1921
  else
1922
    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1923
 
1924
  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1925
    ve->reuse_expansion = 0;
1926
  else
1927
    ve->reuse_expansion++;
1928
 
1929
  return reg;
1930
}
1931
 
1932
 
1933
/* Given INSN replace the uses of the accumulator recorded in VE
1934
   with a new register.  */
1935
 
1936
static void
1937
expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1938
{
1939
  rtx new_reg, set;
1940
  bool really_new_expansion = false;
1941
 
1942
  set = single_set (insn);
1943
  gcc_assert (set);
1944
 
1945
  /* Generate a new register only if the expansion limit has not been
1946
     reached.  Else reuse an already existing expansion.  */
1947
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1948
    {
1949
      really_new_expansion = true;
1950
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1951
    }
1952
  else
1953
    new_reg = get_expansion (ve);
1954
 
1955
  validate_change (insn, &SET_DEST (set), new_reg, 1);
1956
  validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1957
 
1958
  if (apply_change_group ())
1959
    if (really_new_expansion)
1960
      {
1961
        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1962
        ve->expansion_count++;
1963
      }
1964
}
1965
 
1966
/* Initialize the variable expansions in loop preheader.
1967
   Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1968
   basic block where the initialization of the expansions
1969
   should take place.  */
1970
 
1971
static int
1972
insert_var_expansion_initialization (void **slot, void *place_p)
1973
{
1974
  struct var_to_expand *ve = *slot;
1975
  basic_block place = (basic_block)place_p;
1976
  rtx seq, var, zero_init, insn;
1977
  unsigned i;
1978
 
1979
  if (VEC_length (rtx, ve->var_expansions) == 0)
1980
    return 1;
1981
 
1982
  start_sequence ();
1983
  if (ve->op == PLUS || ve->op == MINUS)
1984
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1985
      {
1986
        zero_init =  CONST0_RTX (GET_MODE (var));
1987
        emit_move_insn (var, zero_init);
1988
      }
1989
  else if (ve->op == MULT)
1990
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1991
      {
1992
        zero_init =  CONST1_RTX (GET_MODE (var));
1993
        emit_move_insn (var, zero_init);
1994
      }
1995
 
1996
  seq = get_insns ();
1997
  end_sequence ();
1998
 
1999
  insn = BB_HEAD (place);
2000
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2001
    insn = NEXT_INSN (insn);
2002
 
2003
  emit_insn_after (seq, insn);
2004
  /* Continue traversing the hash table.  */
2005
  return 1;
2006
}
2007
 
2008
/*  Combine the variable expansions at the loop exit.
2009
    Callbacks for htab_traverse.  PLACE_P is the loop exit
2010
    basic block where the summation of the expansions should
2011
    take place.  */
2012
 
2013
static int
2014
combine_var_copies_in_loop_exit (void **slot, void *place_p)
2015
{
2016
  struct var_to_expand *ve = *slot;
2017
  basic_block place = (basic_block)place_p;
2018
  rtx sum = ve->reg;
2019
  rtx expr, seq, var, insn;
2020
  unsigned i;
2021
 
2022
  if (VEC_length (rtx, ve->var_expansions) == 0)
2023
    return 1;
2024
 
2025
  start_sequence ();
2026
  if (ve->op == PLUS || ve->op == MINUS)
2027
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2028
      {
2029
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2030
                                   var, sum);
2031
      }
2032
  else if (ve->op == MULT)
2033
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2034
      {
2035
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2036
                                   var, sum);
2037
      }
2038
 
2039
  expr = force_operand (sum, ve->reg);
2040
  if (expr != ve->reg)
2041
    emit_move_insn (ve->reg, expr);
2042
  seq = get_insns ();
2043
  end_sequence ();
2044
 
2045
  insn = BB_HEAD (place);
2046
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2047
    insn = NEXT_INSN (insn);
2048
 
2049
  emit_insn_after (seq, insn);
2050
 
2051
  /* Continue traversing the hash table.  */
2052
  return 1;
2053
}
2054
 
2055
/* Apply loop optimizations in loop copies using the
2056
   data which gathered during the unrolling.  Structure
2057
   OPT_INFO record that data.
2058
 
2059
   UNROLLING is true if we unrolled (not peeled) the loop.
2060
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2061
   the loop (as it should happen in complete unrolling, but not in ordinary
2062
   peeling of the loop).  */
2063
 
2064
static void
2065
apply_opt_in_copies (struct opt_info *opt_info,
2066
                     unsigned n_copies, bool unrolling,
2067
                     bool rewrite_original_loop)
2068
{
2069
  unsigned i, delta;
2070
  basic_block bb, orig_bb;
2071
  rtx insn, orig_insn, next;
2072
  struct iv_to_split ivts_templ, *ivts;
2073
  struct var_to_expand ve_templ, *ves;
2074
 
2075
  /* Sanity check -- we need to put initialization in the original loop
2076
     body.  */
2077
  gcc_assert (!unrolling || rewrite_original_loop);
2078
 
2079
  /* Allocate the basic variables (i0).  */
2080
  if (opt_info->insns_to_split)
2081
    htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2082
 
2083
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2084
    {
2085
      bb = BASIC_BLOCK (i);
2086
      orig_bb = get_bb_original (bb);
2087
 
2088
      /* bb->aux holds position in copy sequence initialized by
2089
         duplicate_loop_to_header_edge.  */
2090
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2091
                                        unrolling);
2092
      bb->aux = 0;
2093
      orig_insn = BB_HEAD (orig_bb);
2094
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2095
        {
2096
          next = NEXT_INSN (insn);
2097
          if (!INSN_P (insn))
2098
            continue;
2099
 
2100
          while (!INSN_P (orig_insn))
2101
            orig_insn = NEXT_INSN (orig_insn);
2102
 
2103
          ivts_templ.insn = orig_insn;
2104
          ve_templ.insn = orig_insn;
2105
 
2106
          /* Apply splitting iv optimization.  */
2107
          if (opt_info->insns_to_split)
2108
            {
2109
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2110
 
2111
              if (ivts)
2112
                {
2113
                  gcc_assert (GET_CODE (PATTERN (insn))
2114
                              == GET_CODE (PATTERN (orig_insn)));
2115
 
2116
                  if (!delta)
2117
                    insert_base_initialization (ivts, insn);
2118
                  split_iv (ivts, insn, delta);
2119
                }
2120
            }
2121
          /* Apply variable expansion optimization.  */
2122
          if (unrolling && opt_info->insns_with_var_to_expand)
2123
            {
2124
              ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2125
              if (ves)
2126
                {
2127
                  gcc_assert (GET_CODE (PATTERN (insn))
2128
                              == GET_CODE (PATTERN (orig_insn)));
2129
                  expand_var_during_unrolling (ves, insn);
2130
                }
2131
            }
2132
          orig_insn = NEXT_INSN (orig_insn);
2133
        }
2134
    }
2135
 
2136
  if (!rewrite_original_loop)
2137
    return;
2138
 
2139
  /* Initialize the variable expansions in the loop preheader
2140
     and take care of combining them at the loop exit.  */
2141
  if (opt_info->insns_with_var_to_expand)
2142
    {
2143
      htab_traverse (opt_info->insns_with_var_to_expand,
2144
                     insert_var_expansion_initialization,
2145
                     opt_info->loop_preheader);
2146
      htab_traverse (opt_info->insns_with_var_to_expand,
2147
                     combine_var_copies_in_loop_exit,
2148
                     opt_info->loop_exit);
2149
    }
2150
 
2151
  /* Rewrite also the original loop body.  Find them as originals of the blocks
2152
     in the last copied iteration, i.e. those that have
2153
     get_bb_copy (get_bb_original (bb)) == bb.  */
2154
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2155
    {
2156
      bb = BASIC_BLOCK (i);
2157
      orig_bb = get_bb_original (bb);
2158
      if (get_bb_copy (orig_bb) != bb)
2159
        continue;
2160
 
2161
      delta = determine_split_iv_delta (0, n_copies, unrolling);
2162
      for (orig_insn = BB_HEAD (orig_bb);
2163
           orig_insn != NEXT_INSN (BB_END (bb));
2164
           orig_insn = next)
2165
        {
2166
          next = NEXT_INSN (orig_insn);
2167
 
2168
          if (!INSN_P (orig_insn))
2169
            continue;
2170
 
2171
          ivts_templ.insn = orig_insn;
2172
          if (opt_info->insns_to_split)
2173
            {
2174
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2175
              if (ivts)
2176
                {
2177
                  if (!delta)
2178
                    insert_base_initialization (ivts, orig_insn);
2179
                  split_iv (ivts, orig_insn, delta);
2180
                  continue;
2181
                }
2182
            }
2183
 
2184
        }
2185
    }
2186
}
2187
 
2188
/*  Release the data structures used for the variable expansion
2189
    optimization.  Callbacks for htab_traverse.  */
2190
 
2191
static int
2192
release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2193
{
2194
  struct var_to_expand *ve = *slot;
2195
 
2196
  VEC_free (rtx, heap, ve->var_expansions);
2197
 
2198
  /* Continue traversing the hash table.  */
2199
  return 1;
2200
}
2201
 
2202
/* Release OPT_INFO.  */
2203
 
2204
static void
2205
free_opt_info (struct opt_info *opt_info)
2206
{
2207
  if (opt_info->insns_to_split)
2208
    htab_delete (opt_info->insns_to_split);
2209
  if (opt_info->insns_with_var_to_expand)
2210
    {
2211
      htab_traverse (opt_info->insns_with_var_to_expand,
2212
                     release_var_copies, NULL);
2213
      htab_delete (opt_info->insns_with_var_to_expand);
2214
    }
2215
  free (opt_info);
2216
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.