OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [loop-doloop.c] - Blame information for rev 318

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Perform doloop optimizations
2
   Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
   Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tm.h"
25
#include "rtl.h"
26
#include "flags.h"
27
#include "expr.h"
28
#include "hard-reg-set.h"
29
#include "basic-block.h"
30
#include "toplev.h"
31
#include "tm_p.h"
32
#include "cfgloop.h"
33
#include "output.h"
34
#include "params.h"
35
#include "target.h"
36
 
37
/* This module is used to modify loops with a determinable number of
38
   iterations to use special low-overhead looping instructions.
39
 
40
   It first validates whether the loop is well behaved and has a
41
   determinable number of iterations (either at compile or run-time).
42
   It then modifies the loop to use a low-overhead looping pattern as
43
   follows:
44
 
45
   1. A pseudo register is allocated as the loop iteration counter.
46
 
47
   2. The number of loop iterations is calculated and is stored
48
      in the loop counter.
49
 
50
   3. At the end of the loop, the jump insn is replaced by the
51
      doloop_end pattern.  The compare must remain because it might be
52
      used elsewhere.  If the loop-variable or condition register are
53
      used elsewhere, they will be eliminated by flow.
54
 
55
   4. An optional doloop_begin pattern is inserted at the top of the
56
      loop.
57
 
58
   TODO The optimization should only performed when either the biv used for exit
59
   condition is unused at all except for the exit test, or if we do not have to
60
   change its value, since otherwise we have to add a new induction variable,
61
   which usually will not pay up (unless the cost of the doloop pattern is
62
   somehow extremely lower than the cost of compare & jump, or unless the bct
63
   register cannot be used for anything else but doloop -- ??? detect these
64
   cases).  */
65
 
66
#ifdef HAVE_doloop_end
67
 
68
/* Return the loop termination condition for PATTERN or zero
69
   if it is not a decrement and branch jump insn.  */
70
 
71
rtx
72
doloop_condition_get (rtx pattern)
73
{
74
  rtx cmp;
75
  rtx inc;
76
  rtx reg;
77
  rtx inc_src;
78
  rtx condition;
79
 
80
  /* The canonical doloop pattern we expect is:
81
 
82
     (parallel [(set (pc) (if_then_else (condition)
83
                                        (label_ref (label))
84
                                        (pc)))
85
                (set (reg) (plus (reg) (const_int -1)))
86
                (additional clobbers and uses)])
87
 
88
     Some targets (IA-64) wrap the set of the loop counter in
89
     an if_then_else too.
90
 
91
     In summary, the branch must be the first entry of the
92
     parallel (also required by jump.c), and the second
93
     entry of the parallel must be a set of the loop counter
94
     register.  */
95
 
96
  if (GET_CODE (pattern) != PARALLEL)
97
    return 0;
98
 
99
  cmp = XVECEXP (pattern, 0, 0);
100
  inc = XVECEXP (pattern, 0, 1);
101
 
102
  /* Check for (set (reg) (something)).  */
103
  if (GET_CODE (inc) != SET)
104
    return 0;
105
  reg = SET_DEST (inc);
106
  if (! REG_P (reg))
107
    return 0;
108
 
109
  /* Check if something = (plus (reg) (const_int -1)).
110
     On IA-64, this decrement is wrapped in an if_then_else.  */
111
  inc_src = SET_SRC (inc);
112
  if (GET_CODE (inc_src) == IF_THEN_ELSE)
113
    inc_src = XEXP (inc_src, 1);
114
  if (GET_CODE (inc_src) != PLUS
115
      || XEXP (inc_src, 0) != reg
116
      || XEXP (inc_src, 1) != constm1_rtx)
117
    return 0;
118
 
119
  /* Check for (set (pc) (if_then_else (condition)
120
                                       (label_ref (label))
121
                                       (pc))).  */
122
  if (GET_CODE (cmp) != SET
123
      || SET_DEST (cmp) != pc_rtx
124
      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
125
      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
126
      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
127
    return 0;
128
 
129
  /* Extract loop termination condition.  */
130
  condition = XEXP (SET_SRC (cmp), 0);
131
 
132
  /* We expect a GE or NE comparison with 0 or 1.  */
133
  if ((GET_CODE (condition) != GE
134
       && GET_CODE (condition) != NE)
135
      || (XEXP (condition, 1) != const0_rtx
136
          && XEXP (condition, 1) != const1_rtx))
137
    return 0;
138
 
139
  if ((XEXP (condition, 0) == reg)
140
      || (GET_CODE (XEXP (condition, 0)) == PLUS
141
                   && XEXP (XEXP (condition, 0), 0) == reg))
142
    return condition;
143
 
144
  /* ??? If a machine uses a funny comparison, we could return a
145
     canonicalized form here.  */
146
 
147
  return 0;
148
}
149
 
150
/* Return nonzero if the loop specified by LOOP is suitable for
151
   the use of special low-overhead looping instructions.  DESC
152
   describes the number of iterations of the loop.  */
153
 
154
static bool
155
doloop_valid_p (struct loop *loop, struct niter_desc *desc)
156
{
157
  basic_block *body = get_loop_body (loop), bb;
158
  rtx insn;
159
  unsigned i;
160
  bool result = true;
161
 
162
  /* Check for loops that may not terminate under special conditions.  */
163
  if (!desc->simple_p
164
      || desc->assumptions
165
      || desc->infinite)
166
    {
167
      /* There are some cases that would require a special attention.
168
         For example if the comparison is LEU and the comparison value
169
         is UINT_MAX then the loop will not terminate.  Similarly, if the
170
         comparison code is GEU and the comparison value is 0, the
171
         loop will not terminate.
172
 
173
         If the absolute increment is not 1, the loop can be infinite
174
         even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
175
 
176
         ??? We could compute these conditions at run-time and have a
177
         additional jump around the loop to ensure an infinite loop.
178
         However, it is very unlikely that this is the intended
179
         behavior of the loop and checking for these rare boundary
180
         conditions would pessimize all other code.
181
 
182
         If the loop is executed only a few times an extra check to
183
         restart the loop could use up most of the benefits of using a
184
         count register loop.  Note however, that normally, this
185
         restart branch would never execute, so it could be predicted
186
         well by the CPU.  We should generate the pessimistic code by
187
         default, and have an option, e.g. -funsafe-loops that would
188
         enable count-register loops in this case.  */
189
      if (dump_file)
190
        fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
191
      result = false;
192
      goto cleanup;
193
    }
194
 
195
  for (i = 0; i < loop->num_nodes; i++)
196
    {
197
      bb = body[i];
198
 
199
      for (insn = BB_HEAD (bb);
200
           insn != NEXT_INSN (BB_END (bb));
201
           insn = NEXT_INSN (insn))
202
        {
203
          /* Different targets have different necessities for low-overhead
204
             looping.  Call the back end for each instruction within the loop
205
             to let it decide whether the insn prohibits a low-overhead loop.
206
             It will then return the cause for it to emit to the dump file.  */
207
          const char * invalid = targetm.invalid_within_doloop (insn);
208
          if (invalid)
209
            {
210
              if (dump_file)
211
                fprintf (dump_file, "Doloop: %s\n", invalid);
212
              result = false;
213
              goto cleanup;
214
            }
215
        }
216
    }
217
  result = true;
218
 
219
cleanup:
220
  free (body);
221
 
222
  return result;
223
}
224
 
225
/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
226
   edge.  If the condition is always false, do not do anything.  If it is always
227
   true, redirect E to DEST and return false.  In all other cases, true is
228
   returned.  */
229
 
230
static bool
231
add_test (rtx cond, edge *e, basic_block dest)
232
{
233
  rtx seq, jump, label;
234
  enum machine_mode mode;
235
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
236
  enum rtx_code code = GET_CODE (cond);
237
  basic_block bb;
238
 
239
  mode = GET_MODE (XEXP (cond, 0));
240
  if (mode == VOIDmode)
241
    mode = GET_MODE (XEXP (cond, 1));
242
 
243
  start_sequence ();
244
  op0 = force_operand (op0, NULL_RTX);
245
  op1 = force_operand (op1, NULL_RTX);
246
  label = block_label (dest);
247
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
248
 
249
  jump = get_last_insn ();
250
  if (!JUMP_P (jump))
251
    {
252
      /* The condition is always false and the jump was optimized out.  */
253
      end_sequence ();
254
      return true;
255
    }
256
 
257
  seq = get_insns ();
258
  end_sequence ();
259
  bb = loop_split_edge_with (*e, seq);
260
  *e = single_succ_edge (bb);
261
 
262
  if (any_uncondjump_p (jump))
263
    {
264
      /* The condition is always true.  */
265
      delete_insn (jump);
266
      redirect_edge_and_branch_force (*e, dest);
267
      return false;
268
    }
269
 
270
  JUMP_LABEL (jump) = label;
271
 
272
  /* The jump is supposed to handle an unlikely special case.  */
273
  REG_NOTES (jump)
274
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
275
                               const0_rtx, REG_NOTES (jump));
276
  LABEL_NUSES (label)++;
277
 
278
  make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
279
  return true;
280
}
281
 
282
/* Modify the loop to use the low-overhead looping insn where LOOP
283
   describes the loop, DESC describes the number of iterations of the
284
   loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
285
   end of the loop.  CONDITION is the condition separated from the
286
   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
287
 
288
static void
289
doloop_modify (struct loop *loop, struct niter_desc *desc,
290
               rtx doloop_seq, rtx condition, rtx count)
291
{
292
  rtx counter_reg;
293
  rtx tmp, noloop = NULL_RTX;
294
  rtx sequence;
295
  rtx jump_insn;
296
  rtx jump_label;
297
  int nonneg = 0;
298
  bool increment_count;
299
  basic_block loop_end = desc->out_edge->src;
300
  enum machine_mode mode;
301
 
302
  jump_insn = BB_END (loop_end);
303
 
304
  if (dump_file)
305
    {
306
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
307
      if (desc->const_iter)
308
        fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
309
      else
310
        fputs ("runtime", dump_file);
311
      fputs (" iterations).\n", dump_file);
312
    }
313
 
314
  /* Discard original jump to continue loop.  The original compare
315
     result may still be live, so it cannot be discarded explicitly.  */
316
  delete_insn (jump_insn);
317
 
318
  counter_reg = XEXP (condition, 0);
319
  if (GET_CODE (counter_reg) == PLUS)
320
    counter_reg = XEXP (counter_reg, 0);
321
  mode = GET_MODE (counter_reg);
322
 
323
  increment_count = false;
324
  switch (GET_CODE (condition))
325
    {
326
    case NE:
327
      /* Currently only NE tests against zero and one are supported.  */
328
      noloop = XEXP (condition, 1);
329
      if (noloop != const0_rtx)
330
        {
331
          gcc_assert (noloop == const1_rtx);
332
          increment_count = true;
333
        }
334
      break;
335
 
336
    case GE:
337
      /* Currently only GE tests against zero are supported.  */
338
      gcc_assert (XEXP (condition, 1) == const0_rtx);
339
 
340
      noloop = constm1_rtx;
341
 
342
      /* The iteration count does not need incrementing for a GE test.  */
343
      increment_count = false;
344
 
345
      /* Determine if the iteration counter will be non-negative.
346
         Note that the maximum value loaded is iterations_max - 1.  */
347
      if (desc->niter_max
348
          <= ((unsigned HOST_WIDEST_INT) 1
349
              << (GET_MODE_BITSIZE (mode) - 1)))
350
        nonneg = 1;
351
      break;
352
 
353
      /* Abort if an invalid doloop pattern has been generated.  */
354
    default:
355
      gcc_unreachable ();
356
    }
357
 
358
  if (increment_count)
359
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
360
 
361
  /* Insert initialization of the count register into the loop header.  */
362
  start_sequence ();
363
  tmp = force_operand (count, counter_reg);
364
  convert_move (counter_reg, tmp, 1);
365
  sequence = get_insns ();
366
  end_sequence ();
367
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
368
 
369
  if (desc->noloop_assumptions)
370
    {
371
      rtx ass = copy_rtx (desc->noloop_assumptions);
372
      basic_block preheader = loop_preheader_edge (loop)->src;
373
      basic_block set_zero
374
              = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
375
      basic_block new_preheader
376
              = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
377
      edge te;
378
 
379
      /* Expand the condition testing the assumptions and if it does not pass,
380
         reset the count register to 0.  */
381
      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
382
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
383
 
384
      set_zero->count = 0;
385
      set_zero->frequency = 0;
386
 
387
      te = single_succ_edge (preheader);
388
      for (; ass; ass = XEXP (ass, 1))
389
        if (!add_test (XEXP (ass, 0), &te, set_zero))
390
          break;
391
 
392
      if (ass)
393
        {
394
          /* We reached a condition that is always true.  This is very hard to
395
             reproduce (such a loop does not roll, and thus it would most
396
             likely get optimized out by some of the preceding optimizations).
397
             In fact, I do not have any testcase for it.  However, it would
398
             also be very hard to show that it is impossible, so we must
399
             handle this case.  */
400
          set_zero->count = preheader->count;
401
          set_zero->frequency = preheader->frequency;
402
        }
403
 
404
      if (EDGE_COUNT (set_zero->preds) == 0)
405
        {
406
          /* All the conditions were simplified to false, remove the
407
             unreachable set_zero block.  */
408
          remove_bb_from_loops (set_zero);
409
          delete_basic_block (set_zero);
410
        }
411
      else
412
        {
413
          /* Reset the counter to zero in the set_zero block.  */
414
          start_sequence ();
415
          convert_move (counter_reg, noloop, 0);
416
          sequence = get_insns ();
417
          end_sequence ();
418
          emit_insn_after (sequence, BB_END (set_zero));
419
 
420
          set_immediate_dominator (CDI_DOMINATORS, set_zero,
421
                                   recount_dominator (CDI_DOMINATORS,
422
                                                      set_zero));
423
        }
424
 
425
      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
426
                               recount_dominator (CDI_DOMINATORS,
427
                                                  new_preheader));
428
    }
429
 
430
  /* Some targets (eg, C4x) need to initialize special looping
431
     registers.  */
432
#ifdef HAVE_doloop_begin
433
  {
434
    rtx init;
435
    unsigned level = get_loop_level (loop) + 1;
436
    init = gen_doloop_begin (counter_reg,
437
                             desc->const_iter ? desc->niter_expr : const0_rtx,
438
                             GEN_INT (desc->niter_max),
439
                             GEN_INT (level));
440
    if (init)
441
      {
442
        start_sequence ();
443
        emit_insn (init);
444
        sequence = get_insns ();
445
        end_sequence ();
446
        emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
447
      }
448
  }
449
#endif
450
 
451
  /* Insert the new low-overhead looping insn.  */
452
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
453
  jump_insn = BB_END (loop_end);
454
  jump_label = block_label (desc->in_edge->dest);
455
  JUMP_LABEL (jump_insn) = jump_label;
456
  LABEL_NUSES (jump_label)++;
457
 
458
  /* Ensure the right fallthru edge is marked, for case we have reversed
459
     the condition.  */
460
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
461
  desc->out_edge->flags |= EDGE_FALLTHRU;
462
 
463
  /* Add a REG_NONNEG note if the actual or estimated maximum number
464
     of iterations is non-negative.  */
465
  if (nonneg)
466
    {
467
      REG_NOTES (jump_insn)
468
        = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
469
    }
470
}
471
 
472
/* Process loop described by LOOP validating that the loop is suitable for
473
   conversion to use a low overhead looping instruction, replacing the jump
474
   insn where suitable.  Returns true if the loop was successfully
475
   modified.  */
476
 
477
static bool
478
doloop_optimize (struct loop *loop)
479
{
480
  enum machine_mode mode;
481
  rtx doloop_seq, doloop_pat, doloop_reg;
482
  rtx iterations, count;
483
  rtx iterations_max;
484
  rtx start_label;
485
  rtx condition;
486
  unsigned level, est_niter;
487
  int max_cost;
488
  struct niter_desc *desc;
489
  unsigned word_mode_size;
490
  unsigned HOST_WIDE_INT word_mode_max;
491
 
492
  if (dump_file)
493
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
494
 
495
  iv_analysis_loop_init (loop);
496
 
497
  /* Find the simple exit of a LOOP.  */
498
  desc = get_simple_loop_desc (loop);
499
 
500
  /* Check that loop is a candidate for a low-overhead looping insn.  */
501
  if (!doloop_valid_p (loop, desc))
502
    {
503
      if (dump_file)
504
        fprintf (dump_file,
505
                 "Doloop: The loop is not suitable.\n");
506
      return false;
507
    }
508
  mode = desc->mode;
509
 
510
  est_niter = 3;
511
  if (desc->const_iter)
512
    est_niter = desc->niter;
513
  /* If the estimate on number of iterations is reliable (comes from profile
514
     feedback), use it.  Do not use it normally, since the expected number
515
     of iterations of an unrolled loop is 2.  */
516
  if (loop->header->count)
517
    est_niter = expected_loop_iterations (loop);
518
 
519
  if (est_niter < 3)
520
    {
521
      if (dump_file)
522
        fprintf (dump_file,
523
                 "Doloop: Too few iterations (%u) to be profitable.\n",
524
                 est_niter);
525
      return false;
526
    }
527
 
528
  max_cost
529
    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
530
  if (rtx_cost (desc->niter_expr, SET) > max_cost)
531
    {
532
      if (dump_file)
533
        fprintf (dump_file,
534
                 "Doloop: number of iterations too costly to compute.\n");
535
      return false;
536
    }
537
 
538
  count = copy_rtx (desc->niter_expr);
539
  iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
540
  iterations_max = GEN_INT (desc->niter_max);
541
  level = get_loop_level (loop) + 1;
542
 
543
  /* Generate looping insn.  If the pattern FAILs then give up trying
544
     to modify the loop since there is some aspect the back-end does
545
     not like.  */
546
  start_label = block_label (desc->in_edge->dest);
547
  doloop_reg = gen_reg_rtx (mode);
548
  doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
549
                               GEN_INT (level), start_label);
550
 
551
  word_mode_size = GET_MODE_BITSIZE (word_mode);
552
  word_mode_max
553
          = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
554
  if (! doloop_seq
555
      && mode != word_mode
556
      /* Before trying mode different from the one in that # of iterations is
557
         computed, we must be sure that the number of iterations fits into
558
         the new mode.  */
559
      && (word_mode_size >= GET_MODE_BITSIZE (mode)
560
          || desc->niter_max <= word_mode_max))
561
    {
562
      if (word_mode_size > GET_MODE_BITSIZE (mode))
563
        {
564
          count = simplify_gen_unary (ZERO_EXTEND, word_mode,
565
                                      count, mode);
566
          iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
567
                                           iterations, mode);
568
          iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
569
                                               iterations_max, mode);
570
        }
571
      else
572
        {
573
          count = lowpart_subreg (word_mode, count, mode);
574
          iterations = lowpart_subreg (word_mode, iterations, mode);
575
          iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
576
        }
577
      PUT_MODE (doloop_reg, word_mode);
578
      doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
579
                                   GEN_INT (level), start_label);
580
    }
581
  if (! doloop_seq)
582
    {
583
      if (dump_file)
584
        fprintf (dump_file,
585
                 "Doloop: Target unwilling to use doloop pattern!\n");
586
      return false;
587
    }
588
 
589
  /* If multiple instructions were created, the last must be the
590
     jump instruction.  Also, a raw define_insn may yield a plain
591
     pattern.  */
592
  doloop_pat = doloop_seq;
593
  if (INSN_P (doloop_pat))
594
    {
595
      while (NEXT_INSN (doloop_pat) != NULL_RTX)
596
        doloop_pat = NEXT_INSN (doloop_pat);
597
      if (JUMP_P (doloop_pat))
598
        doloop_pat = PATTERN (doloop_pat);
599
      else
600
        doloop_pat = NULL_RTX;
601
    }
602
 
603
  if (! doloop_pat
604
      || ! (condition = doloop_condition_get (doloop_pat)))
605
    {
606
      if (dump_file)
607
        fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
608
      return false;
609
    }
610
 
611
  doloop_modify (loop, desc, doloop_seq, condition, count);
612
  return true;
613
}
614
 
615
/* This is the main entry point.  Process all LOOPS using doloop_optimize.  */
616
 
617
void
618
doloop_optimize_loops (struct loops *loops)
619
{
620
  unsigned i;
621
  struct loop *loop;
622
 
623
  for (i = 1; i < loops->num; i++)
624
    {
625
      loop = loops->parray[i];
626
      if (!loop)
627
        continue;
628
 
629
      doloop_optimize (loop);
630
    }
631
 
632
  iv_analysis_done ();
633
 
634
#ifdef ENABLE_CHECKING
635
  verify_dominators (CDI_DOMINATORS);
636
  verify_loop_structure (loops);
637
#endif
638
}
639
#endif /* HAVE_doloop_end */
640
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.