OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [tree-ssa-loop-ivopts.c] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Induction variable optimizations.
2
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it
7
under the terms of the GNU General Public License as published by the
8
Free Software Foundation; either version 2, or (at your option) any
9
later version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING.  If not, write to the Free
18
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301, USA.  */
20
 
21
/* This pass tries to find the optimal set of induction variables for the loop.
22
   It optimizes just the basic linear induction variables (although adding
23
   support for other types should not be too hard).  It includes the
24
   optimizations commonly known as strength reduction, induction variable
25
   coalescing and induction variable elimination.  It does it in the
26
   following steps:
27
 
28
   1) The interesting uses of induction variables are found.  This includes
29
 
30
      -- uses of induction variables in non-linear expressions
31
      -- addresses of arrays
32
      -- comparisons of induction variables
33
 
34
   2) Candidates for the induction variables are found.  This includes
35
 
36
      -- old induction variables
37
      -- the variables defined by expressions derived from the "interesting
38
         uses" above
39
 
40
   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
41
      cost function assigns a cost to sets of induction variables and consists
42
      of three parts:
43
 
44
      -- The use costs.  Each of the interesting uses chooses the best induction
45
         variable in the set and adds its cost to the sum.  The cost reflects
46
         the time spent on modifying the induction variables value to be usable
47
         for the given purpose (adding base and offset for arrays, etc.).
48
      -- The variable costs.  Each of the variables has a cost assigned that
49
         reflects the costs associated with incrementing the value of the
50
         variable.  The original variables are somewhat preferred.
51
      -- The set cost.  Depending on the size of the set, extra cost may be
52
         added to reflect register pressure.
53
 
54
      All the costs are defined in a machine-specific way, using the target
55
      hooks and machine descriptions to determine them.
56
 
57
   4) The trees are transformed to use the new variables, the dead code is
58
      removed.
59
 
60
   All of this is done loop by loop.  Doing it globally is theoretically
61
   possible, it might give a better performance and it might enable us
62
   to decide costs more precisely, but getting all the interactions right
63
   would be complicated.  */
64
 
65
#include "config.h"
66
#include "system.h"
67
#include "coretypes.h"
68
#include "tm.h"
69
#include "tree.h"
70
#include "rtl.h"
71
#include "tm_p.h"
72
#include "hard-reg-set.h"
73
#include "basic-block.h"
74
#include "output.h"
75
#include "diagnostic.h"
76
#include "tree-flow.h"
77
#include "tree-dump.h"
78
#include "timevar.h"
79
#include "cfgloop.h"
80
#include "varray.h"
81
#include "expr.h"
82
#include "tree-pass.h"
83
#include "ggc.h"
84
#include "insn-config.h"
85
#include "recog.h"
86
#include "hashtab.h"
87
#include "tree-chrec.h"
88
#include "tree-scalar-evolution.h"
89
#include "cfgloop.h"
90
#include "params.h"
91
#include "langhooks.h"
92
 
93
/* The infinite cost.  */
94
#define INFTY 10000000
95
 
96
/* The expected number of loop iterations.  TODO -- use profiling instead of
97
   this.  */
98
#define AVG_LOOP_NITER(LOOP) 5
99
 
100
 
101
/* Representation of the induction variable.  */
102
struct iv
103
{
104
  tree base;            /* Initial value of the iv.  */
105
  tree base_object;     /* A memory object to that the induction variable points.  */
106
  tree step;            /* Step of the iv (constant only).  */
107
  tree ssa_name;        /* The ssa name with the value.  */
108
  bool biv_p;           /* Is it a biv?  */
109
  bool have_use_for;    /* Do we already have a use for it?  */
110
  unsigned use_id;      /* The identifier in the use if it is the case.  */
111
};
112
 
113
/* Per-ssa version information (induction variable descriptions, etc.).  */
114
struct version_info
115
{
116
  tree name;            /* The ssa name.  */
117
  struct iv *iv;        /* Induction variable description.  */
118
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
119
                           an expression that is not an induction variable.  */
120
  unsigned inv_id;      /* Id of an invariant.  */
121
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
122
};
123
 
124
/* Information attached to loop.  */
125
struct loop_data
126
{
127
  unsigned regs_used;   /* Number of registers used.  */
128
};
129
 
130
/* Types of uses.  */
131
enum use_type
132
{
133
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
134
  USE_OUTER,            /* The induction variable is used outside the loop.  */
135
  USE_ADDRESS,          /* Use in an address.  */
136
  USE_COMPARE           /* Use is a compare.  */
137
};
138
 
139
/* The candidate - cost pair.  */
140
struct cost_pair
141
{
142
  struct iv_cand *cand; /* The candidate.  */
143
  unsigned cost;        /* The cost.  */
144
  bitmap depends_on;    /* The list of invariants that have to be
145
                           preserved.  */
146
  tree value;           /* For final value elimination, the expression for
147
                           the final value of the iv.  For iv elimination,
148
                           the new bound to compare with.  */
149
};
150
 
151
/* Use.  */
152
struct iv_use
153
{
154
  unsigned id;          /* The id of the use.  */
155
  enum use_type type;   /* Type of the use.  */
156
  struct iv *iv;        /* The induction variable it is based on.  */
157
  tree stmt;            /* Statement in that it occurs.  */
158
  tree *op_p;           /* The place where it occurs.  */
159
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
160
                           important ones.  */
161
 
162
  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
163
  struct cost_pair *cost_map;
164
                        /* The costs wrto the iv candidates.  */
165
 
166
  struct iv_cand *selected;
167
                        /* The selected candidate.  */
168
};
169
 
170
/* The position where the iv is computed.  */
171
enum iv_position
172
{
173
  IP_NORMAL,            /* At the end, just before the exit condition.  */
174
  IP_END,               /* At the end of the latch block.  */
175
  IP_ORIGINAL           /* The original biv.  */
176
};
177
 
178
/* The induction variable candidate.  */
179
struct iv_cand
180
{
181
  unsigned id;          /* The number of the candidate.  */
182
  bool important;       /* Whether this is an "important" candidate, i.e. such
183
                           that it should be considered by all uses.  */
184
  enum iv_position pos; /* Where it is computed.  */
185
  tree incremented_at;  /* For original biv, the statement where it is
186
                           incremented.  */
187
  tree var_before;      /* The variable used for it before increment.  */
188
  tree var_after;       /* The variable used for it after increment.  */
189
  struct iv *iv;        /* The value of the candidate.  NULL for
190
                           "pseudocandidate" used to indicate the possibility
191
                           to replace the final value of an iv by direct
192
                           computation of the value.  */
193
  unsigned cost;        /* Cost of the candidate.  */
194
  bitmap depends_on;    /* The list of invariants that are used in step of the
195
                           biv.  */
196
};
197
 
198
/* The data used by the induction variable optimizations.  */
199
 
200
typedef struct iv_use *iv_use_p;
201
DEF_VEC_P(iv_use_p);
202
DEF_VEC_ALLOC_P(iv_use_p,heap);
203
 
204
typedef struct iv_cand *iv_cand_p;
205
DEF_VEC_P(iv_cand_p);
206
DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
 
208
struct ivopts_data
209
{
210
  /* The currently optimized loop.  */
211
  struct loop *current_loop;
212
 
213
  /* Numbers of iterations for all exits of the current loop.  */
214
  htab_t niters;
215
 
216
  /* The size of version_info array allocated.  */
217
  unsigned version_info_size;
218
 
219
  /* The array of information for the ssa names.  */
220
  struct version_info *version_info;
221
 
222
  /* The bitmap of indices in version_info whose value was changed.  */
223
  bitmap relevant;
224
 
225
  /* The maximum invariant id.  */
226
  unsigned max_inv_id;
227
 
228
  /* The uses of induction variables.  */
229
  VEC(iv_use_p,heap) *iv_uses;
230
 
231
  /* The candidates.  */
232
  VEC(iv_cand_p,heap) *iv_candidates;
233
 
234
  /* A bitmap of important candidates.  */
235
  bitmap important_candidates;
236
 
237
  /* Whether to consider just related and important candidates when replacing a
238
     use.  */
239
  bool consider_all_candidates;
240
};
241
 
242
/* An assignment of iv candidates to uses.  */
243
 
244
struct iv_ca
245
{
246
  /* The number of uses covered by the assignment.  */
247
  unsigned upto;
248
 
249
  /* Number of uses that cannot be expressed by the candidates in the set.  */
250
  unsigned bad_uses;
251
 
252
  /* Candidate assigned to a use, together with the related costs.  */
253
  struct cost_pair **cand_for_use;
254
 
255
  /* Number of times each candidate is used.  */
256
  unsigned *n_cand_uses;
257
 
258
  /* The candidates used.  */
259
  bitmap cands;
260
 
261
  /* The number of candidates in the set.  */
262
  unsigned n_cands;
263
 
264
  /* Total number of registers needed.  */
265
  unsigned n_regs;
266
 
267
  /* Total cost of expressing uses.  */
268
  unsigned cand_use_cost;
269
 
270
  /* Total cost of candidates.  */
271
  unsigned cand_cost;
272
 
273
  /* Number of times each invariant is used.  */
274
  unsigned *n_invariant_uses;
275
 
276
  /* Total cost of the assignment.  */
277
  unsigned cost;
278
};
279
 
280
/* Difference of two iv candidate assignments.  */
281
 
282
struct iv_ca_delta
283
{
284
  /* Changed use.  */
285
  struct iv_use *use;
286
 
287
  /* An old assignment (for rollback purposes).  */
288
  struct cost_pair *old_cp;
289
 
290
  /* A new assignment.  */
291
  struct cost_pair *new_cp;
292
 
293
  /* Next change in the list.  */
294
  struct iv_ca_delta *next_change;
295
};
296
 
297
/* Bound on number of candidates below that all candidates are considered.  */
298
 
299
#define CONSIDER_ALL_CANDIDATES_BOUND \
300
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301
 
302
/* If there are more iv occurrences, we just give up (it is quite unlikely that
303
   optimizing such a loop would help, and it would take ages).  */
304
 
305
#define MAX_CONSIDERED_USES \
306
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307
 
308
/* If there are at most this number of ivs in the set, try removing unnecessary
309
   ivs from the set always.  */
310
 
311
#define ALWAYS_PRUNE_CAND_SET_BOUND \
312
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313
 
314
/* The list of trees for that the decl_rtl field must be reset is stored
315
   here.  */
316
 
317
static VEC(tree,heap) *decl_rtl_to_reset;
318
 
319
/* Number of uses recorded in DATA.  */
320
 
321
static inline unsigned
322
n_iv_uses (struct ivopts_data *data)
323
{
324
  return VEC_length (iv_use_p, data->iv_uses);
325
}
326
 
327
/* Ith use recorded in DATA.  */
328
 
329
static inline struct iv_use *
330
iv_use (struct ivopts_data *data, unsigned i)
331
{
332
  return VEC_index (iv_use_p, data->iv_uses, i);
333
}
334
 
335
/* Number of candidates recorded in DATA.  */
336
 
337
static inline unsigned
338
n_iv_cands (struct ivopts_data *data)
339
{
340
  return VEC_length (iv_cand_p, data->iv_candidates);
341
}
342
 
343
/* Ith candidate recorded in DATA.  */
344
 
345
static inline struct iv_cand *
346
iv_cand (struct ivopts_data *data, unsigned i)
347
{
348
  return VEC_index (iv_cand_p, data->iv_candidates, i);
349
}
350
 
351
/* The data for LOOP.  */
352
 
353
static inline struct loop_data *
354
loop_data (struct loop *loop)
355
{
356
  return loop->aux;
357
}
358
 
359
/* The single loop exit if it dominates the latch, NULL otherwise.  */
360
 
361
edge
362
single_dom_exit (struct loop *loop)
363
{
364
  edge exit = loop->single_exit;
365
 
366
  if (!exit)
367
    return NULL;
368
 
369
  if (!just_once_each_iteration_p (loop, exit->src))
370
    return NULL;
371
 
372
  return exit;
373
}
374
 
375
/* Dumps information about the induction variable IV to FILE.  */
376
 
377
extern void dump_iv (FILE *, struct iv *);
378
void
379
dump_iv (FILE *file, struct iv *iv)
380
{
381
  if (iv->ssa_name)
382
    {
383
      fprintf (file, "ssa name ");
384
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385
      fprintf (file, "\n");
386
    }
387
 
388
  fprintf (file, "  type ");
389
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390
  fprintf (file, "\n");
391
 
392
  if (iv->step)
393
    {
394
      fprintf (file, "  base ");
395
      print_generic_expr (file, iv->base, TDF_SLIM);
396
      fprintf (file, "\n");
397
 
398
      fprintf (file, "  step ");
399
      print_generic_expr (file, iv->step, TDF_SLIM);
400
      fprintf (file, "\n");
401
    }
402
  else
403
    {
404
      fprintf (file, "  invariant ");
405
      print_generic_expr (file, iv->base, TDF_SLIM);
406
      fprintf (file, "\n");
407
    }
408
 
409
  if (iv->base_object)
410
    {
411
      fprintf (file, "  base object ");
412
      print_generic_expr (file, iv->base_object, TDF_SLIM);
413
      fprintf (file, "\n");
414
    }
415
 
416
  if (iv->biv_p)
417
    fprintf (file, "  is a biv\n");
418
}
419
 
420
/* Dumps information about the USE to FILE.  */
421
 
422
extern void dump_use (FILE *, struct iv_use *);
423
void
424
dump_use (FILE *file, struct iv_use *use)
425
{
426
  fprintf (file, "use %d\n", use->id);
427
 
428
  switch (use->type)
429
    {
430
    case USE_NONLINEAR_EXPR:
431
      fprintf (file, "  generic\n");
432
      break;
433
 
434
    case USE_OUTER:
435
      fprintf (file, "  outside\n");
436
      break;
437
 
438
    case USE_ADDRESS:
439
      fprintf (file, "  address\n");
440
      break;
441
 
442
    case USE_COMPARE:
443
      fprintf (file, "  compare\n");
444
      break;
445
 
446
    default:
447
      gcc_unreachable ();
448
    }
449
 
450
  fprintf (file, "  in statement ");
451
  print_generic_expr (file, use->stmt, TDF_SLIM);
452
  fprintf (file, "\n");
453
 
454
  fprintf (file, "  at position ");
455
  if (use->op_p)
456
    print_generic_expr (file, *use->op_p, TDF_SLIM);
457
  fprintf (file, "\n");
458
 
459
  dump_iv (file, use->iv);
460
 
461
  if (use->related_cands)
462
    {
463
      fprintf (file, "  related candidates ");
464
      dump_bitmap (file, use->related_cands);
465
    }
466
}
467
 
468
/* Dumps information about the uses to FILE.  */
469
 
470
extern void dump_uses (FILE *, struct ivopts_data *);
471
void
472
dump_uses (FILE *file, struct ivopts_data *data)
473
{
474
  unsigned i;
475
  struct iv_use *use;
476
 
477
  for (i = 0; i < n_iv_uses (data); i++)
478
    {
479
      use = iv_use (data, i);
480
 
481
      dump_use (file, use);
482
      fprintf (file, "\n");
483
    }
484
}
485
 
486
/* Dumps information about induction variable candidate CAND to FILE.  */
487
 
488
extern void dump_cand (FILE *, struct iv_cand *);
489
void
490
dump_cand (FILE *file, struct iv_cand *cand)
491
{
492
  struct iv *iv = cand->iv;
493
 
494
  fprintf (file, "candidate %d%s\n",
495
           cand->id, cand->important ? " (important)" : "");
496
 
497
  if (cand->depends_on)
498
    {
499
      fprintf (file, "  depends on ");
500
      dump_bitmap (file, cand->depends_on);
501
    }
502
 
503
  if (!iv)
504
    {
505
      fprintf (file, "  final value replacement\n");
506
      return;
507
    }
508
 
509
  switch (cand->pos)
510
    {
511
    case IP_NORMAL:
512
      fprintf (file, "  incremented before exit test\n");
513
      break;
514
 
515
    case IP_END:
516
      fprintf (file, "  incremented at end\n");
517
      break;
518
 
519
    case IP_ORIGINAL:
520
      fprintf (file, "  original biv\n");
521
      break;
522
    }
523
 
524
  dump_iv (file, iv);
525
}
526
 
527
/* Returns the info for ssa version VER.  */
528
 
529
static inline struct version_info *
530
ver_info (struct ivopts_data *data, unsigned ver)
531
{
532
  return data->version_info + ver;
533
}
534
 
535
/* Returns the info for ssa name NAME.  */
536
 
537
static inline struct version_info *
538
name_info (struct ivopts_data *data, tree name)
539
{
540
  return ver_info (data, SSA_NAME_VERSION (name));
541
}
542
 
543
/* Checks whether there exists number X such that X * B = A, counting modulo
544
   2^BITS.  */
545
 
546
static bool
547
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548
        HOST_WIDE_INT *x)
549
{
550
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551
  unsigned HOST_WIDE_INT inv, ex, val;
552
  unsigned i;
553
 
554
  a &= mask;
555
  b &= mask;
556
 
557
  /* First divide the whole equation by 2 as long as possible.  */
558
  while (!(a & 1) && !(b & 1))
559
    {
560
      a >>= 1;
561
      b >>= 1;
562
      bits--;
563
      mask >>= 1;
564
    }
565
 
566
  if (!(b & 1))
567
    {
568
      /* If b is still even, a is odd and there is no such x.  */
569
      return false;
570
    }
571
 
572
  /* Find the inverse of b.  We compute it as
573
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
574
  inv = 1;
575
  ex = b;
576
  for (i = 0; i < bits - 1; i++)
577
    {
578
      inv = (inv * ex) & mask;
579
      ex = (ex * ex) & mask;
580
    }
581
 
582
  val = (a * inv) & mask;
583
 
584
  gcc_assert (((val * b) & mask) == a);
585
 
586
  if ((val >> (bits - 1)) & 1)
587
    val |= ~mask;
588
 
589
  *x = val;
590
 
591
  return true;
592
}
593
 
594
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595
   emitted in LOOP.  */
596
 
597
static bool
598
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599
{
600
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
 
602
  gcc_assert (bb);
603
 
604
  if (sbb == loop->latch)
605
    return true;
606
 
607
  if (sbb != bb)
608
    return false;
609
 
610
  return stmt == last_stmt (bb);
611
}
612
 
613
/* Returns true if STMT if after the place where the original induction
614
   variable CAND is incremented.  */
615
 
616
static bool
617
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
618
{
619
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620
  basic_block stmt_bb = bb_for_stmt (stmt);
621
  block_stmt_iterator bsi;
622
 
623
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624
    return false;
625
 
626
  if (stmt_bb != cand_bb)
627
    return true;
628
 
629
  /* Scan the block from the end, since the original ivs are usually
630
     incremented at the end of the loop body.  */
631
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
632
    {
633
      if (bsi_stmt (bsi) == cand->incremented_at)
634
        return false;
635
      if (bsi_stmt (bsi) == stmt)
636
        return true;
637
    }
638
}
639
 
640
/* Returns true if STMT if after the place where the induction variable
641
   CAND is incremented in LOOP.  */
642
 
643
static bool
644
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645
{
646
  switch (cand->pos)
647
    {
648
    case IP_END:
649
      return false;
650
 
651
    case IP_NORMAL:
652
      return stmt_after_ip_normal_pos (loop, stmt);
653
 
654
    case IP_ORIGINAL:
655
      return stmt_after_ip_original_pos (cand, stmt);
656
 
657
    default:
658
      gcc_unreachable ();
659
    }
660
}
661
 
662
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
663
 
664
static bool
665
abnormal_ssa_name_p (tree exp)
666
{
667
  if (!exp)
668
    return false;
669
 
670
  if (TREE_CODE (exp) != SSA_NAME)
671
    return false;
672
 
673
  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
674
}
675
 
676
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
677
   abnormal phi node.  Callback for for_each_index.  */
678
 
679
static bool
680
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
681
                                  void *data ATTRIBUTE_UNUSED)
682
{
683
  if (TREE_CODE (base) == ARRAY_REF)
684
    {
685
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
686
        return false;
687
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
688
        return false;
689
    }
690
 
691
  return !abnormal_ssa_name_p (*index);
692
}
693
 
694
/* Returns true if EXPR contains a ssa name that occurs in an
695
   abnormal phi node.  */
696
 
697
bool
698
contains_abnormal_ssa_name_p (tree expr)
699
{
700
  enum tree_code code;
701
  enum tree_code_class class;
702
 
703
  if (!expr)
704
    return false;
705
 
706
  code = TREE_CODE (expr);
707
  class = TREE_CODE_CLASS (code);
708
 
709
  if (code == SSA_NAME)
710
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
711
 
712
  if (code == INTEGER_CST
713
      || is_gimple_min_invariant (expr))
714
    return false;
715
 
716
  if (code == ADDR_EXPR)
717
    return !for_each_index (&TREE_OPERAND (expr, 0),
718
                            idx_contains_abnormal_ssa_name_p,
719
                            NULL);
720
 
721
  switch (class)
722
    {
723
    case tcc_binary:
724
    case tcc_comparison:
725
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
726
        return true;
727
 
728
      /* Fallthru.  */
729
    case tcc_unary:
730
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
731
        return true;
732
 
733
      break;
734
 
735
    default:
736
      gcc_unreachable ();
737
    }
738
 
739
  return false;
740
}
741
 
742
/* Element of the table in that we cache the numbers of iterations obtained
743
   from exits of the loop.  */
744
 
745
struct nfe_cache_elt
746
{
747
  /* The edge for that the number of iterations is cached.  */
748
  edge exit;
749
 
750
  /* Number of iterations corresponding to this exit, or NULL if it cannot be
751
     determined.  */
752
  tree niter;
753
};
754
 
755
/* Hash function for nfe_cache_elt E.  */
756
 
757
static hashval_t
758
nfe_hash (const void *e)
759
{
760
  const struct nfe_cache_elt *elt = e;
761
 
762
  return htab_hash_pointer (elt->exit);
763
}
764
 
765
/* Equality function for nfe_cache_elt E1 and edge E2.  */
766
 
767
static int
768
nfe_eq (const void *e1, const void *e2)
769
{
770
  const struct nfe_cache_elt *elt1 = e1;
771
 
772
  return elt1->exit == e2;
773
}
774
 
775
/*  Returns tree describing number of iterations determined from
776
    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
777
 
778
static tree
779
niter_for_exit (struct ivopts_data *data, edge exit)
780
{
781
  struct nfe_cache_elt *nfe_desc;
782
  struct tree_niter_desc desc;
783
  PTR *slot;
784
 
785
  slot = htab_find_slot_with_hash (data->niters, exit,
786
                                   htab_hash_pointer (exit),
787
                                   INSERT);
788
 
789
  if (!*slot)
790
    {
791
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
792
      nfe_desc->exit = exit;
793
 
794
      /* Try to determine number of iterations.  We must know it
795
         unconditionally (i.e., without possibility of # of iterations
796
         being zero).  Also, we cannot safely work with ssa names that
797
         appear in phi nodes on abnormal edges, so that we do not create
798
         overlapping life ranges for them (PR 27283).  */
799
      if (number_of_iterations_exit (data->current_loop,
800
                                     exit, &desc, true)
801
          && zero_p (desc.may_be_zero)
802
          && !contains_abnormal_ssa_name_p (desc.niter))
803
        nfe_desc->niter = desc.niter;
804
      else
805
        nfe_desc->niter = NULL_TREE;
806
    }
807
  else
808
    nfe_desc = *slot;
809
 
810
  return nfe_desc->niter;
811
}
812
 
813
/* Returns tree describing number of iterations determined from
814
   single dominating exit of DATA->current_loop, or NULL if something
815
   goes wrong.  */
816
 
817
static tree
818
niter_for_single_dom_exit (struct ivopts_data *data)
819
{
820
  edge exit = single_dom_exit (data->current_loop);
821
 
822
  if (!exit)
823
    return NULL;
824
 
825
  return niter_for_exit (data, exit);
826
}
827
 
828
/* Initializes data structures used by the iv optimization pass, stored
829
   in DATA.  LOOPS is the loop tree.  */
830
 
831
static void
832
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
833
{
834
  unsigned i;
835
 
836
  data->version_info_size = 2 * num_ssa_names;
837
  data->version_info = xcalloc (data->version_info_size,
838
                                sizeof (struct version_info));
839
  data->relevant = BITMAP_ALLOC (NULL);
840
  data->important_candidates = BITMAP_ALLOC (NULL);
841
  data->max_inv_id = 0;
842
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
843
 
844
  for (i = 1; i < loops->num; i++)
845
    if (loops->parray[i])
846
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
847
 
848
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
849
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
850
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
851
}
852
 
853
/* Returns a memory object to that EXPR points.  In case we are able to
854
   determine that it does not point to any such object, NULL is returned.  */
855
 
856
static tree
857
determine_base_object (tree expr)
858
{
859
  enum tree_code code = TREE_CODE (expr);
860
  tree base, obj, op0, op1;
861
 
862
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
863
    return NULL_TREE;
864
 
865
  switch (code)
866
    {
867
    case INTEGER_CST:
868
      return NULL_TREE;
869
 
870
    case ADDR_EXPR:
871
      obj = TREE_OPERAND (expr, 0);
872
      base = get_base_address (obj);
873
 
874
      if (!base)
875
        return expr;
876
 
877
      if (TREE_CODE (base) == INDIRECT_REF)
878
        return determine_base_object (TREE_OPERAND (base, 0));
879
 
880
      return fold_convert (ptr_type_node,
881
                           build_fold_addr_expr (base));
882
 
883
    case PLUS_EXPR:
884
    case MINUS_EXPR:
885
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
886
      op1 = determine_base_object (TREE_OPERAND (expr, 1));
887
 
888
      if (!op1)
889
        return op0;
890
 
891
      if (!op0)
892
        return (code == PLUS_EXPR
893
                ? op1
894
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
895
 
896
      return fold_build2 (code, ptr_type_node, op0, op1);
897
 
898
    case NOP_EXPR:
899
    case CONVERT_EXPR:
900
      return determine_base_object (TREE_OPERAND (expr, 0));
901
 
902
    default:
903
      return fold_convert (ptr_type_node, expr);
904
    }
905
}
906
 
907
/* Allocates an induction variable with given initial value BASE and step STEP
908
   for loop LOOP.  */
909
 
910
static struct iv *
911
alloc_iv (tree base, tree step)
912
{
913
  struct iv *iv = xcalloc (1, sizeof (struct iv));
914
 
915
  if (step && integer_zerop (step))
916
    step = NULL_TREE;
917
 
918
  iv->base = base;
919
  iv->base_object = determine_base_object (base);
920
  iv->step = step;
921
  iv->biv_p = false;
922
  iv->have_use_for = false;
923
  iv->use_id = 0;
924
  iv->ssa_name = NULL_TREE;
925
 
926
  return iv;
927
}
928
 
929
/* Sets STEP and BASE for induction variable IV.  */
930
 
931
static void
932
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
933
{
934
  struct version_info *info = name_info (data, iv);
935
 
936
  gcc_assert (!info->iv);
937
 
938
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
939
  info->iv = alloc_iv (base, step);
940
  info->iv->ssa_name = iv;
941
}
942
 
943
/* Finds induction variable declaration for VAR.  */
944
 
945
static struct iv *
946
get_iv (struct ivopts_data *data, tree var)
947
{
948
  basic_block bb;
949
 
950
  if (!name_info (data, var)->iv)
951
    {
952
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
953
 
954
      if (!bb
955
          || !flow_bb_inside_loop_p (data->current_loop, bb))
956
        set_iv (data, var, var, NULL_TREE);
957
    }
958
 
959
  return name_info (data, var)->iv;
960
}
961
 
962
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
963
   not define a simple affine biv with nonzero step.  */
964
 
965
static tree
966
determine_biv_step (tree phi)
967
{
968
  struct loop *loop = bb_for_stmt (phi)->loop_father;
969
  tree name = PHI_RESULT (phi);
970
  affine_iv iv;
971
 
972
  if (!is_gimple_reg (name))
973
    return NULL_TREE;
974
 
975
  if (!simple_iv (loop, phi, name, &iv, true))
976
    return NULL_TREE;
977
 
978
  return (zero_p (iv.step) ? NULL_TREE : iv.step);
979
}
980
 
981
/* Finds basic ivs.  */
982
 
983
static bool
984
find_bivs (struct ivopts_data *data)
985
{
986
  tree phi, step, type, base;
987
  bool found = false;
988
  struct loop *loop = data->current_loop;
989
 
990
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
991
    {
992
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
993
        continue;
994
 
995
      step = determine_biv_step (phi);
996
      if (!step)
997
        continue;
998
 
999
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1000
      base = expand_simple_operations (base);
1001
      if (contains_abnormal_ssa_name_p (base)
1002
          || contains_abnormal_ssa_name_p (step))
1003
        continue;
1004
 
1005
      type = TREE_TYPE (PHI_RESULT (phi));
1006
      base = fold_convert (type, base);
1007
      if (step)
1008
        step = fold_convert (type, step);
1009
 
1010
      set_iv (data, PHI_RESULT (phi), base, step);
1011
      found = true;
1012
    }
1013
 
1014
  return found;
1015
}
1016
 
1017
/* Marks basic ivs.  */
1018
 
1019
static void
1020
mark_bivs (struct ivopts_data *data)
1021
{
1022
  tree phi, var;
1023
  struct iv *iv, *incr_iv;
1024
  struct loop *loop = data->current_loop;
1025
  basic_block incr_bb;
1026
 
1027
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1028
    {
1029
      iv = get_iv (data, PHI_RESULT (phi));
1030
      if (!iv)
1031
        continue;
1032
 
1033
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1034
      incr_iv = get_iv (data, var);
1035
      if (!incr_iv)
1036
        continue;
1037
 
1038
      /* If the increment is in the subloop, ignore it.  */
1039
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1040
      if (incr_bb->loop_father != data->current_loop
1041
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1042
        continue;
1043
 
1044
      iv->biv_p = true;
1045
      incr_iv->biv_p = true;
1046
    }
1047
}
1048
 
1049
/* Checks whether STMT defines a linear induction variable and stores its
1050
   parameters to IV.  */
1051
 
1052
static bool
1053
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1054
{
1055
  tree lhs;
1056
  struct loop *loop = data->current_loop;
1057
 
1058
  iv->base = NULL_TREE;
1059
  iv->step = NULL_TREE;
1060
 
1061
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1062
    return false;
1063
 
1064
  lhs = TREE_OPERAND (stmt, 0);
1065
  if (TREE_CODE (lhs) != SSA_NAME)
1066
    return false;
1067
 
1068
  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1069
    return false;
1070
  iv->base = expand_simple_operations (iv->base);
1071
 
1072
  if (contains_abnormal_ssa_name_p (iv->base)
1073
      || contains_abnormal_ssa_name_p (iv->step))
1074
    return false;
1075
 
1076
  return true;
1077
}
1078
 
1079
/* Finds general ivs in statement STMT.  */
1080
 
1081
static void
1082
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1083
{
1084
  affine_iv iv;
1085
 
1086
  if (!find_givs_in_stmt_scev (data, stmt, &iv))
1087
    return;
1088
 
1089
  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1090
}
1091
 
1092
/* Finds general ivs in basic block BB.  */
1093
 
1094
static void
1095
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1096
{
1097
  block_stmt_iterator bsi;
1098
 
1099
  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1100
    find_givs_in_stmt (data, bsi_stmt (bsi));
1101
}
1102
 
1103
/* Finds general ivs.  */
1104
 
1105
static void
1106
find_givs (struct ivopts_data *data)
1107
{
1108
  struct loop *loop = data->current_loop;
1109
  basic_block *body = get_loop_body_in_dom_order (loop);
1110
  unsigned i;
1111
 
1112
  for (i = 0; i < loop->num_nodes; i++)
1113
    find_givs_in_bb (data, body[i]);
1114
  free (body);
1115
}
1116
 
1117
/* For each ssa name defined in LOOP determines whether it is an induction
1118
   variable and if so, its initial value and step.  */
1119
 
1120
static bool
1121
find_induction_variables (struct ivopts_data *data)
1122
{
1123
  unsigned i;
1124
  bitmap_iterator bi;
1125
 
1126
  if (!find_bivs (data))
1127
    return false;
1128
 
1129
  find_givs (data);
1130
  mark_bivs (data);
1131
 
1132
  if (dump_file && (dump_flags & TDF_DETAILS))
1133
    {
1134
      tree niter = niter_for_single_dom_exit (data);
1135
 
1136
      if (niter)
1137
        {
1138
          fprintf (dump_file, "  number of iterations ");
1139
          print_generic_expr (dump_file, niter, TDF_SLIM);
1140
          fprintf (dump_file, "\n\n");
1141
        };
1142
 
1143
      fprintf (dump_file, "Induction variables:\n\n");
1144
 
1145
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1146
        {
1147
          if (ver_info (data, i)->iv)
1148
            dump_iv (dump_file, ver_info (data, i)->iv);
1149
        }
1150
    }
1151
 
1152
  return true;
1153
}
1154
 
1155
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1156
 
1157
static struct iv_use *
1158
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1159
            tree stmt, enum use_type use_type)
1160
{
1161
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1162
 
1163
  use->id = n_iv_uses (data);
1164
  use->type = use_type;
1165
  use->iv = iv;
1166
  use->stmt = stmt;
1167
  use->op_p = use_p;
1168
  use->related_cands = BITMAP_ALLOC (NULL);
1169
 
1170
  /* To avoid showing ssa name in the dumps, if it was not reset by the
1171
     caller.  */
1172
  iv->ssa_name = NULL_TREE;
1173
 
1174
  if (dump_file && (dump_flags & TDF_DETAILS))
1175
    dump_use (dump_file, use);
1176
 
1177
  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1178
 
1179
  return use;
1180
}
1181
 
1182
/* Checks whether OP is a loop-level invariant and if so, records it.
1183
   NONLINEAR_USE is true if the invariant is used in a way we do not
1184
   handle specially.  */
1185
 
1186
static void
1187
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1188
{
1189
  basic_block bb;
1190
  struct version_info *info;
1191
 
1192
  if (TREE_CODE (op) != SSA_NAME
1193
      || !is_gimple_reg (op))
1194
    return;
1195
 
1196
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1197
  if (bb
1198
      && flow_bb_inside_loop_p (data->current_loop, bb))
1199
    return;
1200
 
1201
  info = name_info (data, op);
1202
  info->name = op;
1203
  info->has_nonlin_use |= nonlinear_use;
1204
  if (!info->inv_id)
1205
    info->inv_id = ++data->max_inv_id;
1206
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1207
}
1208
 
1209
/* Checks whether the use OP is interesting and if so, records it
1210
   as TYPE.  */
1211
 
1212
static struct iv_use *
1213
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1214
                                       enum use_type type)
1215
{
1216
  struct iv *iv;
1217
  struct iv *civ;
1218
  tree stmt;
1219
  struct iv_use *use;
1220
 
1221
  if (TREE_CODE (op) != SSA_NAME)
1222
    return NULL;
1223
 
1224
  iv = get_iv (data, op);
1225
  if (!iv)
1226
    return NULL;
1227
 
1228
  if (iv->have_use_for)
1229
    {
1230
      use = iv_use (data, iv->use_id);
1231
 
1232
      gcc_assert (use->type == USE_NONLINEAR_EXPR
1233
                  || use->type == USE_OUTER);
1234
 
1235
      if (type == USE_NONLINEAR_EXPR)
1236
        use->type = USE_NONLINEAR_EXPR;
1237
      return use;
1238
    }
1239
 
1240
  if (zero_p (iv->step))
1241
    {
1242
      record_invariant (data, op, true);
1243
      return NULL;
1244
    }
1245
  iv->have_use_for = true;
1246
 
1247
  civ = xmalloc (sizeof (struct iv));
1248
  *civ = *iv;
1249
 
1250
  stmt = SSA_NAME_DEF_STMT (op);
1251
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
1252
              || TREE_CODE (stmt) == MODIFY_EXPR);
1253
 
1254
  use = record_use (data, NULL, civ, stmt, type);
1255
  iv->use_id = use->id;
1256
 
1257
  return use;
1258
}
1259
 
1260
/* Checks whether the use OP is interesting and if so, records it.  */
1261
 
1262
static struct iv_use *
1263
find_interesting_uses_op (struct ivopts_data *data, tree op)
1264
{
1265
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1266
}
1267
 
1268
/* Records a definition of induction variable OP that is used outside of the
1269
   loop.  */
1270
 
1271
static struct iv_use *
1272
find_interesting_uses_outer (struct ivopts_data *data, tree op)
1273
{
1274
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1275
}
1276
 
1277
/* Checks whether the condition *COND_P in STMT is interesting
1278
   and if so, records it.  */
1279
 
1280
static void
1281
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1282
{
1283
  tree *op0_p;
1284
  tree *op1_p;
1285
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1286
  struct iv const_iv;
1287
  tree zero = integer_zero_node;
1288
 
1289
  const_iv.step = NULL_TREE;
1290
 
1291
  if (TREE_CODE (*cond_p) != SSA_NAME
1292
      && !COMPARISON_CLASS_P (*cond_p))
1293
    return;
1294
 
1295
  if (TREE_CODE (*cond_p) == SSA_NAME)
1296
    {
1297
      op0_p = cond_p;
1298
      op1_p = &zero;
1299
    }
1300
  else
1301
    {
1302
      op0_p = &TREE_OPERAND (*cond_p, 0);
1303
      op1_p = &TREE_OPERAND (*cond_p, 1);
1304
    }
1305
 
1306
  if (TREE_CODE (*op0_p) == SSA_NAME)
1307
    iv0 = get_iv (data, *op0_p);
1308
  else
1309
    iv0 = &const_iv;
1310
 
1311
  if (TREE_CODE (*op1_p) == SSA_NAME)
1312
    iv1 = get_iv (data, *op1_p);
1313
  else
1314
    iv1 = &const_iv;
1315
 
1316
  if (/* When comparing with non-invariant value, we may not do any senseful
1317
         induction variable elimination.  */
1318
      (!iv0 || !iv1)
1319
      /* Eliminating condition based on two ivs would be nontrivial.
1320
         ??? TODO -- it is not really important to handle this case.  */
1321
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1322
    {
1323
      find_interesting_uses_op (data, *op0_p);
1324
      find_interesting_uses_op (data, *op1_p);
1325
      return;
1326
    }
1327
 
1328
  if (zero_p (iv0->step) && zero_p (iv1->step))
1329
    {
1330
      /* If both are invariants, this is a work for unswitching.  */
1331
      return;
1332
    }
1333
 
1334
  civ = xmalloc (sizeof (struct iv));
1335
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
1336
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
1337
}
1338
 
1339
/* Returns true if expression EXPR is obviously invariant in LOOP,
1340
   i.e. if all its operands are defined outside of the LOOP.  */
1341
 
1342
bool
1343
expr_invariant_in_loop_p (struct loop *loop, tree expr)
1344
{
1345
  basic_block def_bb;
1346
  unsigned i, len;
1347
 
1348
  if (is_gimple_min_invariant (expr))
1349
    return true;
1350
 
1351
  if (TREE_CODE (expr) == SSA_NAME)
1352
    {
1353
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1354
      if (def_bb
1355
          && flow_bb_inside_loop_p (loop, def_bb))
1356
        return false;
1357
 
1358
      return true;
1359
    }
1360
 
1361
  if (!EXPR_P (expr))
1362
    return false;
1363
 
1364
  len = TREE_CODE_LENGTH (TREE_CODE (expr));
1365
  for (i = 0; i < len; i++)
1366
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1367
      return false;
1368
 
1369
  return true;
1370
}
1371
 
1372
/* Cumulates the steps of indices into DATA and replaces their values with the
1373
   initial ones.  Returns false when the value of the index cannot be determined.
1374
   Callback for for_each_index.  */
1375
 
1376
struct ifs_ivopts_data
1377
{
1378
  struct ivopts_data *ivopts_data;
1379
  tree stmt;
1380
  tree *step_p;
1381
};
1382
 
1383
static bool
1384
idx_find_step (tree base, tree *idx, void *data)
1385
{
1386
  struct ifs_ivopts_data *dta = data;
1387
  struct iv *iv;
1388
  tree step, iv_step, lbound, off;
1389
  struct loop *loop = dta->ivopts_data->current_loop;
1390
 
1391
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1392
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1393
    return false;
1394
 
1395
  /* If base is a component ref, require that the offset of the reference
1396
     be invariant.  */
1397
  if (TREE_CODE (base) == COMPONENT_REF)
1398
    {
1399
      off = component_ref_field_offset (base);
1400
      return expr_invariant_in_loop_p (loop, off);
1401
    }
1402
 
1403
  /* If base is array, first check whether we will be able to move the
1404
     reference out of the loop (in order to take its address in strength
1405
     reduction).  In order for this to work we need both lower bound
1406
     and step to be loop invariants.  */
1407
  if (TREE_CODE (base) == ARRAY_REF)
1408
    {
1409
      step = array_ref_element_size (base);
1410
      lbound = array_ref_low_bound (base);
1411
 
1412
      if (!expr_invariant_in_loop_p (loop, step)
1413
          || !expr_invariant_in_loop_p (loop, lbound))
1414
        return false;
1415
    }
1416
 
1417
  if (TREE_CODE (*idx) != SSA_NAME)
1418
    return true;
1419
 
1420
  iv = get_iv (dta->ivopts_data, *idx);
1421
  if (!iv)
1422
    return false;
1423
 
1424
  *idx = iv->base;
1425
 
1426
  if (!iv->step)
1427
    return true;
1428
 
1429
  if (TREE_CODE (base) == ARRAY_REF)
1430
    {
1431
      step = array_ref_element_size (base);
1432
 
1433
      /* We only handle addresses whose step is an integer constant.  */
1434
      if (TREE_CODE (step) != INTEGER_CST)
1435
        return false;
1436
    }
1437
  else
1438
    /* The step for pointer arithmetics already is 1 byte.  */
1439
    step = build_int_cst (sizetype, 1);
1440
 
1441
  /* FIXME: convert_step should not be used outside chrec_convert: fix
1442
     this by calling chrec_convert.  */
1443
  iv_step = convert_step (dta->ivopts_data->current_loop,
1444
                          sizetype, iv->base, iv->step, dta->stmt);
1445
 
1446
  if (!iv_step)
1447
    {
1448
      /* The index might wrap.  */
1449
      return false;
1450
    }
1451
 
1452
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1453
 
1454
  if (!*dta->step_p)
1455
    *dta->step_p = step;
1456
  else
1457
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1458
 
1459
  return true;
1460
}
1461
 
1462
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1463
   object is passed to it in DATA.  */
1464
 
1465
static bool
1466
idx_record_use (tree base, tree *idx,
1467
                void *data)
1468
{
1469
  find_interesting_uses_op (data, *idx);
1470
  if (TREE_CODE (base) == ARRAY_REF)
1471
    {
1472
      find_interesting_uses_op (data, array_ref_element_size (base));
1473
      find_interesting_uses_op (data, array_ref_low_bound (base));
1474
    }
1475
  return true;
1476
}
1477
 
1478
/* Returns true if memory reference REF may be unaligned.  */
1479
 
1480
static bool
1481
may_be_unaligned_p (tree ref)
1482
{
1483
  tree base;
1484
  tree base_type;
1485
  HOST_WIDE_INT bitsize;
1486
  HOST_WIDE_INT bitpos;
1487
  tree toffset;
1488
  enum machine_mode mode;
1489
  int unsignedp, volatilep;
1490
  unsigned base_align;
1491
 
1492
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1493
     thus they are not misaligned.  */
1494
  if (TREE_CODE (ref) == TARGET_MEM_REF)
1495
    return false;
1496
 
1497
  /* The test below is basically copy of what expr.c:normal_inner_ref
1498
     does to check whether the object must be loaded by parts when
1499
     STRICT_ALIGNMENT is true.  */
1500
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1501
                              &unsignedp, &volatilep, true);
1502
  base_type = TREE_TYPE (base);
1503
  base_align = TYPE_ALIGN (base_type);
1504
 
1505
  if (mode != BLKmode
1506
      && (base_align < GET_MODE_ALIGNMENT (mode)
1507
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1508
          || bitpos % BITS_PER_UNIT != 0))
1509
    return true;
1510
 
1511
  return false;
1512
}
1513
 
1514
/* Finds addresses in *OP_P inside STMT.  */
1515
 
1516
static void
1517
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1518
{
1519
  tree base = *op_p, step = NULL;
1520
  struct iv *civ;
1521
  struct ifs_ivopts_data ifs_ivopts_data;
1522
 
1523
  /* Do not play with volatile memory references.  A bit too conservative,
1524
     perhaps, but safe.  */
1525
  if (stmt_ann (stmt)->has_volatile_ops)
1526
    goto fail;
1527
 
1528
  /* Ignore bitfields for now.  Not really something terribly complicated
1529
     to handle.  TODO.  */
1530
  if (TREE_CODE (base) == BIT_FIELD_REF
1531
      || (TREE_CODE (base) == COMPONENT_REF
1532
          && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
1533
    goto fail;
1534
 
1535
  if (STRICT_ALIGNMENT
1536
      && may_be_unaligned_p (base))
1537
    goto fail;
1538
 
1539
  base = unshare_expr (base);
1540
 
1541
  if (TREE_CODE (base) == TARGET_MEM_REF)
1542
    {
1543
      tree type = build_pointer_type (TREE_TYPE (base));
1544
      tree astep;
1545
 
1546
      if (TMR_BASE (base)
1547
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1548
        {
1549
          civ = get_iv (data, TMR_BASE (base));
1550
          if (!civ)
1551
            goto fail;
1552
 
1553
          TMR_BASE (base) = civ->base;
1554
          step = civ->step;
1555
        }
1556
      if (TMR_INDEX (base)
1557
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1558
        {
1559
          civ = get_iv (data, TMR_INDEX (base));
1560
          if (!civ)
1561
            goto fail;
1562
 
1563
          TMR_INDEX (base) = civ->base;
1564
          astep = civ->step;
1565
 
1566
          if (astep)
1567
            {
1568
              if (TMR_STEP (base))
1569
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1570
 
1571
              if (step)
1572
                step = fold_build2 (PLUS_EXPR, type, step, astep);
1573
              else
1574
                step = astep;
1575
            }
1576
        }
1577
 
1578
      if (zero_p (step))
1579
        goto fail;
1580
      base = tree_mem_ref_addr (type, base);
1581
    }
1582
  else
1583
    {
1584
      ifs_ivopts_data.ivopts_data = data;
1585
      ifs_ivopts_data.stmt = stmt;
1586
      ifs_ivopts_data.step_p = &step;
1587
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1588
          || zero_p (step))
1589
        goto fail;
1590
 
1591
      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1592
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1593
 
1594
      base = build_fold_addr_expr (base);
1595
    }
1596
 
1597
  civ = alloc_iv (base, step);
1598
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599
  return;
1600
 
1601
fail:
1602
  for_each_index (op_p, idx_record_use, data);
1603
}
1604
 
1605
/* Finds and records invariants used in STMT.  */
1606
 
1607
static void
1608
find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609
{
1610
  ssa_op_iter iter;
1611
  use_operand_p use_p;
1612
  tree op;
1613
 
1614
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615
    {
1616
      op = USE_FROM_PTR (use_p);
1617
      record_invariant (data, op, false);
1618
    }
1619
}
1620
 
1621
/* Finds interesting uses of induction variables in the statement STMT.  */
1622
 
1623
static void
1624
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625
{
1626
  struct iv *iv;
1627
  tree op, lhs, rhs;
1628
  ssa_op_iter iter;
1629
  use_operand_p use_p;
1630
 
1631
  find_invariants_stmt (data, stmt);
1632
 
1633
  if (TREE_CODE (stmt) == COND_EXPR)
1634
    {
1635
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636
      return;
1637
    }
1638
 
1639
  if (TREE_CODE (stmt) == MODIFY_EXPR)
1640
    {
1641
      lhs = TREE_OPERAND (stmt, 0);
1642
      rhs = TREE_OPERAND (stmt, 1);
1643
 
1644
      if (TREE_CODE (lhs) == SSA_NAME)
1645
        {
1646
          /* If the statement defines an induction variable, the uses are not
1647
             interesting by themselves.  */
1648
 
1649
          iv = get_iv (data, lhs);
1650
 
1651
          if (iv && !zero_p (iv->step))
1652
            return;
1653
        }
1654
 
1655
      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656
        {
1657
        case tcc_comparison:
1658
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659
          return;
1660
 
1661
        case tcc_reference:
1662
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663
          if (REFERENCE_CLASS_P (lhs))
1664
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665
          return;
1666
 
1667
        default: ;
1668
        }
1669
 
1670
      if (REFERENCE_CLASS_P (lhs)
1671
          && is_gimple_val (rhs))
1672
        {
1673
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674
          find_interesting_uses_op (data, rhs);
1675
          return;
1676
        }
1677
 
1678
      /* TODO -- we should also handle address uses of type
1679
 
1680
         memory = call (whatever);
1681
 
1682
         and
1683
 
1684
         call (memory).  */
1685
    }
1686
 
1687
  if (TREE_CODE (stmt) == PHI_NODE
1688
      && bb_for_stmt (stmt) == data->current_loop->header)
1689
    {
1690
      lhs = PHI_RESULT (stmt);
1691
      iv = get_iv (data, lhs);
1692
 
1693
      if (iv && !zero_p (iv->step))
1694
        return;
1695
    }
1696
 
1697
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698
    {
1699
      op = USE_FROM_PTR (use_p);
1700
 
1701
      if (TREE_CODE (op) != SSA_NAME)
1702
        continue;
1703
 
1704
      iv = get_iv (data, op);
1705
      if (!iv)
1706
        continue;
1707
 
1708
      find_interesting_uses_op (data, op);
1709
    }
1710
}
1711
 
1712
/* Finds interesting uses of induction variables outside of loops
1713
   on loop exit edge EXIT.  */
1714
 
1715
static void
1716
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717
{
1718
  tree phi, def;
1719
 
1720
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721
    {
1722
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723
      find_interesting_uses_outer (data, def);
1724
    }
1725
}
1726
 
1727
/* Finds uses of the induction variables that are interesting.  */
1728
 
1729
static void
1730
find_interesting_uses (struct ivopts_data *data)
1731
{
1732
  basic_block bb;
1733
  block_stmt_iterator bsi;
1734
  tree phi;
1735
  basic_block *body = get_loop_body (data->current_loop);
1736
  unsigned i;
1737
  struct version_info *info;
1738
  edge e;
1739
 
1740
  if (dump_file && (dump_flags & TDF_DETAILS))
1741
    fprintf (dump_file, "Uses:\n\n");
1742
 
1743
  for (i = 0; i < data->current_loop->num_nodes; i++)
1744
    {
1745
      edge_iterator ei;
1746
      bb = body[i];
1747
 
1748
      FOR_EACH_EDGE (e, ei, bb->succs)
1749
        if (e->dest != EXIT_BLOCK_PTR
1750
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751
          find_interesting_uses_outside (data, e);
1752
 
1753
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754
        find_interesting_uses_stmt (data, phi);
1755
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757
    }
1758
 
1759
  if (dump_file && (dump_flags & TDF_DETAILS))
1760
    {
1761
      bitmap_iterator bi;
1762
 
1763
      fprintf (dump_file, "\n");
1764
 
1765
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766
        {
1767
          info = ver_info (data, i);
1768
          if (info->inv_id)
1769
            {
1770
              fprintf (dump_file, "  ");
1771
              print_generic_expr (dump_file, info->name, TDF_SLIM);
1772
              fprintf (dump_file, " is invariant (%d)%s\n",
1773
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774
            }
1775
        }
1776
 
1777
      fprintf (dump_file, "\n");
1778
    }
1779
 
1780
  free (body);
1781
}
1782
 
1783
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785
   we are at the top-level of the processed address.  */
1786
 
1787
static tree
1788
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789
                unsigned HOST_WIDE_INT *offset)
1790
{
1791
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792
  enum tree_code code;
1793
  tree type, orig_type = TREE_TYPE (expr);
1794
  unsigned HOST_WIDE_INT off0, off1, st;
1795
  tree orig_expr = expr;
1796
 
1797
  STRIP_NOPS (expr);
1798
 
1799
  type = TREE_TYPE (expr);
1800
  code = TREE_CODE (expr);
1801
  *offset = 0;
1802
 
1803
  switch (code)
1804
    {
1805
    case INTEGER_CST:
1806
      if (!cst_and_fits_in_hwi (expr)
1807
          || zero_p (expr))
1808
        return orig_expr;
1809
 
1810
      *offset = int_cst_value (expr);
1811
      return build_int_cst_type (orig_type, 0);
1812
 
1813
    case PLUS_EXPR:
1814
    case MINUS_EXPR:
1815
      op0 = TREE_OPERAND (expr, 0);
1816
      op1 = TREE_OPERAND (expr, 1);
1817
 
1818
      op0 = strip_offset_1 (op0, false, false, &off0);
1819
      op1 = strip_offset_1 (op1, false, false, &off1);
1820
 
1821
      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822
      if (op0 == TREE_OPERAND (expr, 0)
1823
          && op1 == TREE_OPERAND (expr, 1))
1824
        return orig_expr;
1825
 
1826
      if (zero_p (op1))
1827
        expr = op0;
1828
      else if (zero_p (op0))
1829
        {
1830
          if (code == PLUS_EXPR)
1831
            expr = op1;
1832
          else
1833
            expr = fold_build1 (NEGATE_EXPR, type, op1);
1834
        }
1835
      else
1836
        expr = fold_build2 (code, type, op0, op1);
1837
 
1838
      return fold_convert (orig_type, expr);
1839
 
1840
    case ARRAY_REF:
1841
      if (!inside_addr)
1842
        return orig_expr;
1843
 
1844
      step = array_ref_element_size (expr);
1845
      if (!cst_and_fits_in_hwi (step))
1846
        break;
1847
 
1848
      st = int_cst_value (step);
1849
      op1 = TREE_OPERAND (expr, 1);
1850
      op1 = strip_offset_1 (op1, false, false, &off1);
1851
      *offset = off1 * st;
1852
 
1853
      if (top_compref
1854
          && zero_p (op1))
1855
        {
1856
          /* Strip the component reference completely.  */
1857
          op0 = TREE_OPERAND (expr, 0);
1858
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859
          *offset += off0;
1860
          return op0;
1861
        }
1862
      break;
1863
 
1864
    case COMPONENT_REF:
1865
      if (!inside_addr)
1866
        return orig_expr;
1867
 
1868
      tmp = component_ref_field_offset (expr);
1869
      if (top_compref
1870
          && cst_and_fits_in_hwi (tmp))
1871
        {
1872
          /* Strip the component reference completely.  */
1873
          op0 = TREE_OPERAND (expr, 0);
1874
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875
          *offset = off0 + int_cst_value (tmp);
1876
          return op0;
1877
        }
1878
      break;
1879
 
1880
    case ADDR_EXPR:
1881
      op0 = TREE_OPERAND (expr, 0);
1882
      op0 = strip_offset_1 (op0, true, true, &off0);
1883
      *offset += off0;
1884
 
1885
      if (op0 == TREE_OPERAND (expr, 0))
1886
        return orig_expr;
1887
 
1888
      expr = build_fold_addr_expr (op0);
1889
      return fold_convert (orig_type, expr);
1890
 
1891
    case INDIRECT_REF:
1892
      inside_addr = false;
1893
      break;
1894
 
1895
    default:
1896
      return orig_expr;
1897
    }
1898
 
1899
  /* Default handling of expressions for that we want to recurse into
1900
     the first operand.  */
1901
  op0 = TREE_OPERAND (expr, 0);
1902
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903
  *offset += off0;
1904
 
1905
  if (op0 == TREE_OPERAND (expr, 0)
1906
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907
    return orig_expr;
1908
 
1909
  expr = copy_node (expr);
1910
  TREE_OPERAND (expr, 0) = op0;
1911
  if (op1)
1912
    TREE_OPERAND (expr, 1) = op1;
1913
 
1914
  /* Inside address, we might strip the top level component references,
1915
     thus changing type of the expression.  Handling of ADDR_EXPR
1916
     will fix that.  */
1917
  expr = fold_convert (orig_type, expr);
1918
 
1919
  return expr;
1920
}
1921
 
1922
/* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
 
1924
static tree
1925
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926
{
1927
  return strip_offset_1 (expr, false, false, offset);
1928
}
1929
 
1930
/* Returns variant of TYPE that can be used as base for different uses.
1931
   For integer types, we return unsigned variant of the type, which
1932
   avoids problems with overflows.  For pointer types, we return void *.  */
1933
 
1934
static tree
1935
generic_type_for (tree type)
1936
{
1937
  if (POINTER_TYPE_P (type))
1938
    return ptr_type_node;
1939
 
1940
  if (TYPE_UNSIGNED (type))
1941
    return type;
1942
 
1943
  return unsigned_type_for (type);
1944
}
1945
 
1946
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947
   the bitmap to that we should store it.  */
1948
 
1949
static struct ivopts_data *fd_ivopts_data;
1950
static tree
1951
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952
{
1953
  bitmap *depends_on = data;
1954
  struct version_info *info;
1955
 
1956
  if (TREE_CODE (*expr_p) != SSA_NAME)
1957
    return NULL_TREE;
1958
  info = name_info (fd_ivopts_data, *expr_p);
1959
 
1960
  if (!info->inv_id || info->has_nonlin_use)
1961
    return NULL_TREE;
1962
 
1963
  if (!*depends_on)
1964
    *depends_on = BITMAP_ALLOC (NULL);
1965
  bitmap_set_bit (*depends_on, info->inv_id);
1966
 
1967
  return NULL_TREE;
1968
}
1969
 
1970
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971
   position to POS.  If USE is not NULL, the candidate is set as related to
1972
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973
   replacement of the final value of the iv by a direct computation.  */
1974
 
1975
static struct iv_cand *
1976
add_candidate_1 (struct ivopts_data *data,
1977
                 tree base, tree step, bool important, enum iv_position pos,
1978
                 struct iv_use *use, tree incremented_at)
1979
{
1980
  unsigned i;
1981
  struct iv_cand *cand = NULL;
1982
  tree type, orig_type;
1983
 
1984
  if (base)
1985
    {
1986
      orig_type = TREE_TYPE (base);
1987
      type = generic_type_for (orig_type);
1988
      if (type != orig_type)
1989
        {
1990
          base = fold_convert (type, base);
1991
          if (step)
1992
            step = fold_convert (type, step);
1993
        }
1994
    }
1995
 
1996
  for (i = 0; i < n_iv_cands (data); i++)
1997
    {
1998
      cand = iv_cand (data, i);
1999
 
2000
      if (cand->pos != pos)
2001
        continue;
2002
 
2003
      if (cand->incremented_at != incremented_at)
2004
        continue;
2005
 
2006
      if (!cand->iv)
2007
        {
2008
          if (!base && !step)
2009
            break;
2010
 
2011
          continue;
2012
        }
2013
 
2014
      if (!base && !step)
2015
        continue;
2016
 
2017
      if (!operand_equal_p (base, cand->iv->base, 0))
2018
        continue;
2019
 
2020
      if (zero_p (cand->iv->step))
2021
        {
2022
          if (zero_p (step))
2023
            break;
2024
        }
2025
      else
2026
        {
2027
          if (step && operand_equal_p (step, cand->iv->step, 0))
2028
            break;
2029
        }
2030
    }
2031
 
2032
  if (i == n_iv_cands (data))
2033
    {
2034
      cand = xcalloc (1, sizeof (struct iv_cand));
2035
      cand->id = i;
2036
 
2037
      if (!base && !step)
2038
        cand->iv = NULL;
2039
      else
2040
        cand->iv = alloc_iv (base, step);
2041
 
2042
      cand->pos = pos;
2043
      if (pos != IP_ORIGINAL && cand->iv)
2044
        {
2045
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046
          cand->var_after = cand->var_before;
2047
        }
2048
      cand->important = important;
2049
      cand->incremented_at = incremented_at;
2050
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
 
2052
      if (step
2053
          && TREE_CODE (step) != INTEGER_CST)
2054
        {
2055
          fd_ivopts_data = data;
2056
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057
        }
2058
 
2059
      if (dump_file && (dump_flags & TDF_DETAILS))
2060
        dump_cand (dump_file, cand);
2061
    }
2062
 
2063
  if (important && !cand->important)
2064
    {
2065
      cand->important = true;
2066
      if (dump_file && (dump_flags & TDF_DETAILS))
2067
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068
    }
2069
 
2070
  if (use)
2071
    {
2072
      bitmap_set_bit (use->related_cands, i);
2073
      if (dump_file && (dump_flags & TDF_DETAILS))
2074
        fprintf (dump_file, "Candidate %d is related to use %d\n",
2075
                 cand->id, use->id);
2076
    }
2077
 
2078
  return cand;
2079
}
2080
 
2081
/* Returns true if incrementing the induction variable at the end of the LOOP
2082
   is allowed.
2083
 
2084
   The purpose is to avoid splitting latch edge with a biv increment, thus
2085
   creating a jump, possibly confusing other optimization passes and leaving
2086
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087
   is not available (so we do not have a better alternative), or if the latch
2088
   edge is already nonempty.  */
2089
 
2090
static bool
2091
allow_ip_end_pos_p (struct loop *loop)
2092
{
2093
  if (!ip_normal_pos (loop))
2094
    return true;
2095
 
2096
  if (!empty_block_p (ip_end_pos (loop)))
2097
    return true;
2098
 
2099
  return false;
2100
}
2101
 
2102
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103
   position to POS.  If USE is not NULL, the candidate is set as related to
2104
   it.  The candidate computation is scheduled on all available positions.  */
2105
 
2106
static void
2107
add_candidate (struct ivopts_data *data,
2108
               tree base, tree step, bool important, struct iv_use *use)
2109
{
2110
  if (ip_normal_pos (data->current_loop))
2111
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112
  if (ip_end_pos (data->current_loop)
2113
      && allow_ip_end_pos_p (data->current_loop))
2114
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115
}
2116
 
2117
/* Add a standard "0 + 1 * iteration" iv candidate for a
2118
   type with SIZE bits.  */
2119
 
2120
static void
2121
add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122
                                     unsigned int size)
2123
{
2124
  tree type = lang_hooks.types.type_for_size (size, true);
2125
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126
                 true, NULL);
2127
}
2128
 
2129
/* Adds standard iv candidates.  */
2130
 
2131
static void
2132
add_standard_iv_candidates (struct ivopts_data *data)
2133
{
2134
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
 
2136
  /* The same for a double-integer type if it is still fast enough.  */
2137
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139
}
2140
 
2141
 
2142
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  /* A candidate identical to the biv itself.  */
  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  The increment is placed at the statement that defines
         the value flowing into the PHI over the latch edge (IP_ORIGINAL).  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2170
 
2171
/* Adds candidates based on the old induction variables.  */
2172
 
2173
static void
2174
add_old_ivs_candidates (struct ivopts_data *data)
2175
{
2176
  unsigned i;
2177
  struct iv *iv;
2178
  bitmap_iterator bi;
2179
 
2180
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181
    {
2182
      iv = ver_info (data, i)->iv;
2183
      if (iv && iv->biv_p && !zero_p (iv->step))
2184
        add_old_iv_candidates (data, iv);
2185
    }
2186
}
2187
 
2188
/* Adds candidates based on the value of the induction variable IV and USE.  */
2189
 
2190
static void
2191
add_iv_value_candidates (struct ivopts_data *data,
2192
                         struct iv *iv, struct iv_use *use)
2193
{
2194
  unsigned HOST_WIDE_INT offset;
2195
  tree base;
2196
 
2197
  add_candidate (data, iv->base, iv->step, false, use);
2198
 
2199
  /* The same, but with initial value zero.  Make such variable important,
2200
     since it is generic enough so that possibly many uses may be based
2201
     on it.  */
2202
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203
                 iv->step, true, use);
2204
 
2205
  /* Third, try removing the constant offset.  */
2206
  base = strip_offset (iv->base, &offset);
2207
  if (offset)
2208
    add_candidate (data, base, iv->step, false, use);
2209
}
2210
 
2211
/* Possibly adds pseudocandidate for replacing the final value of USE by
2212
   a direct computation.  */
2213
 
2214
static void
2215
add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2216
{
2217
  /* We must know where we exit the loop and how many times does it roll.  */
2218
  if (! niter_for_single_dom_exit (data))
2219
    return;
2220
 
2221
  add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2222
}
2223
 
2224
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      /* Skip slots for uses that are not present.  */
      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        case USE_OUTER:
          add_iv_value_candidates (data, use->iv, use);

          /* Additionally, add the pseudocandidate for the possibility to
             replace the final value by a direct computation.  */
          add_iv_outer_candidates (data, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
2260
 
2261
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the ids of all important candidates into a bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates, every (use, candidate) pair is considered,
     so the per-use candidate lists are not needed.  */
  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
2299
 
2300
/* Finds the candidates for the induction variables.  Driver that invokes
   the individual candidate-generation phases in order and finally records
   which candidates are important.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2317
 
2318
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  The per-use list is used as an
   open-addressing hash table keyed by candidate id (see set_use_iv_cost and
   get_use_iv_cost), hence its size is rounded up to a power of two.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          /* Count the candidates related to this use.  */
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that moduling by it is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      /* xcalloc zeroes the table, so each slot starts with a NULL cand,
         which marks it empty for the probing in set_use_iv_cost.  */
      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}
2351
 
2352
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  An INFTY cost is not recorded at all (and DEPENDS_ON is
   released).  Takes ownership of DEPENDS_ON.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand, unsigned cost,
                 bitmap depends_on, tree value)
{
  unsigned i, s;

  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* Direct indexing: cost_map has one slot per candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* Open-addressing insert: probe linearly from the hashed slot,
     wrapping around once; an empty slot has a NULL cand.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table was sized to hold all related candidates, so a free slot
     must always exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2395
 
2396
/* Gets cost of (USE, CANDIDATE) pair.  Returns NULL when the pair was
   never recorded (infinite cost) or CANDIDATE is NULL.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      /* Direct indexing; a NULL cand in the slot means "not recorded".  */
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* Open-addressing lookup mirroring the probe order of set_use_iv_cost.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2429
 
2430
/* Returns estimate on cost of computing SEQ.  */
2431
 
2432
static unsigned
2433
seq_cost (rtx seq)
2434
{
2435
  unsigned cost = 0;
2436
  rtx set;
2437
 
2438
  for (; seq; seq = NEXT_INSN (seq))
2439
    {
2440
      set = single_set (seq);
2441
      if (set)
2442
        cost += rtx_cost (set, SET);
2443
      else
2444
        cost++;
2445
    }
2446
 
2447
  return cost;
2448
}
2449
 
2450
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2451
static rtx
2452
produce_memory_decl_rtl (tree obj, int *regno)
2453
{
2454
  rtx x;
2455
 
2456
  gcc_assert (obj);
2457
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2458
    {
2459
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2460
      x = gen_rtx_SYMBOL_REF (Pmode, name);
2461
    }
2462
  else
2463
    x = gen_raw_REG (Pmode, (*regno)++);
2464
 
2465
  return gen_rtx_MEM (DECL_MODE (obj), x);
2466
}
2467
 
2468
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  Each decl
   that gets a fake DECL_RTL here is also pushed onto decl_rtl_to_reset so
   the caller can undo the assignment afterwards.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip component references to reach the underlying base object.
         Note that this rebinds expr_p; only the final base matters here.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Do not walk into the SSA name's subtrees.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      /* Leave decls that already have RTL alone.  */
      if (DECL_RTL_SET_P (obj))
        break;

      /* Aggregates (BLKmode) must live in memory; scalars can use a fake
         pseudo register.  */
      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so that the fake DECL_RTL can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2525
 
2526
/* Determines cost of the computation of EXPR by expanding it to RTL in a
   scratch sequence and summing the insn costs.  If the result ends up in
   memory, the cost of the address computation is added as well.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  /* Give every referenced decl a fake DECL_RTL so expansion can proceed.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
2549
 
2550
/* Returns variable containing the value of candidate CAND at statement AT.  */
2551
 
2552
static tree
2553
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2554
{
2555
  if (stmt_after_increment (loop, cand, stmt))
2556
    return cand->var_after;
2557
  else
2558
    return cand->var_before;
2559
}
2560
 
2561
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2562
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2563
 
2564
int
2565
tree_int_cst_sign_bit (tree t)
2566
{
2567
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2568
  unsigned HOST_WIDE_INT w;
2569
 
2570
  if (bitno < HOST_BITS_PER_WIDE_INT)
2571
    w = TREE_INT_CST_LOW (t);
2572
  else
2573
    {
2574
      w = TREE_INT_CST_HIGH (t);
2575
      bitno -= HOST_BITS_PER_WIDE_INT;
2576
    }
2577
 
2578
  return (w >> bitno) & 1;
2579
}
2580
 
2581
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  Recurses through MULT_EXPR,
   PLUS_EXPR and MINUS_EXPR; the base case divides integer constants.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* TOP == BOT gives the multiple 1 directly.  */
  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * C) = cst * BOT iff X = cst' * BOT; then cst = cst' * C.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
        return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
                                      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both addends must be multiples of BOT for the sum/difference
         to be one.  */
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
        return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
        return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
         the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
        {
          negate = true;
          bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
        }
      else
        negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
        {
          negate = !negate;
          top = fold_unary_to_constant (NEGATE_EXPR, type, top);
        }

      /* The division must be exact for a constant multiple to exist.  */
      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
        return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
        res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}
2659
 
2660
/* Sets COMB to CST.  */
2661
 
2662
static void
2663
aff_combination_const (struct affine_tree_combination *comb, tree type,
2664
                       unsigned HOST_WIDE_INT cst)
2665
{
2666
  unsigned prec = TYPE_PRECISION (type);
2667
 
2668
  comb->type = type;
2669
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2670
 
2671
  comb->n = 0;
2672
  comb->rest = NULL_TREE;
2673
  comb->offset = cst & comb->mask;
2674
}
2675
 
2676
/* Sets COMB to single element ELT.  */
2677
 
2678
static void
2679
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2680
{
2681
  unsigned prec = TYPE_PRECISION (type);
2682
 
2683
  comb->type = type;
2684
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2685
 
2686
  comb->n = 1;
2687
  comb->elts[0] = elt;
2688
  comb->coefs[0] = 1;
2689
  comb->rest = NULL_TREE;
2690
  comb->offset = 0;
2691
}
2692
 
2693
/* Scales COMB by SCALE.  The arithmetic is modular in the precision of
   COMB's type (kept reduced via comb->mask), so a "negative" scale is
   passed as its two's-complement unsigned value.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
                       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  /* Scaling by one changes nothing.  */
  if (scale == 1)
    return;

  /* Scaling by zero collapses the combination to the constant 0.  */
  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  /* Scale each coefficient, compacting away elements whose coefficient
     becomes zero modulo the mask.  */
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
        j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      /* If compaction freed a slot, move the scaled rest into the element
         array; otherwise fold the multiplication into the rest tree.  */
      if (comb->n < MAX_AFF_ELTS)
        {
          comb->coefs[comb->n] = scale;
          comb->elts[comb->n] = comb->rest;
          comb->rest = NULL_TREE;
          comb->n++;
        }
      else
        comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
                                  build_int_cst_type (comb->type, scale));
    }
}
2734
 
2735
/* Adds ELT * SCALE to COMB.  SCALE is taken modulo the precision of COMB's
   type (comb->mask).  If the element array is full, the term is folded
   into comb->rest instead.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
                         unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  /* If ELT is already present, just adjust its coefficient.  */
  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
        comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
        if (comb->coefs[i])
          return;

        /* The coefficient cancelled to zero; remove the element by moving
           the last one into its slot.  */
        comb->n--;
        comb->coefs[i] = comb->coefs[comb->n];
        comb->elts[i] = comb->elts[comb->n];

        /* A slot became free, so the overflow term (if any) can be moved
           back into the element array.  */
        if (comb->rest)
          {
            gcc_assert (comb->n == MAX_AFF_ELTS - 1);
            comb->coefs[comb->n] = 1;
            comb->elts[comb->n] = comb->rest;
            comb->rest = NULL_TREE;
            comb->n++;
          }
        return;
      }
  /* ELT is new; store it in a free slot if one exists.  */
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  /* No free slot: accumulate ELT * SCALE into the rest tree.  */
  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
                       fold_convert (comb->type, elt),
                       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
2787
 
2788
/* Adds COMB2 to COMB1.  The combinations are assumed to be over the same
   type, so COMB1's mask is used for the offset arithmetic.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
                     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  /* Merge every element of COMB2, then its overflow term (coefficient 1).  */
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}
2802
 
2803
/* Splits EXPR into an affine combination of parts, stored in COMB over
   TYPE.  Recurses through +, -, * by constant, negation and ADDR_EXPR;
   anything else becomes a single opaque element.  */

static void
tree_to_aff_combination (tree expr, tree type,
                         struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Decompose both operands; subtraction is addition of the operand
         scaled by -1.  */
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
        aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      /* Only multiplication by an integer constant keeps the combination
         affine.  */
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
        break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      /* Split &object into the address of the base plus the byte offset
         computed by get_inner_reference.  */
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
                                  &toffset, &mode, &unsignedp, &volatilep,
                                  false);
      /* A reference not aligned to a byte boundary cannot be expressed as
         an affine byte offset; treat the whole expression as opaque.  */
      if (bitpos % BITS_PER_UNIT != 0)
        break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
        aff_combination_add_elt (comb, core, 1);
      else
        {
          tree_to_aff_combination (core, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      if (toffset)
        {
          tree_to_aff_combination (toffset, type, &tmp);
          aff_combination_add (comb, &tmp);
        }
      return;

    default:
      break;
    }

  /* Fallback: EXPR as a single element with coefficient 1.  */
  aff_combination_elt (comb, type, expr);
}
2875
 
2876
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.
   SCALE is interpreted modulo MASK, so values with the top bit set denote
   negative multiples and are emitted as subtractions.  EXPR may be NULL,
   in which case just ELT * SCALE is built.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
                 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  /* Scale 1: plain addition (or just ELT when there is no EXPR yet).  */
  if (scale == 1)
    {
      if (!expr)
        return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  /* Scale == MASK is -1 modulo the precision: subtraction/negation.  */
  if (scale == mask)
    {
      if (!expr)
        return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
                        build_int_cst_type (type, scale));

  /* Top bit of SCALE set: emit EXPR - (-SCALE) * ELT instead.  */
  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
                     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
2920
 
2921
/* Copies the tree elements of COMB to ensure that they are not shared.  */
2922
 
2923
static void
2924
unshare_aff_combination (struct affine_tree_combination *comb)
2925
{
2926
  unsigned i;
2927
 
2928
  for (i = 0; i < comb->n; i++)
2929
    comb->elts[i] = unshare_expr (comb->elts[i]);
2930
  if (comb->rest)
2931
    comb->rest = unshare_expr (comb->rest);
2932
}
2933
 
2934
/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  /* A non-NULL rest is only possible when the element array is full.  */
  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
                            comb->mask);

  /* Emit the constant offset last, as addition or subtraction depending
     on its sign bit under the modular mask.  */
  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
                          comb->mask);
}
2969
 
2970
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Compute in the unsigned variant of the use type so the modular
     arithmetic below is well defined.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
         considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
        cstep_orig = cstep;
    }

  /* Determine RATIO = ustep / cstep, the factor by which the candidate
     must be scaled to follow the use.  */
  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
        {
          /* TODO maybe consider case when ustep divides cstep and the ratio is
             a power of 2 (so that the division is fast to execute)?  We would
             need to be much more careful with overflows etc. then.  */
          return false;
        }

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
        return false;

      /* Ratioi is only used to detect special cases when the multiplicative
         factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
         we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
         to integer_onep/integer_all_onesp, since the former ignores
         TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
        ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
        ratioi = 1;
      else if (integer_all_onesp (ratio))
        ratioi = -1;
      else
        ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
         should not be very common, and fold itself is not that bad either,
         so making the aff. functions more complicated to handle this case
         is not that urgent.  */
      if (ratioi == 1)
        {
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
        }
      else if (ratioi == -1)
        {
          delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
          expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
        }
      else
        {
          delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
          delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
          expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
          expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
        }

      /* Whole result carried in AFF->rest; see aff_combination_to_tree.  */
      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  /* Build AFF = ubase - ratioi * cbase + ratioi * var in decomposed form.  */
  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}
3122
 
3123
/* Determines the expression by that USE is expressed from induction variable
3124
   CAND at statement AT in LOOP.  The computation is unshared.  */
3125
 
3126
static tree
3127
get_computation_at (struct loop *loop,
3128
                    struct iv_use *use, struct iv_cand *cand, tree at)
3129
{
3130
  struct affine_tree_combination aff;
3131
  tree type = TREE_TYPE (use->iv->base);
3132
 
3133
  if (!get_computation_aff (loop, use, cand, at, &aff))
3134
    return NULL_TREE;
3135
  unshare_aff_combination (&aff);
3136
  return fold_convert (type, aff_combination_to_tree (&aff));
3137
}
3138
 
3139
/* Determines the expression by that USE is expressed from induction variable
3140
   CAND in LOOP.  The computation is unshared.  */
3141
 
3142
static tree
3143
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3144
{
3145
  return get_computation_at (loop, use, cand, use->stmt);
3146
}
3147
 
3148
/* Returns cost of addition in MODE.  The cost is measured once per mode by
   expanding an addition of two fake pseudo registers and summing the costs
   of the resulting insn sequence; the result is cached in a static array
   indexed by mode.  The returned cost is always at least 1.  */

static unsigned
add_cost (enum machine_mode mode)
{
  /* Per-mode cache; 0 means "not computed yet", which is why the measured
     cost below is bumped to 1 before being stored.  */
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  /* Expand PLUS of two virtual registers into a throwaway insn sequence
     and measure it.  LAST_VIRTUAL_REGISTER + 1/2 are fake regnos that do
     not clash with real pseudos.  */
  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
3179
 
3180
/* Entry in a hashtable of already known costs for multiplication.
   Keyed by the (cst, mode) pair; see multiply_by_cost.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};
3187
 
3188
/* Counts hash value for the ENTRY.  */
3189
 
3190
static hashval_t
3191
mbc_entry_hash (const void *entry)
3192
{
3193
  const struct mbc_entry *e = entry;
3194
 
3195
  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3196
}
3197
 
3198
/* Compares the hash table entries ENTRY1 and ENTRY2.  */
3199
 
3200
static int
3201
mbc_entry_eq (const void *entry1, const void *entry2)
3202
{
3203
  const struct mbc_entry *e1 = entry1;
3204
  const struct mbc_entry *e2 = entry2;
3205
 
3206
  return (e1->mode == e2->mode
3207
          && e1->cst == e2->cst);
3208
}
3209
 
3210
/* Returns cost of multiplication by constant CST in MODE.  The (fairly
   expensive) measurement through expand_mult is performed only once per
   distinct (CST, MODE) pair; results are memoized in a lazily created
   hash table.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  /* Create the cache on first use; entries are released by the table's
     delete function (free).  */
  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Fill the slot before measuring, so the table is consistent even while
     the sequence below is being generated.  The cost field is set last.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  /* Measure by expanding (fake-reg * CST) into a throwaway insn sequence
     and summing the costs of the generated insns.  */
  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
3249
 
3250
/* Returns true if multiplying by RATIO is allowed in an address.  On the
   first call, every ratio in [-MAX_RATIO, MAX_RATIO] is probed against the
   target by asking whether (mult reg ratio) forms a valid memory address
   in Pmode; the answers are cached in a static bitmap.  Ratios outside the
   probed range are conservatively rejected.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      /* Bit (i + MAX_RATIO) records whether ratio i is usable.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      /* A single MULT RTX is reused; its second operand is replaced in
         each loop iteration below.  */
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (memory_address_p (Pmode, addr))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
3290
 
3291
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   The returned value has two components: an extra cost for parts the
   addressing mode cannot absorb (an unsupported ratio or an out-of-range
   offset), plus the cost of the resulting address form itself, which is
   cached per (symbol, var, offset, ratio) presence combination.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  /* RAT and OFF are representative allowed ratio and offset values used
     when building the sample addresses whose costs are cached.  */
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  /* Cache indexed by [symbol_present][var_present][offset_p][ratio_p];
     0 means not yet computed (real costs are forced to be >= 1).  */
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      /* Probe for the largest power-of-two offset (up to 2^20) that still
         forms a valid (plus reg const) address, in both directions.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
        {
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
          if (!memory_address_p (Pmode, addr))
            break;
        }
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %d\n", (int) min_offset);
          fprintf (dump_file, "  max offset %d\n", (int) max_offset);
        }

      /* Pick the smallest allowed multiplier > 1 as the representative
         ratio for sample addresses.  */
      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i))
          {
            rat = i;
            break;
          }
    }

  /* Sign-extend OFFSET from the width of Pmode into a signed
     HOST_WIDE_INT, so range checks against min/max_offset work.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
              && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio));

  /* An unsupported ratio must be applied by an explicit multiply.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An out-of-range offset (with no symbol to fold it into) needs an
     explicit addition, producing an extra variable part.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      /* Build a sample address with exactly the requested parts present
         and measure how expensive the target finds it.  */
      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
        addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
        {
          base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
          if (offset_p)
            base = gen_rtx_fmt_e (CONST, Pmode,
                                  gen_rtx_fmt_ee (PLUS, Pmode,
                                                  base,
                                                  gen_int_mode (off, Pmode)));
        }
      else if (offset_p)
        base = gen_int_mode (off, Pmode);
      else
        base = NULL_RTX;

      if (base)
        addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
         follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      /* Cost of legitimizing the address plus the address itself.  */
      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
        acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
3427
 
3428
/* Estimates cost of forcing expression EXPR into a variable.  SSA names and
   variables are free; invariant constants/addresses use costs measured once
   per compilation; PLUS/MINUS/MULT recurse on their operands; anything else
   falls back to target_spill_cost.  The result is capped at
   target_spill_cost.  */

unsigned
force_expr_to_var_cost (tree expr)
{
  /* Costs of the basic invariant operand kinds, measured lazily on the
     first call by expanding small sample expressions.  */
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      /* A fake static variable gives a representative symbol to measure
         symbol and symbol+offset costs against.  */
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
                           gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
                                                           2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      /* The +1 biases against symbols/addresses relative to plain
         integer constants of the same measured cost.  */
      symbol_cost = computation_cost (addr) + 1;

      address_cost
        = computation_cost (build2 (PLUS_EXPR, type,
                                    addr,
                                    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "force_expr_to_var_cost:\n");
          fprintf (dump_file, "  integer %d\n", (int) integer_cost);
          fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
          fprintf (dump_file, "  address %d\n", (int) address_cost);
          fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
          fprintf (dump_file, "\n");
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Variables and SSA names are already available; no cost.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return symbol_cost;
        }

      /* Other invariants are treated as symbol + offset.  */
      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      /* Recurse into operands; operands that are already gimple values
         cost nothing extra.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = 0;
      else
        cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
        cost1 = 0;
      else
        cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Only multiplications by a host-word constant get a measured cost;
         a general multiply is priced as a spill.  */
      if (cst_and_fits_in_hwi (op0))
        cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
        cost = multiply_by_cost (int_cst_value (op1), mode);
      else
        return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
3554
 
3555
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3556
   invariants the computation depends on.  */
3557
 
3558
static unsigned
3559
force_var_cost (struct ivopts_data *data,
3560
                tree expr, bitmap *depends_on)
3561
{
3562
  if (depends_on)
3563
    {
3564
      fd_ivopts_data = data;
3565
      walk_tree (&expr, find_depends, depends_on, NULL);
3566
    }
3567
 
3568
  return force_expr_to_var_cost (expr);
3569
}
3570
 
3571
/* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  /* Decompose ADDR into its base object and a constant bit position.  */
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  /* A variable offset, a non-byte-aligned position, or a base that is not
     a plain variable cannot be split; price it as a spill and record the
     invariants it depends on.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  /* Static or external variables have a fixed address: a symbol part.
     Automatic variables contribute a variable part instead.  */
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}
3615
 
3616
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  E1 must be an ADDR_EXPR.  */

static unsigned
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  unsigned cost;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return 0;
    }

  /* E1 - 0 is just the address E1; split it into its parts.  */
  if (e2 == integer_zero_node)
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  /* General case: force both addresses into variables and subtract.  */
  *symbol_present = false;
  *var_present = true;

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (Pmode);

  return cost;
}
3653
 
3654
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static unsigned
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  /* Peel constant offsets off both operands first; their difference goes
     straight into *OFFSET regardless of how the rest is priced.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Address differences get the specialized pointer handling.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
                                depends_on);
  *symbol_present = false;

  /* Identical operands cancel; only the stripped offsets remain.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return 0;
    }
  *var_present = true;
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 costs forcing E2 plus a negation (priced as multiply by -1).  */
  if (zero_p (e1))
    {
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}
3704
 
3705
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   Returns INFTY when USE cannot (profitably) be expressed from CAND.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* RATIO = ustep / cstep, computed either by integer division (when both
     steps fit a host word) or symbolically.  If it does not come out as a
     host-word integer, the candidate cannot express the use.  */
  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
        return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);

      if (!rat)
        return INFTY;

      if (cst_and_fits_in_hwi (rat))
        ratio = int_cst_value (rat);
      else if (integer_onep (rat))
        ratio = 1;
      else if (integer_all_onesp (rat))
        ratio = -1;
      else
        return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else if (ratio == 1)
    {
      cost += difference_cost (data,
                               ubase, cbase,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }
  else
    {
      /* General case: cbase must be forced into a variable, multiplied
         and added separately.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
                               ubase, integer_zero_node,
                               &symbol_present, &var_present, &offset,
                               depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3875
 
3876
/* Determines the cost of the computation by that USE is expressed
3877
   from induction variable CAND.  If ADDRESS_P is true, we just need
3878
   to create an address from it, otherwise we want to get it into
3879
   register.  A set of invariants we depend on is stored in
3880
   DEPENDS_ON.  */
3881
 
3882
static unsigned
3883
get_computation_cost (struct ivopts_data *data,
3884
                      struct iv_use *use, struct iv_cand *cand,
3885
                      bool address_p, bitmap *depends_on)
3886
{
3887
  return get_computation_cost_at (data,
3888
                                  use, cand, address_p, depends_on, use->stmt);
3889
}
3890
 
3891
/* Determines cost of basing replacement of USE on CAND in a generic
3892
   expression.  */
3893
 
3894
static bool
3895
determine_use_iv_cost_generic (struct ivopts_data *data,
3896
                               struct iv_use *use, struct iv_cand *cand)
3897
{
3898
  bitmap depends_on;
3899
  unsigned cost;
3900
 
3901
  /* The simple case first -- if we need to express value of the preserved
3902
     original biv, the cost is 0.  This also prevents us from counting the
3903
     cost of increment twice -- once at this use and once in the cost of
3904
     the candidate.  */
3905
  if (cand->pos == IP_ORIGINAL
3906
      && cand->incremented_at == use->stmt)
3907
    {
3908
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3909
      return true;
3910
    }
3911
 
3912
  cost = get_computation_cost (data, use, cand, false, &depends_on);
3913
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3914
 
3915
  return cost != INFTY;
3916
}
3917
 
3918
/* Determines cost of basing replacement of USE on CAND in an address.  */
3919
 
3920
static bool
3921
determine_use_iv_cost_address (struct ivopts_data *data,
3922
                               struct iv_use *use, struct iv_cand *cand)
3923
{
3924
  bitmap depends_on;
3925
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3926
 
3927
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3928
 
3929
  return cost != INFTY;
3930
}
3931
 
3932
/* Computes value of induction variable IV in iteration NITER.  */
3933
 
3934
static tree
3935
iv_value (struct iv *iv, tree niter)
3936
{
3937
  tree val;
3938
  tree type = TREE_TYPE (iv->base);
3939
 
3940
  niter = fold_convert (type, niter);
3941
  val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3942
 
3943
  return fold_build2 (PLUS_EXPR, type, iv->base, val);
3944
}
3945
 
3946
/* Computes value of candidate CAND at position AT in iteration NITER.  */
3947
 
3948
static tree
3949
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3950
{
3951
  tree val = iv_value (cand->iv, niter);
3952
  tree type = TREE_TYPE (cand->iv->base);
3953
 
3954
  if (stmt_after_increment (loop, cand, at))
3955
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3956
 
3957
  return val;
3958
}
3959
 
3960
/* Returns period of induction variable iv, i.e. the largest value the iv
   may run through before wrapping, as an unsigned constant tree.  Requires
   the step to be an INTEGER_CST.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  /* Mask of (precision - pow2div) low bits, i.e. 2^(prec - pow2div) - 1.  */
  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
3982
 
3983
/* Returns the comparison operator used when eliminating the iv USE.  */
3984
 
3985
static enum tree_code
3986
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3987
{
3988
  struct loop *loop = data->current_loop;
3989
  basic_block ex_bb;
3990
  edge exit;
3991
 
3992
  ex_bb = bb_for_stmt (use->stmt);
3993
  exit = EDGE_SUCC (ex_bb, 0);
3994
  if (flow_bb_inside_loop_p (loop, exit->dest))
3995
    exit = EDGE_SUCC (ex_bb, 1);
3996
 
3997
  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3998
}
3999
 
4000
/* Check whether it is possible to express the condition in USE by comparison
4001
   of candidate CAND.  If so, store the value compared with to BOUND.  */
4002
 
4003
static bool
4004
may_eliminate_iv (struct ivopts_data *data,
4005
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
4006
{
4007
  basic_block ex_bb;
4008
  edge exit;
4009
  tree nit, nit_type;
4010
  tree wider_type, period, per_type;
4011
  struct loop *loop = data->current_loop;
4012
 
4013
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4014
    return false;
4015
 
4016
  /* For now works only for exits that dominate the loop latch.  TODO -- extend
4017
     for other conditions inside loop body.  */
4018
  ex_bb = bb_for_stmt (use->stmt);
4019
  if (use->stmt != last_stmt (ex_bb)
4020
      || TREE_CODE (use->stmt) != COND_EXPR)
4021
    return false;
4022
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4023
    return false;
4024
 
4025
  exit = EDGE_SUCC (ex_bb, 0);
4026
  if (flow_bb_inside_loop_p (loop, exit->dest))
4027
    exit = EDGE_SUCC (ex_bb, 1);
4028
  if (flow_bb_inside_loop_p (loop, exit->dest))
4029
    return false;
4030
 
4031
  nit = niter_for_exit (data, exit);
4032
  if (!nit)
4033
    return false;
4034
 
4035
  nit_type = TREE_TYPE (nit);
4036
 
4037
  /* Determine whether we may use the variable to test whether niter iterations
4038
     elapsed.  This is the case iff the period of the induction variable is
4039
     greater than the number of iterations.  */
4040
  period = iv_period (cand->iv);
4041
  if (!period)
4042
    return false;
4043
  per_type = TREE_TYPE (period);
4044
 
4045
  wider_type = TREE_TYPE (period);
4046
  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4047
    wider_type = per_type;
4048
  else
4049
    wider_type = nit_type;
4050
 
4051
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4052
                                      fold_convert (wider_type, period),
4053
                                      fold_convert (wider_type, nit))))
4054
    return false;
4055
 
4056
  *bound = cand_value_at (loop, cand, use->stmt, nit);
4057
  return true;
4058
}
4059
 
4060
/* Determines cost of basing replacement of USE on CAND in a condition.
   Prefers full elimination of the iv (comparing CAND against a computed
   bound); otherwise falls back to expressing the original giv.  Records
   the cost via set_use_iv_cost and returns whether the replacement is
   feasible.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* If the iv can be eliminated, the cost is just that of computing the
     bound value the candidate is compared against.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      /* Pick the non-iv operand of the comparison: if operand 0 is an iv
         (nonzero step), the invariant side is operand 1.
         NOTE(review): get_iv (data, op) appears to be assumed non-NULL
         for SSA_NAME operands here -- confirm against get_iv's contract
         for names without an associated iv.  */
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
        op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
        {
          /* Record the invariants the compared-against base depends on.  */
          op = get_iv (data, op)->base;
          fd_ivopts_data = data;
          walk_tree (&op, find_depends, &depends_on, NULL);
        }
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
4107
 
4108
/* Checks whether it is possible to replace the final value of USE by
4109
   a direct computation.  If so, the formula is stored to *VALUE.  */
4110
 
4111
static bool
4112
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4113
                         tree *value)
4114
{
4115
  struct loop *loop = data->current_loop;
4116
  edge exit;
4117
  tree nit;
4118
 
4119
  exit = single_dom_exit (loop);
4120
  if (!exit)
4121
    return false;
4122
 
4123
  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4124
                              bb_for_stmt (use->stmt)));
4125
 
4126
  nit = niter_for_single_dom_exit (data);
4127
  if (!nit)
4128
    return false;
4129
 
4130
  *value = iv_value (use->iv, nit);
4131
 
4132
  return true;
4133
}
4134
 
4135
/* Determines cost of replacing final value of USE using CAND.  */
4136
 
4137
static bool
4138
determine_use_iv_cost_outer (struct ivopts_data *data,
4139
                             struct iv_use *use, struct iv_cand *cand)
4140
{
4141
  bitmap depends_on;
4142
  unsigned cost;
4143
  edge exit;
4144
  tree value = NULL_TREE;
4145
  struct loop *loop = data->current_loop;
4146
 
4147
  /* The simple case first -- if we need to express value of the preserved
4148
     original biv, the cost is 0.  This also prevents us from counting the
4149
     cost of increment twice -- once at this use and once in the cost of
4150
     the candidate.  */
4151
  if (cand->pos == IP_ORIGINAL
4152
      && cand->incremented_at == use->stmt)
4153
    {
4154
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4155
      return true;
4156
    }
4157
 
4158
  if (!cand->iv)
4159
    {
4160
      if (!may_replace_final_value (data, use, &value))
4161
        {
4162
          set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4163
          return false;
4164
        }
4165
 
4166
      depends_on = NULL;
4167
      cost = force_var_cost (data, value, &depends_on);
4168
 
4169
      cost /= AVG_LOOP_NITER (loop);
4170
 
4171
      set_use_iv_cost (data, use, cand, cost, depends_on, value);
4172
      return cost != INFTY;
4173
    }
4174
 
4175
  exit = single_dom_exit (loop);
4176
  if (exit)
4177
    {
4178
      /* If there is just a single exit, we may use value of the candidate
4179
         after we take it to determine the value of use.  */
4180
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4181
                                      last_stmt (exit->src));
4182
      if (cost != INFTY)
4183
        cost /= AVG_LOOP_NITER (loop);
4184
    }
4185
  else
4186
    {
4187
      /* Otherwise we just need to compute the iv.  */
4188
      cost = get_computation_cost (data, use, cand, false, &depends_on);
4189
    }
4190
 
4191
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4192
 
4193
  return cost != INFTY;
4194
}
4195
 
4196
/* Determines cost of basing replacement of USE on CAND.  Returns false
4197
   if USE cannot be based on CAND.  */
4198
 
4199
static bool
4200
determine_use_iv_cost (struct ivopts_data *data,
4201
                       struct iv_use *use, struct iv_cand *cand)
4202
{
4203
  switch (use->type)
4204
    {
4205
    case USE_NONLINEAR_EXPR:
4206
      return determine_use_iv_cost_generic (data, use, cand);
4207
 
4208
    case USE_OUTER:
4209
      return determine_use_iv_cost_outer (data, use, cand);
4210
 
4211
    case USE_ADDRESS:
4212
      return determine_use_iv_cost_address (data, use, cand);
4213
 
4214
    case USE_COMPARE:
4215
      return determine_use_iv_cost_condition (data, use, cand);
4216
 
4217
    default:
4218
      gcc_unreachable ();
4219
    }
4220
}
4221
 
4222
/* Determines costs of basing the use of the iv on an iv candidate.  */
4223
 
4224
static void
4225
determine_use_iv_costs (struct ivopts_data *data)
4226
{
4227
  unsigned i, j;
4228
  struct iv_use *use;
4229
  struct iv_cand *cand;
4230
  bitmap to_clear = BITMAP_ALLOC (NULL);
4231
 
4232
  alloc_use_cost_map (data);
4233
 
4234
  for (i = 0; i < n_iv_uses (data); i++)
4235
    {
4236
      use = iv_use (data, i);
4237
 
4238
      if (data->consider_all_candidates)
4239
        {
4240
          for (j = 0; j < n_iv_cands (data); j++)
4241
            {
4242
              cand = iv_cand (data, j);
4243
              determine_use_iv_cost (data, use, cand);
4244
            }
4245
        }
4246
      else
4247
        {
4248
          bitmap_iterator bi;
4249
 
4250
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4251
            {
4252
              cand = iv_cand (data, j);
4253
              if (!determine_use_iv_cost (data, use, cand))
4254
                bitmap_set_bit (to_clear, j);
4255
            }
4256
 
4257
          /* Remove the candidates for that the cost is infinite from
4258
             the list of related candidates.  */
4259
          bitmap_and_compl_into (use->related_cands, to_clear);
4260
          bitmap_clear (to_clear);
4261
        }
4262
    }
4263
 
4264
  BITMAP_FREE (to_clear);
4265
 
4266
  if (dump_file && (dump_flags & TDF_DETAILS))
4267
    {
4268
      fprintf (dump_file, "Use-candidate costs:\n");
4269
 
4270
      for (i = 0; i < n_iv_uses (data); i++)
4271
        {
4272
          use = iv_use (data, i);
4273
 
4274
          fprintf (dump_file, "Use %d:\n", i);
4275
          fprintf (dump_file, "  cand\tcost\tdepends on\n");
4276
          for (j = 0; j < use->n_map_members; j++)
4277
            {
4278
              if (!use->cost_map[j].cand
4279
                  || use->cost_map[j].cost == INFTY)
4280
                continue;
4281
 
4282
              fprintf (dump_file, "  %d\t%d\t",
4283
                       use->cost_map[j].cand->id,
4284
                       use->cost_map[j].cost);
4285
              if (use->cost_map[j].depends_on)
4286
                bitmap_print (dump_file,
4287
                              use->cost_map[j].depends_on, "","");
4288
              fprintf (dump_file, "\n");
4289
            }
4290
 
4291
          fprintf (dump_file, "\n");
4292
        }
4293
      fprintf (dump_file, "\n");
4294
    }
4295
}
4296
 
4297
/* Determines cost of the candidate CAND.  */
4298
 
4299
static void
4300
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4301
{
4302
  unsigned cost_base, cost_step;
4303
  tree base;
4304
 
4305
  if (!cand->iv)
4306
    {
4307
      cand->cost = 0;
4308
      return;
4309
    }
4310
 
4311
  /* There are two costs associated with the candidate -- its increment
4312
     and its initialization.  The second is almost negligible for any loop
4313
     that rolls enough, so we take it just very little into account.  */
4314
 
4315
  base = cand->iv->base;
4316
  cost_base = force_var_cost (data, base, NULL);
4317
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4318
 
4319
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4320
 
4321
  /* Prefer the original iv unless we may gain something by replacing it;
4322
     this is not really relevant for artificial ivs created by other
4323
     passes.  */
4324
  if (cand->pos == IP_ORIGINAL
4325
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4326
    cand->cost--;
4327
 
4328
  /* Prefer not to insert statements into latch unless there are some
4329
     already (so that we do not create unnecessary jumps).  */
4330
  if (cand->pos == IP_END
4331
      && empty_block_p (ip_end_pos (data->current_loop)))
4332
    cand->cost++;
4333
}
4334
 
4335
/* Determines costs of computation of the candidates.  */
4336
 
4337
static void
4338
determine_iv_costs (struct ivopts_data *data)
4339
{
4340
  unsigned i;
4341
 
4342
  if (dump_file && (dump_flags & TDF_DETAILS))
4343
    {
4344
      fprintf (dump_file, "Candidate costs:\n");
4345
      fprintf (dump_file, "  cand\tcost\n");
4346
    }
4347
 
4348
  for (i = 0; i < n_iv_cands (data); i++)
4349
    {
4350
      struct iv_cand *cand = iv_cand (data, i);
4351
 
4352
      determine_iv_cost (data, cand);
4353
 
4354
      if (dump_file && (dump_flags & TDF_DETAILS))
4355
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4356
    }
4357
 
4358
if (dump_file && (dump_flags & TDF_DETAILS))
4359
      fprintf (dump_file, "\n");
4360
}
4361
 
4362
/* Calculates cost for having SIZE induction variables.  */
4363
 
4364
static unsigned
4365
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4366
{
4367
  return global_cost_for_size (size,
4368
                               loop_data (data->current_loop)->regs_used,
4369
                               n_iv_uses (data));
4370
}
4371
 
4372
/* For each size of the induction variable set determine the penalty.  */
4373
 
4374
static void
4375
determine_set_costs (struct ivopts_data *data)
4376
{
4377
  unsigned j, n;
4378
  tree phi, op;
4379
  struct loop *loop = data->current_loop;
4380
  bitmap_iterator bi;
4381
 
4382
  /* We use the following model (definitely improvable, especially the
4383
     cost function -- TODO):
4384
 
4385
     We estimate the number of registers available (using MD data), name it A.
4386
 
4387
     We estimate the number of registers used by the loop, name it U.  This
4388
     number is obtained as the number of loop phi nodes (not counting virtual
4389
     registers and bivs) + the number of variables from outside of the loop.
4390
 
4391
     We set a reserve R (free regs that are used for temporary computations,
4392
     etc.).  For now the reserve is a constant 3.
4393
 
4394
     Let I be the number of induction variables.
4395
 
4396
     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4397
        make a lot of ivs without a reason).
4398
     -- if A - R < U + I <= A, the cost is I * PRES_COST
4399
     -- if U + I > A, the cost is I * PRES_COST and
4400
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4401
 
4402
  if (dump_file && (dump_flags & TDF_DETAILS))
4403
    {
4404
      fprintf (dump_file, "Global costs:\n");
4405
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4406
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4407
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4408
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4409
    }
4410
 
4411
  n = 0;
4412
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4413
    {
4414
      op = PHI_RESULT (phi);
4415
 
4416
      if (!is_gimple_reg (op))
4417
        continue;
4418
 
4419
      if (get_iv (data, op))
4420
        continue;
4421
 
4422
      n++;
4423
    }
4424
 
4425
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4426
    {
4427
      struct version_info *info = ver_info (data, j);
4428
 
4429
      if (info->inv_id && info->has_nonlin_use)
4430
        n++;
4431
    }
4432
 
4433
  loop_data (loop)->regs_used = n;
4434
  if (dump_file && (dump_flags & TDF_DETAILS))
4435
    fprintf (dump_file, "  regs_used %d\n", n);
4436
 
4437
  if (dump_file && (dump_flags & TDF_DETAILS))
4438
    {
4439
      fprintf (dump_file, "  cost for size:\n");
4440
      fprintf (dump_file, "  ivs\tcost\n");
4441
      for (j = 0; j <= 2 * target_avail_regs; j++)
4442
        fprintf (dump_file, "  %d\t%d\n", j,
4443
                 ivopts_global_cost_for_size (data, j));
4444
      fprintf (dump_file, "\n");
4445
    }
4446
}
4447
 
4448
/* Returns true if A is a cheaper cost pair than B.  */
4449
 
4450
static bool
4451
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4452
{
4453
  if (!a)
4454
    return false;
4455
 
4456
  if (!b)
4457
    return true;
4458
 
4459
  if (a->cost < b->cost)
4460
    return true;
4461
 
4462
  if (a->cost > b->cost)
4463
    return false;
4464
 
4465
  /* In case the costs are the same, prefer the cheaper candidate.  */
4466
  if (a->cand->cost < b->cand->cost)
4467
    return true;
4468
 
4469
  return false;
4470
}
4471
 
4472
/* Computes the cost field of IVS structure.  */
4473
 
4474
static void
4475
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4476
{
4477
  unsigned cost = 0;
4478
 
4479
  cost += ivs->cand_use_cost;
4480
  cost += ivs->cand_cost;
4481
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4482
 
4483
  ivs->cost = cost;
4484
}
4485
 
4486
/* Remove invariants in set INVS to set IVS.  */
4487
 
4488
static void
4489
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4490
{
4491
  bitmap_iterator bi;
4492
  unsigned iid;
4493
 
4494
  if (!invs)
4495
    return;
4496
 
4497
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4498
    {
4499
      ivs->n_invariant_uses[iid]--;
4500
      if (ivs->n_invariant_uses[iid] == 0)
4501
        ivs->n_regs--;
4502
    }
4503
}
4504
 
4505
/* Set USE not to be expressed by any candidate in IVS.  */
4506
 
4507
static void
4508
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4509
                 struct iv_use *use)
4510
{
4511
  unsigned uid = use->id, cid;
4512
  struct cost_pair *cp;
4513
 
4514
  cp = ivs->cand_for_use[uid];
4515
  if (!cp)
4516
    return;
4517
  cid = cp->cand->id;
4518
 
4519
  ivs->bad_uses++;
4520
  ivs->cand_for_use[uid] = NULL;
4521
  ivs->n_cand_uses[cid]--;
4522
 
4523
  if (ivs->n_cand_uses[cid] == 0)
4524
    {
4525
      bitmap_clear_bit (ivs->cands, cid);
4526
      /* Do not count the pseudocandidates.  */
4527
      if (cp->cand->iv)
4528
        ivs->n_regs--;
4529
      ivs->n_cands--;
4530
      ivs->cand_cost -= cp->cand->cost;
4531
 
4532
      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4533
    }
4534
 
4535
  ivs->cand_use_cost -= cp->cost;
4536
 
4537
  iv_ca_set_remove_invariants (ivs, cp->depends_on);
4538
  iv_ca_recount_cost (data, ivs);
4539
}
4540
 
4541
/* Add invariants in set INVS to set IVS.  */
4542
 
4543
static void
4544
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4545
{
4546
  bitmap_iterator bi;
4547
  unsigned iid;
4548
 
4549
  if (!invs)
4550
    return;
4551
 
4552
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4553
    {
4554
      ivs->n_invariant_uses[iid]++;
4555
      if (ivs->n_invariant_uses[iid] == 1)
4556
        ivs->n_regs++;
4557
    }
4558
}
4559
 
4560
/* Set cost pair for USE in set IVS to CP.  */
4561
 
4562
static void
4563
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4564
              struct iv_use *use, struct cost_pair *cp)
4565
{
4566
  unsigned uid = use->id, cid;
4567
 
4568
  if (ivs->cand_for_use[uid] == cp)
4569
    return;
4570
 
4571
  if (ivs->cand_for_use[uid])
4572
    iv_ca_set_no_cp (data, ivs, use);
4573
 
4574
  if (cp)
4575
    {
4576
      cid = cp->cand->id;
4577
 
4578
      ivs->bad_uses--;
4579
      ivs->cand_for_use[uid] = cp;
4580
      ivs->n_cand_uses[cid]++;
4581
      if (ivs->n_cand_uses[cid] == 1)
4582
        {
4583
          bitmap_set_bit (ivs->cands, cid);
4584
          /* Do not count the pseudocandidates.  */
4585
          if (cp->cand->iv)
4586
            ivs->n_regs++;
4587
          ivs->n_cands++;
4588
          ivs->cand_cost += cp->cand->cost;
4589
 
4590
          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4591
        }
4592
 
4593
      ivs->cand_use_cost += cp->cost;
4594
      iv_ca_set_add_invariants (ivs, cp->depends_on);
4595
      iv_ca_recount_cost (data, ivs);
4596
    }
4597
}
4598
 
4599
/* Extend set IVS by expressing USE by some of the candidates in it
4600
   if possible.  */
4601
 
4602
static void
4603
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4604
               struct iv_use *use)
4605
{
4606
  struct cost_pair *best_cp = NULL, *cp;
4607
  bitmap_iterator bi;
4608
  unsigned i;
4609
 
4610
  gcc_assert (ivs->upto >= use->id);
4611
 
4612
  if (ivs->upto == use->id)
4613
    {
4614
      ivs->upto++;
4615
      ivs->bad_uses++;
4616
    }
4617
 
4618
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4619
    {
4620
      cp = get_use_iv_cost (data, use, iv_cand (data, i));
4621
 
4622
      if (cheaper_cost_pair (cp, best_cp))
4623
        best_cp = cp;
4624
    }
4625
 
4626
  iv_ca_set_cp (data, ivs, use, best_cp);
4627
}
4628
 
4629
/* Get cost for assignment IVS.  */
4630
 
4631
static unsigned
4632
iv_ca_cost (struct iv_ca *ivs)
4633
{
4634
  return (ivs->bad_uses ? INFTY : ivs->cost);
4635
}
4636
 
4637
/* Returns true if all dependences of CP are among invariants in IVS.  */
4638
 
4639
static bool
4640
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4641
{
4642
  unsigned i;
4643
  bitmap_iterator bi;
4644
 
4645
  if (!cp->depends_on)
4646
    return true;
4647
 
4648
  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4649
    {
4650
      if (ivs->n_invariant_uses[i] == 0)
4651
        return false;
4652
    }
4653
 
4654
  return true;
4655
}
4656
 
4657
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4658
   it before NEXT_CHANGE.  */
4659
 
4660
static struct iv_ca_delta *
4661
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4662
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4663
{
4664
  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4665
 
4666
  change->use = use;
4667
  change->old_cp = old_cp;
4668
  change->new_cp = new_cp;
4669
  change->next_change = next_change;
4670
 
4671
  return change;
4672
}
4673
 
4674
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
4675
   are rewritten.  */
4676
 
4677
static struct iv_ca_delta *
4678
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4679
{
4680
  struct iv_ca_delta *last;
4681
 
4682
  if (!l2)
4683
    return l1;
4684
 
4685
  if (!l1)
4686
    return l2;
4687
 
4688
  for (last = l1; last->next_change; last = last->next_change)
4689
    continue;
4690
  last->next_change = l2;
4691
 
4692
  return l1;
4693
}
4694
 
4695
/* Returns candidate by that USE is expressed in IVS.  */
4696
 
4697
static struct cost_pair *
4698
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4699
{
4700
  return ivs->cand_for_use[use->id];
4701
}
4702
 
4703
/* Reverse the list of changes DELTA, forming the inverse to it.  */
4704
 
4705
static struct iv_ca_delta *
4706
iv_ca_delta_reverse (struct iv_ca_delta *delta)
4707
{
4708
  struct iv_ca_delta *act, *next, *prev = NULL;
4709
  struct cost_pair *tmp;
4710
 
4711
  for (act = delta; act; act = next)
4712
    {
4713
      next = act->next_change;
4714
      act->next_change = prev;
4715
      prev = act;
4716
 
4717
      tmp = act->old_cp;
4718
      act->old_cp = act->new_cp;
4719
      act->new_cp = tmp;
4720
    }
4721
 
4722
  return prev;
4723
}
4724
 
4725
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4726
   reverted instead.  */
4727
 
4728
static void
4729
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4730
                    struct iv_ca_delta *delta, bool forward)
4731
{
4732
  struct cost_pair *from, *to;
4733
  struct iv_ca_delta *act;
4734
 
4735
  if (!forward)
4736
    delta = iv_ca_delta_reverse (delta);
4737
 
4738
  for (act = delta; act; act = act->next_change)
4739
    {
4740
      from = act->old_cp;
4741
      to = act->new_cp;
4742
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4743
      iv_ca_set_cp (data, ivs, act->use, to);
4744
    }
4745
 
4746
  if (!forward)
4747
    iv_ca_delta_reverse (delta);
4748
}
4749
 
4750
/* Returns true if CAND is used in IVS.  */
4751
 
4752
static bool
4753
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4754
{
4755
  return ivs->n_cand_uses[cand->id] > 0;
4756
}
4757
 
4758
/* Returns number of induction variable candidates in the set IVS.  */
4759
 
4760
static unsigned
4761
iv_ca_n_cands (struct iv_ca *ivs)
4762
{
4763
  return ivs->n_cands;
4764
}
4765
 
4766
/* Free the list of changes DELTA.  */
4767
 
4768
static void
4769
iv_ca_delta_free (struct iv_ca_delta **delta)
4770
{
4771
  struct iv_ca_delta *act, *next;
4772
 
4773
  for (act = *delta; act; act = next)
4774
    {
4775
      next = act->next_change;
4776
      free (act);
4777
    }
4778
 
4779
  *delta = NULL;
4780
}
4781
 
4782
/* Allocates new iv candidates assignment.  */
4783
 
4784
static struct iv_ca *
4785
iv_ca_new (struct ivopts_data *data)
4786
{
4787
  struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4788
 
4789
  nw->upto = 0;
4790
  nw->bad_uses = 0;
4791
  nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4792
  nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4793
  nw->cands = BITMAP_ALLOC (NULL);
4794
  nw->n_cands = 0;
4795
  nw->n_regs = 0;
4796
  nw->cand_use_cost = 0;
4797
  nw->cand_cost = 0;
4798
  nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4799
  nw->cost = 0;
4800
 
4801
  return nw;
4802
}
4803
 
4804
/* Free memory occupied by the set IVS.  */
4805
 
4806
static void
4807
iv_ca_free (struct iv_ca **ivs)
4808
{
4809
  free ((*ivs)->cand_for_use);
4810
  free ((*ivs)->n_cand_uses);
4811
  BITMAP_FREE ((*ivs)->cands);
4812
  free ((*ivs)->n_invariant_uses);
4813
  free (*ivs);
4814
  *ivs = NULL;
4815
}
4816
 
4817
/* Dumps IVS to FILE.  */
4818
 
4819
static void
4820
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4821
{
4822
  const char *pref = "  invariants ";
4823
  unsigned i;
4824
 
4825
  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4826
  bitmap_print (file, ivs->cands, "  candidates ","\n");
4827
 
4828
  for (i = 1; i <= data->max_inv_id; i++)
4829
    if (ivs->n_invariant_uses[i])
4830
      {
4831
        fprintf (file, "%s%d", pref, i);
4832
        pref = ", ";
4833
      }
4834
  fprintf (file, "\n");
4835
}
4836
 
4837
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
4838
   new set, and store differences in DELTA.  Number of induction variables
4839
   in the new set is stored to N_IVS.  */
4840
 
4841
static unsigned
4842
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4843
              struct iv_cand *cand, struct iv_ca_delta **delta,
4844
              unsigned *n_ivs)
4845
{
4846
  unsigned i, cost;
4847
  struct iv_use *use;
4848
  struct cost_pair *old_cp, *new_cp;
4849
 
4850
  *delta = NULL;
4851
  for (i = 0; i < ivs->upto; i++)
4852
    {
4853
      use = iv_use (data, i);
4854
      old_cp = iv_ca_cand_for_use (ivs, use);
4855
 
4856
      if (old_cp
4857
          && old_cp->cand == cand)
4858
        continue;
4859
 
4860
      new_cp = get_use_iv_cost (data, use, cand);
4861
      if (!new_cp)
4862
        continue;
4863
 
4864
      if (!iv_ca_has_deps (ivs, new_cp))
4865
        continue;
4866
 
4867
      if (!cheaper_cost_pair (new_cp, old_cp))
4868
        continue;
4869
 
4870
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4871
    }
4872
 
4873
  iv_ca_delta_commit (data, ivs, *delta, true);
4874
  cost = iv_ca_cost (ivs);
4875
  if (n_ivs)
4876
    *n_ivs = iv_ca_n_cands (ivs);
4877
  iv_ca_delta_commit (data, ivs, *delta, false);
4878
 
4879
  return cost;
4880
}
4881
 
4882
/* Try narrowing set IVS by removing CAND.  Return the cost of
4883
   the new set and store the differences in DELTA.  */
4884
 
4885
static unsigned
4886
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4887
              struct iv_cand *cand, struct iv_ca_delta **delta)
4888
{
4889
  unsigned i, ci;
4890
  struct iv_use *use;
4891
  struct cost_pair *old_cp, *new_cp, *cp;
4892
  bitmap_iterator bi;
4893
  struct iv_cand *cnd;
4894
  unsigned cost;
4895
 
4896
  *delta = NULL;
4897
  for (i = 0; i < n_iv_uses (data); i++)
4898
    {
4899
      use = iv_use (data, i);
4900
 
4901
      old_cp = iv_ca_cand_for_use (ivs, use);
4902
      if (old_cp->cand != cand)
4903
        continue;
4904
 
4905
      new_cp = NULL;
4906
 
4907
      if (data->consider_all_candidates)
4908
        {
4909
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4910
            {
4911
              if (ci == cand->id)
4912
                continue;
4913
 
4914
              cnd = iv_cand (data, ci);
4915
 
4916
              cp = get_use_iv_cost (data, use, cnd);
4917
              if (!cp)
4918
                continue;
4919
              if (!iv_ca_has_deps (ivs, cp))
4920
                continue;
4921
 
4922
              if (!cheaper_cost_pair (cp, new_cp))
4923
                continue;
4924
 
4925
              new_cp = cp;
4926
            }
4927
        }
4928
      else
4929
        {
4930
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4931
            {
4932
              if (ci == cand->id)
4933
                continue;
4934
 
4935
              cnd = iv_cand (data, ci);
4936
 
4937
              cp = get_use_iv_cost (data, use, cnd);
4938
              if (!cp)
4939
                continue;
4940
              if (!iv_ca_has_deps (ivs, cp))
4941
                continue;
4942
 
4943
              if (!cheaper_cost_pair (cp, new_cp))
4944
                continue;
4945
 
4946
              new_cp = cp;
4947
            }
4948
        }
4949
 
4950
      if (!new_cp)
4951
        {
4952
          iv_ca_delta_free (delta);
4953
          return INFTY;
4954
        }
4955
 
4956
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4957
    }
4958
 
4959
  iv_ca_delta_commit (data, ivs, *delta, true);
4960
  cost = iv_ca_cost (ivs);
4961
  iv_ca_delta_commit (data, ivs, *delta, false);
4962
 
4963
  return cost;
4964
}
4965
 
4966
/* Try optimizing the set of candidates IVS by removing candidates different
4967
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
4968
   differences in DELTA.  */
4969
 
4970
static unsigned
4971
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4972
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
4973
{
4974
  bitmap_iterator bi;
4975
  struct iv_ca_delta *act_delta, *best_delta;
4976
  unsigned i, best_cost, acost;
4977
  struct iv_cand *cand;
4978
 
4979
  best_delta = NULL;
4980
  best_cost = iv_ca_cost (ivs);
4981
 
4982
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4983
    {
4984
      cand = iv_cand (data, i);
4985
 
4986
      if (cand == except_cand)
4987
        continue;
4988
 
4989
      acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4990
 
4991
      if (acost < best_cost)
4992
        {
4993
          best_cost = acost;
4994
          iv_ca_delta_free (&best_delta);
4995
          best_delta = act_delta;
4996
        }
4997
      else
4998
        iv_ca_delta_free (&act_delta);
4999
    }
5000
 
5001
  if (!best_delta)
5002
    {
5003
      *delta = NULL;
5004
      return best_cost;
5005
    }
5006
 
5007
  /* Recurse to possibly remove other unnecessary ivs.  */
5008
  iv_ca_delta_commit (data, ivs, best_delta, true);
5009
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5010
  iv_ca_delta_commit (data, ivs, best_delta, false);
5011
  *delta = iv_ca_delta_join (best_delta, *delta);
5012
  return best_cost;
5013
}
5014
 
5015
/* Tries to extend the sets IVS in the best possible way in order
5016
   to express the USE.  */
5017
 
5018
static bool
5019
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5020
                  struct iv_use *use)
5021
{
5022
  unsigned best_cost, act_cost;
5023
  unsigned i;
5024
  bitmap_iterator bi;
5025
  struct iv_cand *cand;
5026
  struct iv_ca_delta *best_delta = NULL, *act_delta;
5027
  struct cost_pair *cp;
5028
 
5029
  iv_ca_add_use (data, ivs, use);
5030
  best_cost = iv_ca_cost (ivs);
5031
 
5032
  cp = iv_ca_cand_for_use (ivs, use);
5033
  if (cp)
5034
    {
5035
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5036
      iv_ca_set_no_cp (data, ivs, use);
5037
    }
5038
 
5039
  /* First try important candidates.  Only if it fails, try the specific ones.
5040
     Rationale -- in loops with many variables the best choice often is to use
5041
     just one generic biv.  If we added here many ivs specific to the uses,
5042
     the optimization algorithm later would be likely to get stuck in a local
5043
     minimum, thus causing us to create too many ivs.  The approach from
5044
     few ivs to more seems more likely to be successful -- starting from few
5045
     ivs, replacing an expensive use by a specific iv should always be a
5046
     win.  */
5047
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5048
    {
5049
      cand = iv_cand (data, i);
5050
 
5051
      if (iv_ca_cand_used_p (ivs, cand))
5052
        continue;
5053
 
5054
      cp = get_use_iv_cost (data, use, cand);
5055
      if (!cp)
5056
        continue;
5057
 
5058
      iv_ca_set_cp (data, ivs, use, cp);
5059
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5060
      iv_ca_set_no_cp (data, ivs, use);
5061
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5062
 
5063
      if (act_cost < best_cost)
5064
        {
5065
          best_cost = act_cost;
5066
 
5067
          iv_ca_delta_free (&best_delta);
5068
          best_delta = act_delta;
5069
        }
5070
      else
5071
        iv_ca_delta_free (&act_delta);
5072
    }
5073
 
5074
  if (best_cost == INFTY)
5075
    {
5076
      for (i = 0; i < use->n_map_members; i++)
5077
        {
5078
          cp = use->cost_map + i;
5079
          cand = cp->cand;
5080
          if (!cand)
5081
            continue;
5082
 
5083
          /* Already tried this.  */
5084
          if (cand->important)
5085
            continue;
5086
 
5087
          if (iv_ca_cand_used_p (ivs, cand))
5088
            continue;
5089
 
5090
          act_delta = NULL;
5091
          iv_ca_set_cp (data, ivs, use, cp);
5092
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5093
          iv_ca_set_no_cp (data, ivs, use);
5094
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5095
                                       cp, act_delta);
5096
 
5097
          if (act_cost < best_cost)
5098
            {
5099
              best_cost = act_cost;
5100
 
5101
              if (best_delta)
5102
                iv_ca_delta_free (&best_delta);
5103
              best_delta = act_delta;
5104
            }
5105
          else
5106
            iv_ca_delta_free (&act_delta);
5107
        }
5108
    }
5109
 
5110
  iv_ca_delta_commit (data, ivs, best_delta, true);
5111
  iv_ca_delta_free (&best_delta);
5112
 
5113
  return (best_cost != INFTY);
5114
}
5115
 
5116
/* Finds an initial assignment of candidates to uses.  */
5117
 
5118
static struct iv_ca *
5119
get_initial_solution (struct ivopts_data *data)
5120
{
5121
  struct iv_ca *ivs = iv_ca_new (data);
5122
  unsigned i;
5123
 
5124
  for (i = 0; i < n_iv_uses (data); i++)
5125
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5126
      {
5127
        iv_ca_free (&ivs);
5128
        return NULL;
5129
      }
5130
 
5131
  return ivs;
5132
}
5133
 
5134
/* Tries to improve set of induction variables IVS.  Returns true if a
   strictly cheaper set was found (IVS has been updated in place), and
   false when no single-candidate change improves the cost.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      /* ACT_DELTA describes the change of assignment needed to add CAND;
         it stays NULL when adding CAND does not help any use.  */
      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          /* Temporarily apply the addition, compute the pruning delta on
             top of it, then roll the addition back and merge both deltas
             so they can be committed (or discarded) as one unit.  */
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  /* Apply the cheapest delta found and verify the bookkeeping agrees.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
5190
 
5191
/* Attempts to find the optimal set of induction variables.  We do simple
5192
   greedy heuristic -- we try to replace at most one candidate in the selected
5193
   solution and remove the unused ivs while this improves the cost.  */
5194
 
5195
static struct iv_ca *
5196
find_optimal_iv_set (struct ivopts_data *data)
5197
{
5198
  unsigned i;
5199
  struct iv_ca *set;
5200
  struct iv_use *use;
5201
 
5202
  /* Get the initial solution.  */
5203
  set = get_initial_solution (data);
5204
  if (!set)
5205
    {
5206
      if (dump_file && (dump_flags & TDF_DETAILS))
5207
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5208
      return NULL;
5209
    }
5210
 
5211
  if (dump_file && (dump_flags & TDF_DETAILS))
5212
    {
5213
      fprintf (dump_file, "Initial set of candidates:\n");
5214
      iv_ca_dump (data, dump_file, set);
5215
    }
5216
 
5217
  while (try_improve_iv_set (data, set))
5218
    {
5219
      if (dump_file && (dump_flags & TDF_DETAILS))
5220
        {
5221
          fprintf (dump_file, "Improved to:\n");
5222
          iv_ca_dump (data, dump_file, set);
5223
        }
5224
    }
5225
 
5226
  if (dump_file && (dump_flags & TDF_DETAILS))
5227
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5228
 
5229
  for (i = 0; i < n_iv_uses (data); i++)
5230
    {
5231
      use = iv_use (data, i);
5232
      use->selected = iv_ca_cand_for_use (set, use)->cand;
5233
    }
5234
 
5235
  return set;
5236
}
5237
 
5238
/* Creates a new induction variable corresponding to CAND.  Emits the
   increment statement at the position described by CAND->pos; for
   IP_ORIGINAL candidates the existing biv is reused instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Candidates without an iv (e.g. replacement by a final value) need no
     new variable.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment before the exit test, at the end of the latch-entering
         block.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment at the end of the latch, after its last statement.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
5281
 
5282
/* Creates new induction variables described in SET.  */
5283
 
5284
static void
5285
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5286
{
5287
  unsigned i;
5288
  struct iv_cand *cand;
5289
  bitmap_iterator bi;
5290
 
5291
  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5292
    {
5293
      cand = iv_cand (data, i);
5294
      create_new_iv (data, cand);
5295
    }
5296
}
5297
 
5298
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5299
   is true, remove also the ssa name defined by the statement.  */
5300
 
5301
static void
5302
remove_statement (tree stmt, bool including_defined_name)
5303
{
5304
  if (TREE_CODE (stmt) == PHI_NODE)
5305
    {
5306
      if (!including_defined_name)
5307
        {
5308
          /* Prevent the ssa name defined by the statement from being removed.  */
5309
          SET_PHI_RESULT (stmt, NULL);
5310
        }
5311
      remove_phi_node (stmt, NULL_TREE);
5312
    }
5313
  else
5314
    {
5315
      block_stmt_iterator bsi = bsi_for_stmt (stmt);
5316
 
5317
      bsi_remove (&bsi);
5318
    }
5319
}
5320
 
5321
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  The statement (or phi node) defining the use is
   replaced so that its value is computed from CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step is expressed as a subtraction of its operand.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
          || TREE_CODE (op) == MINUS_EXPR)
        {
          /* Isolate the operand that is added to / subtracted from
             var_before, if there is one.  */
          if (TREE_OPERAND (op, 0) == cand->var_before)
            op = TREE_OPERAND (op, 1);
          else if (TREE_CODE (op) == PLUS_EXPR
                   && TREE_OPERAND (op, 1) == cand->var_before)
            op = TREE_OPERAND (op, 0);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      /* Constants and the step itself are loop-invariant, so the
         increment may stay as it is.  */
      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);

  /* Determine the replacement target TGT and the insertion point BSI.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      /* Position BSI on the last label of the block (new statements must
         go after the labels).  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
        {
          bsi = pbsi;
          bsi_next (&pbsi);
        }
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Gimplify COMP; STMTS receives any statements needed to compute it.  */
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an ordinary assignment to TGT.  */
      if (stmts)
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* Just replace the right-hand side of the assignment.  */
      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
5434
 
5435
/* Replaces ssa name in index IDX by its basic variable.  Callback for
5436
   for_each_index.  */
5437
 
5438
static bool
5439
idx_remove_ssa_names (tree base, tree *idx,
5440
                      void *data ATTRIBUTE_UNUSED)
5441
{
5442
  tree *op;
5443
 
5444
  if (TREE_CODE (*idx) == SSA_NAME)
5445
    *idx = SSA_NAME_VAR (*idx);
5446
 
5447
  if (TREE_CODE (base) == ARRAY_REF)
5448
    {
5449
      op = &TREE_OPERAND (base, 2);
5450
      if (*op
5451
          && TREE_CODE (*op) == SSA_NAME)
5452
        *op = SSA_NAME_VAR (*op);
5453
      op = &TREE_OPERAND (base, 3);
5454
      if (*op
5455
          && TREE_CODE (*op) == SSA_NAME)
5456
        *op = SSA_NAME_VAR (*op);
5457
    }
5458
 
5459
  return true;
5460
}
5461
 
5462
/* Unshares REF and replaces ssa names inside it by their basic variables.  */
5463
 
5464
static tree
5465
unshare_and_remove_ssa_names (tree ref)
5466
{
5467
  ref = unshare_expr (ref);
5468
  for_each_index (&ref, idx_remove_ssa_names, NULL);
5469
 
5470
  return ref;
5471
}
5472
 
5473
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  ORIG is the
   original (un-rewritten) form of the reference.  Returns NULL_TREE when
   no usable tag can be found.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree tag, sv;
  unsigned HOST_WIDE_INT offset, size;

  /* If some component of ORIG is suitable for subvariable-based alias
     info, use (a copy of) that component as the tag.  */
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    if (okay_component_ref_for_subvars (sv, &offset, &size))
      break;

  if (handled_component_p (sv))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* In case the base is a dereference of a pointer, first check its name
         mem tag, and if it does not have one, use type mem tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
        return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
        {
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
          if (tag)
            return tag;
        }

      /* Fall back to the type mem tag of the pointer variable; every such
         pointer is expected to have one.  */
      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->type_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
        return NULL_TREE;

      /* Prefer the decl's type mem tag; otherwise the decl itself serves
         as the tag.  */
      tag = var_ann (var)->type_mem_tag;
      if (tag)
        return tag;

      return var;
    }
}
5529
 
5530
/* Copies the reference information from OLD_REF to NEW_REF.  */
5531
 
5532
static void
5533
copy_ref_info (tree new_ref, tree old_ref)
5534
{
5535
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5536
    copy_mem_ref_info (new_ref, old_ref);
5537
  else
5538
    {
5539
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5540
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5541
    }
5542
}
5543
 
5544
/* Rewrites USE (address that is an iv) using candidate CAND.  */
5545
 
5546
static void
5547
rewrite_use_address (struct ivopts_data *data,
5548
                     struct iv_use *use, struct iv_cand *cand)
5549
{
5550
  struct affine_tree_combination aff;
5551
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5552
  tree ref;
5553
 
5554
  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5555
  unshare_aff_combination (&aff);
5556
 
5557
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5558
  copy_ref_info (ref, *use->op_p);
5559
  *use->op_p = ref;
5560
}
5561
 
5562
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If the iv can be eliminated, the condition is replaced
   by a comparison of CAND against a precomputed bound; otherwise the iv
   operand is just re-expressed in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  /* A non-NULL value means iv elimination was determined to be possible;
     it is the bound to compare CAND against.  */
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      /* Gimplify the bound; any needed statements go before the use.  */
      op = force_gimple_operand (unshare_expr (bound), &stmts,
                                 true, NULL_TREE);

      if (stmts)
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  /* Find which operand of the condition is the (non-invariant) iv.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
5610
 
5611
/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  If the value is defined inside the
   loop, the use is redirected through a phi node in EXIT->dest (creating
   one if necessary).  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  /* Default definitions have no defining block and need no protection.  */
  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  /* If the definition already dominates the exit destination's loop,
     nothing needs to change.  */
  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  SET_USE (op_p, PHI_RESULT (phi));
}
5650
 
5651
/* Ensure that operands of STMT may be used at the end of EXIT without
5652
   violating loop closed ssa form.  */
5653
 
5654
static void
5655
protect_loop_closed_ssa_form (edge exit, tree stmt)
5656
{
5657
  ssa_op_iter iter;
5658
  use_operand_p use_p;
5659
 
5660
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5661
    protect_loop_closed_ssa_form_use (exit, use_p);
5662
}
5663
 
5664
/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5665
   so that they are emitted on the correct place, and so that the loop closed
5666
   ssa form is preserved.  */
5667
 
5668
void
5669
compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5670
{
5671
  tree_stmt_iterator tsi;
5672
  block_stmt_iterator bsi;
5673
  tree phi, stmt, def, next;
5674
 
5675
  if (!single_pred_p (exit->dest))
5676
    split_loop_exit_edge (exit);
5677
 
5678
  /* Ensure there is label in exit->dest, so that we can
5679
     insert after it.  */
5680
  tree_block_label (exit->dest);
5681
  bsi = bsi_after_labels (exit->dest);
5682
 
5683
  if (TREE_CODE (stmts) == STATEMENT_LIST)
5684
    {
5685
      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5686
        {
5687
          tree stmt = tsi_stmt (tsi);
5688
          bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5689
          protect_loop_closed_ssa_form (exit, stmt);
5690
        }
5691
    }
5692
  else
5693
    {
5694
      bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5695
      protect_loop_closed_ssa_form (exit, stmts);
5696
    }
5697
 
5698
  if (!op)
5699
    return;
5700
 
5701
  for (phi = phi_nodes (exit->dest); phi; phi = next)
5702
    {
5703
      next = PHI_CHAIN (phi);
5704
 
5705
      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5706
        {
5707
          def = PHI_RESULT (phi);
5708
          remove_statement (phi, false);
5709
          stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5710
                        def, op);
5711
          SSA_NAME_DEF_STMT (def) = stmt;
5712
          bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5713
        }
5714
    }
5715
}
5716
 
5717
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  With a single dominating exit the value is
   computed on the exit edge and substituted into the exit phi nodes;
   otherwise the use is rewritten like a nonlinear expression.  */

static void
rewrite_use_outer (struct ivopts_data *data,
                   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* TGT is the ssa name whose value escapes the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit)
    {
      /* A candidate without an iv carries a precomputed final value.  */
      if (!cand->iv)
        {
          struct cost_pair *cp = get_use_iv_cost (data, use, cand);
          value = unshare_expr (cp->value);
        }
      else
        value = get_computation_at (data->current_loop,
                                    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
         some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
        return;

      /* Redirect all exit phi arguments that read TGT to the new value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
        {
          use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

          if (USE_FROM_PTR (use_p) == tgt)
            SET_USE (use_p, op);
        }

      /* Emit the statements computing the value on the exit edge.  */
      if (stmts)
        compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
         since we may still need the aliasing information attached to the
         ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5786
 
5787
/* Rewrites USE using candidate CAND.  */
5788
 
5789
static void
5790
rewrite_use (struct ivopts_data *data,
5791
             struct iv_use *use, struct iv_cand *cand)
5792
{
5793
  switch (use->type)
5794
    {
5795
      case USE_NONLINEAR_EXPR:
5796
        rewrite_use_nonlinear_expr (data, use, cand);
5797
        break;
5798
 
5799
      case USE_OUTER:
5800
        rewrite_use_outer (data, use, cand);
5801
        break;
5802
 
5803
      case USE_ADDRESS:
5804
        rewrite_use_address (data, use, cand);
5805
        break;
5806
 
5807
      case USE_COMPARE:
5808
        rewrite_use_compare (data, use, cand);
5809
        break;
5810
 
5811
      default:
5812
        gcc_unreachable ();
5813
    }
5814
  update_stmt (use->stmt);
5815
}
5816
 
5817
/* Rewrite the uses using the selected induction variables.  */
5818
 
5819
static void
5820
rewrite_uses (struct ivopts_data *data)
5821
{
5822
  unsigned i;
5823
  struct iv_cand *cand;
5824
  struct iv_use *use;
5825
 
5826
  for (i = 0; i < n_iv_uses (data); i++)
5827
    {
5828
      use = iv_use (data, i);
5829
      cand = use->selected;
5830
      gcc_assert (cand);
5831
 
5832
      rewrite_use (data, use, cand);
5833
    }
5834
}
5835
 
5836
/* Removes the ivs that are not used after rewriting.  */
5837
 
5838
static void
5839
remove_unused_ivs (struct ivopts_data *data)
5840
{
5841
  unsigned j;
5842
  bitmap_iterator bi;
5843
 
5844
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5845
    {
5846
      struct version_info *info;
5847
 
5848
      info = ver_info (data, j);
5849
      if (info->iv
5850
          && !zero_p (info->iv->step)
5851
          && !info->inv_id
5852
          && !info->iv->have_use_for
5853
          && !info->preserve_biv)
5854
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5855
    }
5856
}
5857
 
5858
/* Frees data allocated by the optimization of a single loop.  */
5859
 
5860
static void
5861
free_loop_data (struct ivopts_data *data)
5862
{
5863
  unsigned i, j;
5864
  bitmap_iterator bi;
5865
  tree obj;
5866
 
5867
  htab_empty (data->niters);
5868
 
5869
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5870
    {
5871
      struct version_info *info;
5872
 
5873
      info = ver_info (data, i);
5874
      if (info->iv)
5875
        free (info->iv);
5876
      info->iv = NULL;
5877
      info->has_nonlin_use = false;
5878
      info->preserve_biv = false;
5879
      info->inv_id = 0;
5880
    }
5881
  bitmap_clear (data->relevant);
5882
  bitmap_clear (data->important_candidates);
5883
 
5884
  for (i = 0; i < n_iv_uses (data); i++)
5885
    {
5886
      struct iv_use *use = iv_use (data, i);
5887
 
5888
      free (use->iv);
5889
      BITMAP_FREE (use->related_cands);
5890
      for (j = 0; j < use->n_map_members; j++)
5891
        if (use->cost_map[j].depends_on)
5892
          BITMAP_FREE (use->cost_map[j].depends_on);
5893
      free (use->cost_map);
5894
      free (use);
5895
    }
5896
  VEC_truncate (iv_use_p, data->iv_uses, 0);
5897
 
5898
  for (i = 0; i < n_iv_cands (data); i++)
5899
    {
5900
      struct iv_cand *cand = iv_cand (data, i);
5901
 
5902
      if (cand->iv)
5903
        free (cand->iv);
5904
      if (cand->depends_on)
5905
        BITMAP_FREE (cand->depends_on);
5906
      free (cand);
5907
    }
5908
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5909
 
5910
  if (data->version_info_size < num_ssa_names)
5911
    {
5912
      data->version_info_size = 2 * num_ssa_names;
5913
      free (data->version_info);
5914
      data->version_info = xcalloc (data->version_info_size,
5915
                                    sizeof (struct version_info));
5916
    }
5917
 
5918
  data->max_inv_id = 0;
5919
 
5920
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5921
    SET_DECL_RTL (obj, NULL_RTX);
5922
 
5923
  VEC_truncate (tree, decl_rtl_to_reset, 0);
5924
}
5925
 
5926
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5927
   loop tree.  */
5928
 
5929
static void
5930
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5931
{
5932
  unsigned i;
5933
 
5934
  for (i = 1; i < loops->num; i++)
5935
    if (loops->parray[i])
5936
      {
5937
        free (loops->parray[i]->aux);
5938
        loops->parray[i]->aux = NULL;
5939
      }
5940
 
5941
  free_loop_data (data);
5942
  free (data->version_info);
5943
  BITMAP_FREE (data->relevant);
5944
  BITMAP_FREE (data->important_candidates);
5945
  htab_delete (data->niters);
5946
 
5947
  VEC_free (tree, heap, decl_rtl_to_reset);
5948
  VEC_free (iv_use_p, heap, data->iv_uses);
5949
  VEC_free (iv_cand_p, heap, data->iv_candidates);
5950
}
5951
 
5952
/* Optimizes the LOOP.  Returns true if anything changed.  Drives the
   whole per-loop pipeline: iv discovery, use and candidate collection,
   cost computation, set selection, and the final rewrite.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  /* Give up on loops with too many uses to keep the analysis cheap.  */
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop state is released on all paths, including early exits.  */
  free_loop_data (data);

  return changed;
}
6023
 
6024
/* Main entry point.  Optimizes induction variables in LOOPS.  The loop
   tree is walked in inner-loops-first order so that inner loops are
   already optimized when their enclosing loops are processed.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      /* Advance in inner-first order: descend into the deepest inner
         loop of the next sibling, or climb to the parent when there is
         no sibling left.  */
      if (loop->next)
        {
          loop = loop->next;
          while (loop->inner)
            loop = loop->inner;
        }
      else
        loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (loops, &data);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.