OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.2.2/] [gcc/] [tree-ssa-loop-ivopts.c] - Blame information for rev 455

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Induction variable optimizations.
2
   Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it
7
under the terms of the GNU General Public License as published by the
8
Free Software Foundation; either version 3, or (at your option) any
9
later version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING3.  If not see
18
<http://www.gnu.org/licenses/>.  */
19
 
20
/* This pass tries to find the optimal set of induction variables for the loop.
21
   It optimizes just the basic linear induction variables (although adding
22
   support for other types should not be too hard).  It includes the
23
   optimizations commonly known as strength reduction, induction variable
24
   coalescing and induction variable elimination.  It does it in the
25
   following steps:
26
 
27
   1) The interesting uses of induction variables are found.  This includes
28
 
29
      -- uses of induction variables in non-linear expressions
30
      -- addresses of arrays
31
      -- comparisons of induction variables
32
 
33
   2) Candidates for the induction variables are found.  This includes
34
 
35
      -- old induction variables
36
      -- the variables defined by expressions derived from the "interesting
37
         uses" above
38
 
39
   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
40
      cost function assigns a cost to sets of induction variables and consists
41
      of three parts:
42
 
43
      -- The use costs.  Each of the interesting uses chooses the best induction
44
         variable in the set and adds its cost to the sum.  The cost reflects
45
         the time spent on modifying the induction variables value to be usable
46
         for the given purpose (adding base and offset for arrays, etc.).
47
      -- The variable costs.  Each of the variables has a cost assigned that
48
         reflects the costs associated with incrementing the value of the
49
         variable.  The original variables are somewhat preferred.
50
      -- The set cost.  Depending on the size of the set, extra cost may be
51
         added to reflect register pressure.
52
 
53
      All the costs are defined in a machine-specific way, using the target
54
      hooks and machine descriptions to determine them.
55
 
56
   4) The trees are transformed to use the new variables, the dead code is
57
      removed.
58
 
59
   All of this is done loop by loop.  Doing it globally is theoretically
60
   possible, it might give a better performance and it might enable us
61
   to decide costs more precisely, but getting all the interactions right
62
   would be complicated.  */
63
 
64
#include "config.h"
65
#include "system.h"
66
#include "coretypes.h"
67
#include "tm.h"
68
#include "tree.h"
69
#include "rtl.h"
70
#include "tm_p.h"
71
#include "hard-reg-set.h"
72
#include "basic-block.h"
73
#include "output.h"
74
#include "diagnostic.h"
75
#include "tree-flow.h"
76
#include "tree-dump.h"
77
#include "timevar.h"
78
#include "cfgloop.h"
79
#include "varray.h"
80
#include "expr.h"
81
#include "tree-pass.h"
82
#include "ggc.h"
83
#include "insn-config.h"
84
#include "recog.h"
85
#include "hashtab.h"
86
#include "tree-chrec.h"
87
#include "tree-scalar-evolution.h"
88
#include "cfgloop.h"
89
#include "params.h"
90
#include "langhooks.h"
91
 
92
/* The infinite cost.  */
93
#define INFTY 10000000
94
 
95
/* The expected number of loop iterations.  TODO -- use profiling instead of
96
   this.  */
97
#define AVG_LOOP_NITER(LOOP) 5
98
 
99
 
100
/* Representation of the induction variable.  */
101
struct iv
102
{
103
  tree base;            /* Initial value of the iv.  */
104
  tree base_object;     /* A memory object to that the induction variable points.  */
105
  tree step;            /* Step of the iv (constant only).  */
106
  tree ssa_name;        /* The ssa name with the value.  */
107
  bool biv_p;           /* Is it a biv?  */
108
  bool have_use_for;    /* Do we already have a use for it?  */
109
  unsigned use_id;      /* The identifier in the use if it is the case.  */
110
};
111
 
112
/* Per-ssa version information (induction variable descriptions, etc.).  */
113
struct version_info
114
{
115
  tree name;            /* The ssa name.  */
116
  struct iv *iv;        /* Induction variable description.  */
117
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
118
                           an expression that is not an induction variable.  */
119
  unsigned inv_id;      /* Id of an invariant.  */
120
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
121
};
122
 
123
/* Types of uses.  */
124
enum use_type
125
{
126
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
127
  USE_ADDRESS,          /* Use in an address.  */
128
  USE_COMPARE           /* Use is a compare.  */
129
};
130
 
131
/* The candidate - cost pair.  */
132
struct cost_pair
133
{
134
  struct iv_cand *cand; /* The candidate.  */
135
  unsigned cost;        /* The cost.  */
136
  bitmap depends_on;    /* The list of invariants that have to be
137
                           preserved.  */
138
  tree value;           /* For final value elimination, the expression for
139
                           the final value of the iv.  For iv elimination,
140
                           the new bound to compare with.  */
141
};
142
 
143
/* Use.  */
144
struct iv_use
145
{
146
  unsigned id;          /* The id of the use.  */
147
  enum use_type type;   /* Type of the use.  */
148
  struct iv *iv;        /* The induction variable it is based on.  */
149
  tree stmt;            /* Statement in that it occurs.  */
150
  tree *op_p;           /* The place where it occurs.  */
151
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
152
                           important ones.  */
153
 
154
  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
155
  struct cost_pair *cost_map;
156
                        /* The costs wrto the iv candidates.  */
157
 
158
  struct iv_cand *selected;
159
                        /* The selected candidate.  */
160
};
161
 
162
/* The position where the iv is computed.  */
163
enum iv_position
164
{
165
  IP_NORMAL,            /* At the end, just before the exit condition.  */
166
  IP_END,               /* At the end of the latch block.  */
167
  IP_ORIGINAL           /* The original biv.  */
168
};
169
 
170
/* The induction variable candidate.  */
171
struct iv_cand
172
{
173
  unsigned id;          /* The number of the candidate.  */
174
  bool important;       /* Whether this is an "important" candidate, i.e. such
175
                           that it should be considered by all uses.  */
176
  enum iv_position pos; /* Where it is computed.  */
177
  tree incremented_at;  /* For original biv, the statement where it is
178
                           incremented.  */
179
  tree var_before;      /* The variable used for it before increment.  */
180
  tree var_after;       /* The variable used for it after increment.  */
181
  struct iv *iv;        /* The value of the candidate.  NULL for
182
                           "pseudocandidate" used to indicate the possibility
183
                           to replace the final value of an iv by direct
184
                           computation of the value.  */
185
  unsigned cost;        /* Cost of the candidate.  */
186
  bitmap depends_on;    /* The list of invariants that are used in step of the
187
                           biv.  */
188
};
189
 
190
/* The data used by the induction variable optimizations.  */
191
 
192
typedef struct iv_use *iv_use_p;
193
DEF_VEC_P(iv_use_p);
194
DEF_VEC_ALLOC_P(iv_use_p,heap);
195
 
196
typedef struct iv_cand *iv_cand_p;
197
DEF_VEC_P(iv_cand_p);
198
DEF_VEC_ALLOC_P(iv_cand_p,heap);
199
 
200
struct ivopts_data
201
{
202
  /* The currently optimized loop.  */
203
  struct loop *current_loop;
204
 
205
  /* Number of registers used in it.  */
206
  unsigned regs_used;
207
 
208
  /* Numbers of iterations for all exits of the current loop.  */
209
  htab_t niters;
210
 
211
  /* The size of version_info array allocated.  */
212
  unsigned version_info_size;
213
 
214
  /* The array of information for the ssa names.  */
215
  struct version_info *version_info;
216
 
217
  /* The bitmap of indices in version_info whose value was changed.  */
218
  bitmap relevant;
219
 
220
  /* The maximum invariant id.  */
221
  unsigned max_inv_id;
222
 
223
  /* The uses of induction variables.  */
224
  VEC(iv_use_p,heap) *iv_uses;
225
 
226
  /* The candidates.  */
227
  VEC(iv_cand_p,heap) *iv_candidates;
228
 
229
  /* A bitmap of important candidates.  */
230
  bitmap important_candidates;
231
 
232
  /* Whether to consider just related and important candidates when replacing a
233
     use.  */
234
  bool consider_all_candidates;
235
};
236
 
237
/* An assignment of iv candidates to uses.  */
238
 
239
struct iv_ca
240
{
241
  /* The number of uses covered by the assignment.  */
242
  unsigned upto;
243
 
244
  /* Number of uses that cannot be expressed by the candidates in the set.  */
245
  unsigned bad_uses;
246
 
247
  /* Candidate assigned to a use, together with the related costs.  */
248
  struct cost_pair **cand_for_use;
249
 
250
  /* Number of times each candidate is used.  */
251
  unsigned *n_cand_uses;
252
 
253
  /* The candidates used.  */
254
  bitmap cands;
255
 
256
  /* The number of candidates in the set.  */
257
  unsigned n_cands;
258
 
259
  /* Total number of registers needed.  */
260
  unsigned n_regs;
261
 
262
  /* Total cost of expressing uses.  */
263
  unsigned cand_use_cost;
264
 
265
  /* Total cost of candidates.  */
266
  unsigned cand_cost;
267
 
268
  /* Number of times each invariant is used.  */
269
  unsigned *n_invariant_uses;
270
 
271
  /* Total cost of the assignment.  */
272
  unsigned cost;
273
};
274
 
275
/* Difference of two iv candidate assignments.  */
276
 
277
struct iv_ca_delta
278
{
279
  /* Changed use.  */
280
  struct iv_use *use;
281
 
282
  /* An old assignment (for rollback purposes).  */
283
  struct cost_pair *old_cp;
284
 
285
  /* A new assignment.  */
286
  struct cost_pair *new_cp;
287
 
288
  /* Next change in the list.  */
289
  struct iv_ca_delta *next_change;
290
};
291
 
292
/* Bound on number of candidates below that all candidates are considered.  */
293
 
294
#define CONSIDER_ALL_CANDIDATES_BOUND \
295
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
296
 
297
/* If there are more iv occurrences, we just give up (it is quite unlikely that
298
   optimizing such a loop would help, and it would take ages).  */
299
 
300
#define MAX_CONSIDERED_USES \
301
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
302
 
303
/* If there are at most this number of ivs in the set, try removing unnecessary
304
   ivs from the set always.  */
305
 
306
#define ALWAYS_PRUNE_CAND_SET_BOUND \
307
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
308
 
309
/* The list of trees for that the decl_rtl field must be reset is stored
310
   here.  */
311
 
312
static VEC(tree,heap) *decl_rtl_to_reset;
313
 
314
/* Number of uses recorded in DATA.  */
315
 
316
static inline unsigned
317
n_iv_uses (struct ivopts_data *data)
318
{
319
  return VEC_length (iv_use_p, data->iv_uses);
320
}
321
 
322
/* Ith use recorded in DATA.  */
323
 
324
static inline struct iv_use *
325
iv_use (struct ivopts_data *data, unsigned i)
326
{
327
  return VEC_index (iv_use_p, data->iv_uses, i);
328
}
329
 
330
/* Number of candidates recorded in DATA.  */
331
 
332
static inline unsigned
333
n_iv_cands (struct ivopts_data *data)
334
{
335
  return VEC_length (iv_cand_p, data->iv_candidates);
336
}
337
 
338
/* Ith candidate recorded in DATA.  */
339
 
340
static inline struct iv_cand *
341
iv_cand (struct ivopts_data *data, unsigned i)
342
{
343
  return VEC_index (iv_cand_p, data->iv_candidates, i);
344
}
345
 
346
/* The single loop exit if it dominates the latch, NULL otherwise.  */
347
 
348
edge
349
single_dom_exit (struct loop *loop)
350
{
351
  edge exit = loop->single_exit;
352
 
353
  if (!exit)
354
    return NULL;
355
 
356
  if (!just_once_each_iteration_p (loop, exit->src))
357
    return NULL;
358
 
359
  return exit;
360
}
361
 
362
/* Dumps information about the induction variable IV to FILE.  */
363
 
364
extern void dump_iv (FILE *, struct iv *);
365
void
366
dump_iv (FILE *file, struct iv *iv)
367
{
368
  if (iv->ssa_name)
369
    {
370
      fprintf (file, "ssa name ");
371
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
372
      fprintf (file, "\n");
373
    }
374
 
375
  fprintf (file, "  type ");
376
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
377
  fprintf (file, "\n");
378
 
379
  if (iv->step)
380
    {
381
      fprintf (file, "  base ");
382
      print_generic_expr (file, iv->base, TDF_SLIM);
383
      fprintf (file, "\n");
384
 
385
      fprintf (file, "  step ");
386
      print_generic_expr (file, iv->step, TDF_SLIM);
387
      fprintf (file, "\n");
388
    }
389
  else
390
    {
391
      fprintf (file, "  invariant ");
392
      print_generic_expr (file, iv->base, TDF_SLIM);
393
      fprintf (file, "\n");
394
    }
395
 
396
  if (iv->base_object)
397
    {
398
      fprintf (file, "  base object ");
399
      print_generic_expr (file, iv->base_object, TDF_SLIM);
400
      fprintf (file, "\n");
401
    }
402
 
403
  if (iv->biv_p)
404
    fprintf (file, "  is a biv\n");
405
}
406
 
407
/* Dumps information about the USE to FILE.  */
408
 
409
extern void dump_use (FILE *, struct iv_use *);
410
void
411
dump_use (FILE *file, struct iv_use *use)
412
{
413
  fprintf (file, "use %d\n", use->id);
414
 
415
  switch (use->type)
416
    {
417
    case USE_NONLINEAR_EXPR:
418
      fprintf (file, "  generic\n");
419
      break;
420
 
421
    case USE_ADDRESS:
422
      fprintf (file, "  address\n");
423
      break;
424
 
425
    case USE_COMPARE:
426
      fprintf (file, "  compare\n");
427
      break;
428
 
429
    default:
430
      gcc_unreachable ();
431
    }
432
 
433
  fprintf (file, "  in statement ");
434
  print_generic_expr (file, use->stmt, TDF_SLIM);
435
  fprintf (file, "\n");
436
 
437
  fprintf (file, "  at position ");
438
  if (use->op_p)
439
    print_generic_expr (file, *use->op_p, TDF_SLIM);
440
  fprintf (file, "\n");
441
 
442
  dump_iv (file, use->iv);
443
 
444
  if (use->related_cands)
445
    {
446
      fprintf (file, "  related candidates ");
447
      dump_bitmap (file, use->related_cands);
448
    }
449
}
450
 
451
/* Dumps information about the uses to FILE.  */
452
 
453
extern void dump_uses (FILE *, struct ivopts_data *);
454
void
455
dump_uses (FILE *file, struct ivopts_data *data)
456
{
457
  unsigned i;
458
  struct iv_use *use;
459
 
460
  for (i = 0; i < n_iv_uses (data); i++)
461
    {
462
      use = iv_use (data, i);
463
 
464
      dump_use (file, use);
465
      fprintf (file, "\n");
466
    }
467
}
468
 
469
/* Dumps information about induction variable candidate CAND to FILE.  */
470
 
471
extern void dump_cand (FILE *, struct iv_cand *);
472
void
473
dump_cand (FILE *file, struct iv_cand *cand)
474
{
475
  struct iv *iv = cand->iv;
476
 
477
  fprintf (file, "candidate %d%s\n",
478
           cand->id, cand->important ? " (important)" : "");
479
 
480
  if (cand->depends_on)
481
    {
482
      fprintf (file, "  depends on ");
483
      dump_bitmap (file, cand->depends_on);
484
    }
485
 
486
  if (!iv)
487
    {
488
      fprintf (file, "  final value replacement\n");
489
      return;
490
    }
491
 
492
  switch (cand->pos)
493
    {
494
    case IP_NORMAL:
495
      fprintf (file, "  incremented before exit test\n");
496
      break;
497
 
498
    case IP_END:
499
      fprintf (file, "  incremented at end\n");
500
      break;
501
 
502
    case IP_ORIGINAL:
503
      fprintf (file, "  original biv\n");
504
      break;
505
    }
506
 
507
  dump_iv (file, iv);
508
}
509
 
510
/* Returns the info for ssa version VER.  */
511
 
512
static inline struct version_info *
513
ver_info (struct ivopts_data *data, unsigned ver)
514
{
515
  return data->version_info + ver;
516
}
517
 
518
/* Returns the info for ssa name NAME.  */
519
 
520
static inline struct version_info *
521
name_info (struct ivopts_data *data, tree name)
522
{
523
  return ver_info (data, SSA_NAME_VERSION (name));
524
}
525
 
526
/* Checks whether there exists number X such that X * B = A, counting modulo
527
   2^BITS.  */
528
 
529
static bool
530
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
531
        HOST_WIDE_INT *x)
532
{
533
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
534
  unsigned HOST_WIDE_INT inv, ex, val;
535
  unsigned i;
536
 
537
  a &= mask;
538
  b &= mask;
539
 
540
  /* First divide the whole equation by 2 as long as possible.  */
541
  while (!(a & 1) && !(b & 1))
542
    {
543
      a >>= 1;
544
      b >>= 1;
545
      bits--;
546
      mask >>= 1;
547
    }
548
 
549
  if (!(b & 1))
550
    {
551
      /* If b is still even, a is odd and there is no such x.  */
552
      return false;
553
    }
554
 
555
  /* Find the inverse of b.  We compute it as
556
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
557
  inv = 1;
558
  ex = b;
559
  for (i = 0; i < bits - 1; i++)
560
    {
561
      inv = (inv * ex) & mask;
562
      ex = (ex * ex) & mask;
563
    }
564
 
565
  val = (a * inv) & mask;
566
 
567
  gcc_assert (((val * b) & mask) == a);
568
 
569
  if ((val >> (bits - 1)) & 1)
570
    val |= ~mask;
571
 
572
  *x = val;
573
 
574
  return true;
575
}
576
 
577
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
578
   emitted in LOOP.  */
579
 
580
static bool
581
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
582
{
583
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
584
 
585
  gcc_assert (bb);
586
 
587
  if (sbb == loop->latch)
588
    return true;
589
 
590
  if (sbb != bb)
591
    return false;
592
 
593
  return stmt == last_stmt (bb);
594
}
595
 
596
/* Returns true if STMT if after the place where the original induction
597
   variable CAND is incremented.  */
598
 
599
static bool
600
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
601
{
602
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
603
  basic_block stmt_bb = bb_for_stmt (stmt);
604
  block_stmt_iterator bsi;
605
 
606
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
607
    return false;
608
 
609
  if (stmt_bb != cand_bb)
610
    return true;
611
 
612
  /* Scan the block from the end, since the original ivs are usually
613
     incremented at the end of the loop body.  */
614
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
615
    {
616
      if (bsi_stmt (bsi) == cand->incremented_at)
617
        return false;
618
      if (bsi_stmt (bsi) == stmt)
619
        return true;
620
    }
621
}
622
 
623
/* Returns true if STMT if after the place where the induction variable
624
   CAND is incremented in LOOP.  */
625
 
626
static bool
627
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
628
{
629
  switch (cand->pos)
630
    {
631
    case IP_END:
632
      return false;
633
 
634
    case IP_NORMAL:
635
      return stmt_after_ip_normal_pos (loop, stmt);
636
 
637
    case IP_ORIGINAL:
638
      return stmt_after_ip_original_pos (cand, stmt);
639
 
640
    default:
641
      gcc_unreachable ();
642
    }
643
}
644
 
645
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
646
 
647
static bool
648
abnormal_ssa_name_p (tree exp)
649
{
650
  if (!exp)
651
    return false;
652
 
653
  if (TREE_CODE (exp) != SSA_NAME)
654
    return false;
655
 
656
  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
657
}
658
 
659
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
660
   abnormal phi node.  Callback for for_each_index.  */
661
 
662
static bool
663
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
664
                                  void *data ATTRIBUTE_UNUSED)
665
{
666
  if (TREE_CODE (base) == ARRAY_REF)
667
    {
668
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
669
        return false;
670
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
671
        return false;
672
    }
673
 
674
  return !abnormal_ssa_name_p (*index);
675
}
676
 
677
/* Returns true if EXPR contains a ssa name that occurs in an
678
   abnormal phi node.  */
679
 
680
bool
681
contains_abnormal_ssa_name_p (tree expr)
682
{
683
  enum tree_code code;
684
  enum tree_code_class class;
685
 
686
  if (!expr)
687
    return false;
688
 
689
  code = TREE_CODE (expr);
690
  class = TREE_CODE_CLASS (code);
691
 
692
  if (code == SSA_NAME)
693
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
694
 
695
  if (code == INTEGER_CST
696
      || is_gimple_min_invariant (expr))
697
    return false;
698
 
699
  if (code == ADDR_EXPR)
700
    return !for_each_index (&TREE_OPERAND (expr, 0),
701
                            idx_contains_abnormal_ssa_name_p,
702
                            NULL);
703
 
704
  switch (class)
705
    {
706
    case tcc_binary:
707
    case tcc_comparison:
708
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
709
        return true;
710
 
711
      /* Fallthru.  */
712
    case tcc_unary:
713
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
714
        return true;
715
 
716
      break;
717
 
718
    default:
719
      gcc_unreachable ();
720
    }
721
 
722
  return false;
723
}
724
 
725
/* Element of the table in that we cache the numbers of iterations obtained
726
   from exits of the loop.  */
727
 
728
struct nfe_cache_elt
729
{
730
  /* The edge for that the number of iterations is cached.  */
731
  edge exit;
732
 
733
  /* Number of iterations corresponding to this exit, or NULL if it cannot be
734
     determined.  */
735
  tree niter;
736
};
737
 
738
/* Hash function for nfe_cache_elt E.  */
739
 
740
static hashval_t
741
nfe_hash (const void *e)
742
{
743
  const struct nfe_cache_elt *elt = e;
744
 
745
  return htab_hash_pointer (elt->exit);
746
}
747
 
748
/* Equality function for nfe_cache_elt E1 and edge E2.  */
749
 
750
static int
751
nfe_eq (const void *e1, const void *e2)
752
{
753
  const struct nfe_cache_elt *elt1 = e1;
754
 
755
  return elt1->exit == e2;
756
}
757
 
758
/*  Returns tree describing number of iterations determined from
759
    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
760
 
761
static tree
762
niter_for_exit (struct ivopts_data *data, edge exit)
763
{
764
  struct nfe_cache_elt *nfe_desc;
765
  struct tree_niter_desc desc;
766
  PTR *slot;
767
 
768
  slot = htab_find_slot_with_hash (data->niters, exit,
769
                                   htab_hash_pointer (exit),
770
                                   INSERT);
771
 
772
  if (!*slot)
773
    {
774
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
775
      nfe_desc->exit = exit;
776
 
777
      /* Try to determine number of iterations.  We must know it
778
         unconditionally (i.e., without possibility of # of iterations
779
         being zero).  Also, we cannot safely work with ssa names that
780
         appear in phi nodes on abnormal edges, so that we do not create
781
         overlapping life ranges for them (PR 27283).  */
782
      if (number_of_iterations_exit (data->current_loop,
783
                                     exit, &desc, true)
784
          && zero_p (desc.may_be_zero)
785
          && !contains_abnormal_ssa_name_p (desc.niter))
786
        nfe_desc->niter = desc.niter;
787
      else
788
        nfe_desc->niter = NULL_TREE;
789
    }
790
  else
791
    nfe_desc = *slot;
792
 
793
  return nfe_desc->niter;
794
}
795
 
796
/* Returns tree describing number of iterations determined from
797
   single dominating exit of DATA->current_loop, or NULL if something
798
   goes wrong.  */
799
 
800
static tree
801
niter_for_single_dom_exit (struct ivopts_data *data)
802
{
803
  edge exit = single_dom_exit (data->current_loop);
804
 
805
  if (!exit)
806
    return NULL;
807
 
808
  return niter_for_exit (data, exit);
809
}
810
 
811
/* Initializes data structures used by the iv optimization pass, stored
812
   in DATA.  */
813
 
814
static void
815
tree_ssa_iv_optimize_init (struct ivopts_data *data)
816
{
817
  data->version_info_size = 2 * num_ssa_names;
818
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
819
  data->relevant = BITMAP_ALLOC (NULL);
820
  data->important_candidates = BITMAP_ALLOC (NULL);
821
  data->max_inv_id = 0;
822
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
823
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
824
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
825
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
826
}
827
 
828
/* Returns a memory object to that EXPR points.  In case we are able to
829
   determine that it does not point to any such object, NULL is returned.  */
830
 
831
static tree
832
determine_base_object (tree expr)
833
{
834
  enum tree_code code = TREE_CODE (expr);
835
  tree base, obj, op0, op1;
836
 
837
  /* If this is a pointer casted to any type, we need to determine
838
     the base object for the pointer; so handle conversions before
839
     throwing away non-pointer expressions.  */
840
  if (TREE_CODE (expr) == NOP_EXPR
841
      || TREE_CODE (expr) == CONVERT_EXPR)
842
    return determine_base_object (TREE_OPERAND (expr, 0));
843
 
844
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
845
    return NULL_TREE;
846
 
847
  switch (code)
848
    {
849
    case INTEGER_CST:
850
      return NULL_TREE;
851
 
852
    case ADDR_EXPR:
853
      obj = TREE_OPERAND (expr, 0);
854
      base = get_base_address (obj);
855
 
856
      if (!base)
857
        return expr;
858
 
859
      if (TREE_CODE (base) == INDIRECT_REF)
860
        return determine_base_object (TREE_OPERAND (base, 0));
861
 
862
      return fold_convert (ptr_type_node,
863
                           build_fold_addr_expr (base));
864
 
865
    case PLUS_EXPR:
866
    case MINUS_EXPR:
867
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
868
      op1 = determine_base_object (TREE_OPERAND (expr, 1));
869
 
870
      if (!op1)
871
        return op0;
872
 
873
      if (!op0)
874
        return (code == PLUS_EXPR
875
                ? op1
876
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
877
 
878
      return fold_build2 (code, ptr_type_node, op0, op1);
879
 
880
    default:
881
      return fold_convert (ptr_type_node, expr);
882
    }
883
}
884
 
885
/* Allocates an induction variable with given initial value BASE and step STEP
886
   for loop LOOP.  */
887
 
888
static struct iv *
889
alloc_iv (tree base, tree step)
890
{
891
  struct iv *iv = XCNEW (struct iv);
892
 
893
  if (step && integer_zerop (step))
894
    step = NULL_TREE;
895
 
896
  iv->base = base;
897
  iv->base_object = determine_base_object (base);
898
  iv->step = step;
899
  iv->biv_p = false;
900
  iv->have_use_for = false;
901
  iv->use_id = 0;
902
  iv->ssa_name = NULL_TREE;
903
 
904
  return iv;
905
}
906
 
907
/* Sets STEP and BASE for induction variable IV.  */
908
 
909
static void
910
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
911
{
912
  struct version_info *info = name_info (data, iv);
913
 
914
  gcc_assert (!info->iv);
915
 
916
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
917
  info->iv = alloc_iv (base, step);
918
  info->iv->ssa_name = iv;
919
}
920
 
921
/* Finds induction variable declaration for VAR.  */
922
 
923
static struct iv *
924
get_iv (struct ivopts_data *data, tree var)
925
{
926
  basic_block bb;
927
 
928
  if (!name_info (data, var)->iv)
929
    {
930
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
931
 
932
      if (!bb
933
          || !flow_bb_inside_loop_p (data->current_loop, bb))
934
        set_iv (data, var, var, NULL_TREE);
935
    }
936
 
937
  return name_info (data, var)->iv;
938
}
939
 
940
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
941
   not define a simple affine biv with nonzero step.  */
942
 
943
static tree
944
determine_biv_step (tree phi)
945
{
946
  struct loop *loop = bb_for_stmt (phi)->loop_father;
947
  tree name = PHI_RESULT (phi);
948
  affine_iv iv;
949
 
950
  if (!is_gimple_reg (name))
951
    return NULL_TREE;
952
 
953
  if (!simple_iv (loop, phi, name, &iv, true))
954
    return NULL_TREE;
955
 
956
  return (zero_p (iv.step) ? NULL_TREE : iv.step);
957
}
958
 
959
/* Finds basic ivs.  */
960
 
961
static bool
962
find_bivs (struct ivopts_data *data)
963
{
964
  tree phi, step, type, base;
965
  bool found = false;
966
  struct loop *loop = data->current_loop;
967
 
968
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
969
    {
970
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
971
        continue;
972
 
973
      step = determine_biv_step (phi);
974
      if (!step)
975
        continue;
976
 
977
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
978
      base = expand_simple_operations (base);
979
      if (contains_abnormal_ssa_name_p (base)
980
          || contains_abnormal_ssa_name_p (step))
981
        continue;
982
 
983
      type = TREE_TYPE (PHI_RESULT (phi));
984
      base = fold_convert (type, base);
985
      if (step)
986
        step = fold_convert (type, step);
987
 
988
      set_iv (data, PHI_RESULT (phi), base, step);
989
      found = true;
990
    }
991
 
992
  return found;
993
}
994
 
995
/* Marks basic ivs.  */
996
 
997
static void
998
mark_bivs (struct ivopts_data *data)
999
{
1000
  tree phi, var;
1001
  struct iv *iv, *incr_iv;
1002
  struct loop *loop = data->current_loop;
1003
  basic_block incr_bb;
1004
 
1005
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1006
    {
1007
      iv = get_iv (data, PHI_RESULT (phi));
1008
      if (!iv)
1009
        continue;
1010
 
1011
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1012
      incr_iv = get_iv (data, var);
1013
      if (!incr_iv)
1014
        continue;
1015
 
1016
      /* If the increment is in the subloop, ignore it.  */
1017
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1018
      if (incr_bb->loop_father != data->current_loop
1019
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1020
        continue;
1021
 
1022
      iv->biv_p = true;
1023
      incr_iv->biv_p = true;
1024
    }
1025
}
1026
 
1027
/* Checks whether STMT defines a linear induction variable and stores its
1028
   parameters to IV.  */
1029
 
1030
static bool
1031
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1032
{
1033
  tree lhs;
1034
  struct loop *loop = data->current_loop;
1035
 
1036
  iv->base = NULL_TREE;
1037
  iv->step = NULL_TREE;
1038
 
1039
  if (TREE_CODE (stmt) != MODIFY_EXPR)
1040
    return false;
1041
 
1042
  lhs = TREE_OPERAND (stmt, 0);
1043
  if (TREE_CODE (lhs) != SSA_NAME)
1044
    return false;
1045
 
1046
  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1047
    return false;
1048
  iv->base = expand_simple_operations (iv->base);
1049
 
1050
  if (contains_abnormal_ssa_name_p (iv->base)
1051
      || contains_abnormal_ssa_name_p (iv->step))
1052
    return false;
1053
 
1054
  return true;
1055
}
1056
 
1057
/* Finds general ivs in statement STMT.  */
1058
 
1059
static void
1060
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1061
{
1062
  affine_iv iv;
1063
 
1064
  if (!find_givs_in_stmt_scev (data, stmt, &iv))
1065
    return;
1066
 
1067
  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1068
}
1069
 
1070
/* Finds general ivs in basic block BB.  */
1071
 
1072
static void
1073
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1074
{
1075
  block_stmt_iterator bsi;
1076
 
1077
  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1078
    find_givs_in_stmt (data, bsi_stmt (bsi));
1079
}
1080
 
1081
/* Finds general ivs.  */
1082
 
1083
static void
1084
find_givs (struct ivopts_data *data)
1085
{
1086
  struct loop *loop = data->current_loop;
1087
  basic_block *body = get_loop_body_in_dom_order (loop);
1088
  unsigned i;
1089
 
1090
  for (i = 0; i < loop->num_nodes; i++)
1091
    find_givs_in_bb (data, body[i]);
1092
  free (body);
1093
}
1094
 
1095
/* For each ssa name defined in LOOP determines whether it is an induction
1096
   variable and if so, its initial value and step.  */
1097
 
1098
static bool
1099
find_induction_variables (struct ivopts_data *data)
1100
{
1101
  unsigned i;
1102
  bitmap_iterator bi;
1103
 
1104
  if (!find_bivs (data))
1105
    return false;
1106
 
1107
  find_givs (data);
1108
  mark_bivs (data);
1109
 
1110
  if (dump_file && (dump_flags & TDF_DETAILS))
1111
    {
1112
      tree niter = niter_for_single_dom_exit (data);
1113
 
1114
      if (niter)
1115
        {
1116
          fprintf (dump_file, "  number of iterations ");
1117
          print_generic_expr (dump_file, niter, TDF_SLIM);
1118
          fprintf (dump_file, "\n\n");
1119
        };
1120
 
1121
      fprintf (dump_file, "Induction variables:\n\n");
1122
 
1123
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1124
        {
1125
          if (ver_info (data, i)->iv)
1126
            dump_iv (dump_file, ver_info (data, i)->iv);
1127
        }
1128
    }
1129
 
1130
  return true;
1131
}
1132
 
1133
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1134
 
1135
static struct iv_use *
1136
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1137
            tree stmt, enum use_type use_type)
1138
{
1139
  struct iv_use *use = XCNEW (struct iv_use);
1140
 
1141
  use->id = n_iv_uses (data);
1142
  use->type = use_type;
1143
  use->iv = iv;
1144
  use->stmt = stmt;
1145
  use->op_p = use_p;
1146
  use->related_cands = BITMAP_ALLOC (NULL);
1147
 
1148
  /* To avoid showing ssa name in the dumps, if it was not reset by the
1149
     caller.  */
1150
  iv->ssa_name = NULL_TREE;
1151
 
1152
  if (dump_file && (dump_flags & TDF_DETAILS))
1153
    dump_use (dump_file, use);
1154
 
1155
  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1156
 
1157
  return use;
1158
}
1159
 
1160
/* Checks whether OP is a loop-level invariant and if so, records it.
1161
   NONLINEAR_USE is true if the invariant is used in a way we do not
1162
   handle specially.  */
1163
 
1164
static void
1165
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1166
{
1167
  basic_block bb;
1168
  struct version_info *info;
1169
 
1170
  if (TREE_CODE (op) != SSA_NAME
1171
      || !is_gimple_reg (op))
1172
    return;
1173
 
1174
  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1175
  if (bb
1176
      && flow_bb_inside_loop_p (data->current_loop, bb))
1177
    return;
1178
 
1179
  info = name_info (data, op);
1180
  info->name = op;
1181
  info->has_nonlin_use |= nonlinear_use;
1182
  if (!info->inv_id)
1183
    info->inv_id = ++data->max_inv_id;
1184
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1185
}
1186
 
1187
/* Checks whether the use OP is interesting and if so, records it.  */
1188
 
1189
static struct iv_use *
1190
find_interesting_uses_op (struct ivopts_data *data, tree op)
1191
{
1192
  struct iv *iv;
1193
  struct iv *civ;
1194
  tree stmt;
1195
  struct iv_use *use;
1196
 
1197
  if (TREE_CODE (op) != SSA_NAME)
1198
    return NULL;
1199
 
1200
  iv = get_iv (data, op);
1201
  if (!iv)
1202
    return NULL;
1203
 
1204
  if (iv->have_use_for)
1205
    {
1206
      use = iv_use (data, iv->use_id);
1207
 
1208
      gcc_assert (use->type == USE_NONLINEAR_EXPR);
1209
      return use;
1210
    }
1211
 
1212
  if (zero_p (iv->step))
1213
    {
1214
      record_invariant (data, op, true);
1215
      return NULL;
1216
    }
1217
  iv->have_use_for = true;
1218
 
1219
  civ = XNEW (struct iv);
1220
  *civ = *iv;
1221
 
1222
  stmt = SSA_NAME_DEF_STMT (op);
1223
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
1224
              || TREE_CODE (stmt) == MODIFY_EXPR);
1225
 
1226
  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1227
  iv->use_id = use->id;
1228
 
1229
  return use;
1230
}
1231
 
1232
/* Checks whether the condition *COND_P in STMT is interesting
1233
   and if so, records it.  */
1234
 
1235
static void
1236
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1237
{
1238
  tree *op0_p;
1239
  tree *op1_p;
1240
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1241
  struct iv const_iv;
1242
  tree zero = integer_zero_node;
1243
 
1244
  const_iv.step = NULL_TREE;
1245
 
1246
  if (TREE_CODE (*cond_p) != SSA_NAME
1247
      && !COMPARISON_CLASS_P (*cond_p))
1248
    return;
1249
 
1250
  if (TREE_CODE (*cond_p) == SSA_NAME)
1251
    {
1252
      op0_p = cond_p;
1253
      op1_p = &zero;
1254
    }
1255
  else
1256
    {
1257
      op0_p = &TREE_OPERAND (*cond_p, 0);
1258
      op1_p = &TREE_OPERAND (*cond_p, 1);
1259
    }
1260
 
1261
  if (TREE_CODE (*op0_p) == SSA_NAME)
1262
    iv0 = get_iv (data, *op0_p);
1263
  else
1264
    iv0 = &const_iv;
1265
 
1266
  if (TREE_CODE (*op1_p) == SSA_NAME)
1267
    iv1 = get_iv (data, *op1_p);
1268
  else
1269
    iv1 = &const_iv;
1270
 
1271
  if (/* When comparing with non-invariant value, we may not do any senseful
1272
         induction variable elimination.  */
1273
      (!iv0 || !iv1)
1274
      /* Eliminating condition based on two ivs would be nontrivial.
1275
         ??? TODO -- it is not really important to handle this case.  */
1276
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1277
    {
1278
      find_interesting_uses_op (data, *op0_p);
1279
      find_interesting_uses_op (data, *op1_p);
1280
      return;
1281
    }
1282
 
1283
  if (zero_p (iv0->step) && zero_p (iv1->step))
1284
    {
1285
      /* If both are invariants, this is a work for unswitching.  */
1286
      return;
1287
    }
1288
 
1289
  civ = XNEW (struct iv);
1290
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
1291
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
1292
}
1293
 
1294
/* Returns true if expression EXPR is obviously invariant in LOOP,
1295
   i.e. if all its operands are defined outside of the LOOP.  */
1296
 
1297
bool
1298
expr_invariant_in_loop_p (struct loop *loop, tree expr)
1299
{
1300
  basic_block def_bb;
1301
  unsigned i, len;
1302
 
1303
  if (is_gimple_min_invariant (expr))
1304
    return true;
1305
 
1306
  if (TREE_CODE (expr) == SSA_NAME)
1307
    {
1308
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1309
      if (def_bb
1310
          && flow_bb_inside_loop_p (loop, def_bb))
1311
        return false;
1312
 
1313
      return true;
1314
    }
1315
 
1316
  if (!EXPR_P (expr))
1317
    return false;
1318
 
1319
  len = TREE_CODE_LENGTH (TREE_CODE (expr));
1320
  for (i = 0; i < len; i++)
1321
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1322
      return false;
1323
 
1324
  return true;
1325
}
1326
 
1327
/* Cumulates the steps of indices into DATA and replaces their values with the
1328
   initial ones.  Returns false when the value of the index cannot be determined.
1329
   Callback for for_each_index.  */
1330
 
1331
struct ifs_ivopts_data
1332
{
1333
  struct ivopts_data *ivopts_data;
1334
  tree stmt;
1335
  tree *step_p;
1336
};
1337
 
1338
static bool
1339
idx_find_step (tree base, tree *idx, void *data)
1340
{
1341
  struct ifs_ivopts_data *dta = data;
1342
  struct iv *iv;
1343
  tree step, iv_base, iv_step, lbound, off;
1344
  struct loop *loop = dta->ivopts_data->current_loop;
1345
 
1346
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1347
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1348
    return false;
1349
 
1350
  /* If base is a component ref, require that the offset of the reference
1351
     be invariant.  */
1352
  if (TREE_CODE (base) == COMPONENT_REF)
1353
    {
1354
      off = component_ref_field_offset (base);
1355
      return expr_invariant_in_loop_p (loop, off);
1356
    }
1357
 
1358
  /* If base is array, first check whether we will be able to move the
1359
     reference out of the loop (in order to take its address in strength
1360
     reduction).  In order for this to work we need both lower bound
1361
     and step to be loop invariants.  */
1362
  if (TREE_CODE (base) == ARRAY_REF)
1363
    {
1364
      step = array_ref_element_size (base);
1365
      lbound = array_ref_low_bound (base);
1366
 
1367
      if (!expr_invariant_in_loop_p (loop, step)
1368
          || !expr_invariant_in_loop_p (loop, lbound))
1369
        return false;
1370
    }
1371
 
1372
  if (TREE_CODE (*idx) != SSA_NAME)
1373
    return true;
1374
 
1375
  iv = get_iv (dta->ivopts_data, *idx);
1376
  if (!iv)
1377
    return false;
1378
 
1379
  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1380
          *&x[0], which is not folded and does not trigger the
1381
          ARRAY_REF path below.  */
1382
  *idx = iv->base;
1383
 
1384
  if (!iv->step)
1385
    return true;
1386
 
1387
  if (TREE_CODE (base) == ARRAY_REF)
1388
    {
1389
      step = array_ref_element_size (base);
1390
 
1391
      /* We only handle addresses whose step is an integer constant.  */
1392
      if (TREE_CODE (step) != INTEGER_CST)
1393
        return false;
1394
    }
1395
  else
1396
    /* The step for pointer arithmetics already is 1 byte.  */
1397
    step = build_int_cst (sizetype, 1);
1398
 
1399
  iv_base = iv->base;
1400
  iv_step = iv->step;
1401
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
1402
                            sizetype, &iv_base, &iv_step, dta->stmt,
1403
                            false))
1404
    {
1405
      /* The index might wrap.  */
1406
      return false;
1407
    }
1408
 
1409
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1410
 
1411
  if (!*dta->step_p)
1412
    *dta->step_p = step;
1413
  else
1414
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1415
 
1416
  return true;
1417
}
1418
 
1419
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1420
   object is passed to it in DATA.  */
1421
 
1422
static bool
1423
idx_record_use (tree base, tree *idx,
1424
                void *data)
1425
{
1426
  find_interesting_uses_op (data, *idx);
1427
  if (TREE_CODE (base) == ARRAY_REF)
1428
    {
1429
      find_interesting_uses_op (data, array_ref_element_size (base));
1430
      find_interesting_uses_op (data, array_ref_low_bound (base));
1431
    }
1432
  return true;
1433
}
1434
 
1435
/* Returns true if memory reference REF may be unaligned.  */
1436
 
1437
static bool
1438
may_be_unaligned_p (tree ref)
1439
{
1440
  tree base;
1441
  tree base_type;
1442
  HOST_WIDE_INT bitsize;
1443
  HOST_WIDE_INT bitpos;
1444
  tree toffset;
1445
  enum machine_mode mode;
1446
  int unsignedp, volatilep;
1447
  unsigned base_align;
1448
 
1449
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1450
     thus they are not misaligned.  */
1451
  if (TREE_CODE (ref) == TARGET_MEM_REF)
1452
    return false;
1453
 
1454
  /* The test below is basically copy of what expr.c:normal_inner_ref
1455
     does to check whether the object must be loaded by parts when
1456
     STRICT_ALIGNMENT is true.  */
1457
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1458
                              &unsignedp, &volatilep, true);
1459
  base_type = TREE_TYPE (base);
1460
  base_align = TYPE_ALIGN (base_type);
1461
 
1462
  if (mode != BLKmode
1463
      && (base_align < GET_MODE_ALIGNMENT (mode)
1464
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1465
          || bitpos % BITS_PER_UNIT != 0))
1466
    return true;
1467
 
1468
  return false;
1469
}
1470
 
1471
/* Return true if EXPR may be non-addressable.   */
1472
 
1473
static bool
1474
may_be_nonaddressable_p (tree expr)
1475
{
1476
  switch (TREE_CODE (expr))
1477
    {
1478
    case COMPONENT_REF:
1479
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1480
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1481
 
1482
    case ARRAY_REF:
1483
    case ARRAY_RANGE_REF:
1484
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1485
 
1486
    case VIEW_CONVERT_EXPR:
1487
      /* This kind of view-conversions may wrap non-addressable objects
1488
         and make them look addressable.  After some processing the
1489
         non-addressability may be uncovered again, causing ADDR_EXPRs
1490
         of inappropriate objects to be built.  */
1491
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1492
             && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1493
 
1494
    default:
1495
      break;
1496
    }
1497
 
1498
  return false;
1499
}
1500
 
1501
/* Finds addresses in *OP_P inside STMT.  */
1502
 
1503
static void
1504
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1505
{
1506
  tree base = *op_p, step = NULL;
1507
  struct iv *civ;
1508
  struct ifs_ivopts_data ifs_ivopts_data;
1509
 
1510
  /* Do not play with volatile memory references.  A bit too conservative,
1511
     perhaps, but safe.  */
1512
  if (stmt_ann (stmt)->has_volatile_ops)
1513
    goto fail;
1514
 
1515
  /* Ignore bitfields for now.  Not really something terribly complicated
1516
     to handle.  TODO.  */
1517
  if (TREE_CODE (base) == BIT_FIELD_REF)
1518
    goto fail;
1519
 
1520
  if (may_be_nonaddressable_p (base))
1521
    goto fail;
1522
 
1523
  if (STRICT_ALIGNMENT
1524
      && may_be_unaligned_p (base))
1525
    goto fail;
1526
 
1527
  base = unshare_expr (base);
1528
 
1529
  if (TREE_CODE (base) == TARGET_MEM_REF)
1530
    {
1531
      tree type = build_pointer_type (TREE_TYPE (base));
1532
      tree astep;
1533
 
1534
      if (TMR_BASE (base)
1535
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1536
        {
1537
          civ = get_iv (data, TMR_BASE (base));
1538
          if (!civ)
1539
            goto fail;
1540
 
1541
          TMR_BASE (base) = civ->base;
1542
          step = civ->step;
1543
        }
1544
      if (TMR_INDEX (base)
1545
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1546
        {
1547
          civ = get_iv (data, TMR_INDEX (base));
1548
          if (!civ)
1549
            goto fail;
1550
 
1551
          TMR_INDEX (base) = civ->base;
1552
          astep = civ->step;
1553
 
1554
          if (astep)
1555
            {
1556
              if (TMR_STEP (base))
1557
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1558
 
1559
              if (step)
1560
                step = fold_build2 (PLUS_EXPR, type, step, astep);
1561
              else
1562
                step = astep;
1563
            }
1564
        }
1565
 
1566
      if (zero_p (step))
1567
        goto fail;
1568
      base = tree_mem_ref_addr (type, base);
1569
    }
1570
  else
1571
    {
1572
      ifs_ivopts_data.ivopts_data = data;
1573
      ifs_ivopts_data.stmt = stmt;
1574
      ifs_ivopts_data.step_p = &step;
1575
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1576
          || zero_p (step))
1577
        goto fail;
1578
 
1579
      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1580
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1581
 
1582
      base = build_fold_addr_expr (base);
1583
 
1584
      /* Substituting bases of IVs into the base expression might
1585
         have caused folding opportunities.  */
1586
      if (TREE_CODE (base) == ADDR_EXPR)
1587
        {
1588
          tree *ref = &TREE_OPERAND (base, 0);
1589
          while (handled_component_p (*ref))
1590
            ref = &TREE_OPERAND (*ref, 0);
1591
          if (TREE_CODE (*ref) == INDIRECT_REF)
1592
            *ref = fold_indirect_ref (*ref);
1593
        }
1594
    }
1595
 
1596
  civ = alloc_iv (base, step);
1597
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
1598
  return;
1599
 
1600
fail:
1601
  for_each_index (op_p, idx_record_use, data);
1602
}
1603
 
1604
/* Finds and records invariants used in STMT.  */
1605
 
1606
static void
1607
find_invariants_stmt (struct ivopts_data *data, tree stmt)
1608
{
1609
  ssa_op_iter iter;
1610
  use_operand_p use_p;
1611
  tree op;
1612
 
1613
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1614
    {
1615
      op = USE_FROM_PTR (use_p);
1616
      record_invariant (data, op, false);
1617
    }
1618
}
1619
 
1620
/* Finds interesting uses of induction variables in the statement STMT.  */
1621
 
1622
static void
1623
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1624
{
1625
  struct iv *iv;
1626
  tree op, lhs, rhs;
1627
  ssa_op_iter iter;
1628
  use_operand_p use_p;
1629
 
1630
  find_invariants_stmt (data, stmt);
1631
 
1632
  if (TREE_CODE (stmt) == COND_EXPR)
1633
    {
1634
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1635
      return;
1636
    }
1637
 
1638
  if (TREE_CODE (stmt) == MODIFY_EXPR)
1639
    {
1640
      lhs = TREE_OPERAND (stmt, 0);
1641
      rhs = TREE_OPERAND (stmt, 1);
1642
 
1643
      if (TREE_CODE (lhs) == SSA_NAME)
1644
        {
1645
          /* If the statement defines an induction variable, the uses are not
1646
             interesting by themselves.  */
1647
 
1648
          iv = get_iv (data, lhs);
1649
 
1650
          if (iv && !zero_p (iv->step))
1651
            return;
1652
        }
1653
 
1654
      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1655
        {
1656
        case tcc_comparison:
1657
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1658
          return;
1659
 
1660
        case tcc_reference:
1661
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1662
          if (REFERENCE_CLASS_P (lhs))
1663
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1664
          return;
1665
 
1666
        default: ;
1667
        }
1668
 
1669
      if (REFERENCE_CLASS_P (lhs)
1670
          && is_gimple_val (rhs))
1671
        {
1672
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1673
          find_interesting_uses_op (data, rhs);
1674
          return;
1675
        }
1676
 
1677
      /* TODO -- we should also handle address uses of type
1678
 
1679
         memory = call (whatever);
1680
 
1681
         and
1682
 
1683
         call (memory).  */
1684
    }
1685
 
1686
  if (TREE_CODE (stmt) == PHI_NODE
1687
      && bb_for_stmt (stmt) == data->current_loop->header)
1688
    {
1689
      lhs = PHI_RESULT (stmt);
1690
      iv = get_iv (data, lhs);
1691
 
1692
      if (iv && !zero_p (iv->step))
1693
        return;
1694
    }
1695
 
1696
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1697
    {
1698
      op = USE_FROM_PTR (use_p);
1699
 
1700
      if (TREE_CODE (op) != SSA_NAME)
1701
        continue;
1702
 
1703
      iv = get_iv (data, op);
1704
      if (!iv)
1705
        continue;
1706
 
1707
      find_interesting_uses_op (data, op);
1708
    }
1709
}
1710
 
1711
/* Finds interesting uses of induction variables outside of loops
1712
   on loop exit edge EXIT.  */
1713
 
1714
static void
1715
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1716
{
1717
  tree phi, def;
1718
 
1719
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1720
    {
1721
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1722
      find_interesting_uses_op (data, def);
1723
    }
1724
}
1725
 
1726
/* Finds uses of the induction variables that are interesting.  */
1727
 
1728
static void
1729
find_interesting_uses (struct ivopts_data *data)
1730
{
1731
  basic_block bb;
1732
  block_stmt_iterator bsi;
1733
  tree phi;
1734
  basic_block *body = get_loop_body (data->current_loop);
1735
  unsigned i;
1736
  struct version_info *info;
1737
  edge e;
1738
 
1739
  if (dump_file && (dump_flags & TDF_DETAILS))
1740
    fprintf (dump_file, "Uses:\n\n");
1741
 
1742
  for (i = 0; i < data->current_loop->num_nodes; i++)
1743
    {
1744
      edge_iterator ei;
1745
      bb = body[i];
1746
 
1747
      FOR_EACH_EDGE (e, ei, bb->succs)
1748
        if (e->dest != EXIT_BLOCK_PTR
1749
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1750
          find_interesting_uses_outside (data, e);
1751
 
1752
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1753
        find_interesting_uses_stmt (data, phi);
1754
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1755
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
1756
    }
1757
 
1758
  if (dump_file && (dump_flags & TDF_DETAILS))
1759
    {
1760
      bitmap_iterator bi;
1761
 
1762
      fprintf (dump_file, "\n");
1763
 
1764
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1765
        {
1766
          info = ver_info (data, i);
1767
          if (info->inv_id)
1768
            {
1769
              fprintf (dump_file, "  ");
1770
              print_generic_expr (dump_file, info->name, TDF_SLIM);
1771
              fprintf (dump_file, " is invariant (%d)%s\n",
1772
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1773
            }
1774
        }
1775
 
1776
      fprintf (dump_file, "\n");
1777
    }
1778
 
1779
  free (body);
1780
}
1781
 
1782
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1783
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1784
   we are at the top-level of the processed address.  */
1785
 
1786
static tree
1787
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1788
                unsigned HOST_WIDE_INT *offset)
1789
{
1790
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1791
  enum tree_code code;
1792
  tree type, orig_type = TREE_TYPE (expr);
1793
  unsigned HOST_WIDE_INT off0, off1, st;
1794
  tree orig_expr = expr;
1795
 
1796
  STRIP_NOPS (expr);
1797
 
1798
  type = TREE_TYPE (expr);
1799
  code = TREE_CODE (expr);
1800
  *offset = 0;
1801
 
1802
  switch (code)
1803
    {
1804
    case INTEGER_CST:
1805
      if (!cst_and_fits_in_hwi (expr)
1806
          || zero_p (expr))
1807
        return orig_expr;
1808
 
1809
      *offset = int_cst_value (expr);
1810
      return build_int_cst (orig_type, 0);
1811
 
1812
    case PLUS_EXPR:
1813
    case MINUS_EXPR:
1814
      op0 = TREE_OPERAND (expr, 0);
1815
      op1 = TREE_OPERAND (expr, 1);
1816
 
1817
      op0 = strip_offset_1 (op0, false, false, &off0);
1818
      op1 = strip_offset_1 (op1, false, false, &off1);
1819
 
1820
      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1821
      if (op0 == TREE_OPERAND (expr, 0)
1822
          && op1 == TREE_OPERAND (expr, 1))
1823
        return orig_expr;
1824
 
1825
      if (zero_p (op1))
1826
        expr = op0;
1827
      else if (zero_p (op0))
1828
        {
1829
          if (code == PLUS_EXPR)
1830
            expr = op1;
1831
          else
1832
            expr = fold_build1 (NEGATE_EXPR, type, op1);
1833
        }
1834
      else
1835
        expr = fold_build2 (code, type, op0, op1);
1836
 
1837
      return fold_convert (orig_type, expr);
1838
 
1839
    case ARRAY_REF:
1840
      if (!inside_addr)
1841
        return orig_expr;
1842
 
1843
      step = array_ref_element_size (expr);
1844
      if (!cst_and_fits_in_hwi (step))
1845
        break;
1846
 
1847
      st = int_cst_value (step);
1848
      op1 = TREE_OPERAND (expr, 1);
1849
      op1 = strip_offset_1 (op1, false, false, &off1);
1850
      *offset = off1 * st;
1851
 
1852
      if (top_compref
1853
          && zero_p (op1))
1854
        {
1855
          /* Strip the component reference completely.  */
1856
          op0 = TREE_OPERAND (expr, 0);
1857
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1858
          *offset += off0;
1859
          return op0;
1860
        }
1861
      break;
1862
 
1863
    case COMPONENT_REF:
1864
      if (!inside_addr)
1865
        return orig_expr;
1866
 
1867
      tmp = component_ref_field_offset (expr);
1868
      if (top_compref
1869
          && cst_and_fits_in_hwi (tmp))
1870
        {
1871
          /* Strip the component reference completely.  */
1872
          op0 = TREE_OPERAND (expr, 0);
1873
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1874
          *offset = off0 + int_cst_value (tmp);
1875
          return op0;
1876
        }
1877
      break;
1878
 
1879
    case ADDR_EXPR:
1880
      op0 = TREE_OPERAND (expr, 0);
1881
      op0 = strip_offset_1 (op0, true, true, &off0);
1882
      *offset += off0;
1883
 
1884
      if (op0 == TREE_OPERAND (expr, 0))
1885
        return orig_expr;
1886
 
1887
      expr = build_fold_addr_expr (op0);
1888
      return fold_convert (orig_type, expr);
1889
 
1890
    case INDIRECT_REF:
1891
      inside_addr = false;
1892
      break;
1893
 
1894
    default:
1895
      return orig_expr;
1896
    }
1897
 
1898
  /* Default handling of expressions for that we want to recurse into
1899
     the first operand.  */
1900
  op0 = TREE_OPERAND (expr, 0);
1901
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1902
  *offset += off0;
1903
 
1904
  if (op0 == TREE_OPERAND (expr, 0)
1905
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1906
    return orig_expr;
1907
 
1908
  expr = copy_node (expr);
1909
  TREE_OPERAND (expr, 0) = op0;
1910
  if (op1)
1911
    TREE_OPERAND (expr, 1) = op1;
1912
 
1913
  /* Inside address, we might strip the top level component references,
1914
     thus changing type of the expression.  Handling of ADDR_EXPR
1915
     will fix that.  */
1916
  expr = fold_convert (orig_type, expr);
1917
 
1918
  return expr;
1919
}
1920
 
1921
/* Strips constant offsets from EXPR and stores them to OFFSET.  */
1922
 
1923
static tree
1924
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1925
{
1926
  return strip_offset_1 (expr, false, false, offset);
1927
}
1928
 
1929
/* Returns variant of TYPE that can be used as base for different uses.
1930
   We return unsigned type with the same precision, which avoids problems
1931
   with overflows.  */
1932
 
1933
static tree
1934
generic_type_for (tree type)
1935
{
1936
  if (POINTER_TYPE_P (type))
1937
    return unsigned_type_for (type);
1938
 
1939
  if (TYPE_UNSIGNED (type))
1940
    return type;
1941
 
1942
  return unsigned_type_for (type);
1943
}
1944
 
1945
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1946
   the bitmap to that we should store it.  */
1947
 
1948
static struct ivopts_data *fd_ivopts_data;
1949
static tree
1950
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1951
{
1952
  bitmap *depends_on = data;
1953
  struct version_info *info;
1954
 
1955
  if (TREE_CODE (*expr_p) != SSA_NAME)
1956
    return NULL_TREE;
1957
  info = name_info (fd_ivopts_data, *expr_p);
1958
 
1959
  if (!info->inv_id || info->has_nonlin_use)
1960
    return NULL_TREE;
1961
 
1962
  if (!*depends_on)
1963
    *depends_on = BITMAP_ALLOC (NULL);
1964
  bitmap_set_bit (*depends_on, info->inv_id);
1965
 
1966
  return NULL_TREE;
1967
}
1968
 
1969
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1970
   position to POS.  If USE is not NULL, the candidate is set as related to
1971
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1972
   replacement of the final value of the iv by a direct computation.  */
1973
 
1974
static struct iv_cand *
1975
add_candidate_1 (struct ivopts_data *data,
1976
                 tree base, tree step, bool important, enum iv_position pos,
1977
                 struct iv_use *use, tree incremented_at)
1978
{
1979
  unsigned i;
1980
  struct iv_cand *cand = NULL;
1981
  tree type, orig_type;
1982
 
1983
  if (base)
1984
    {
1985
      orig_type = TREE_TYPE (base);
1986
      type = generic_type_for (orig_type);
1987
      if (type != orig_type)
1988
        {
1989
          base = fold_convert (type, base);
1990
          if (step)
1991
            step = fold_convert (type, step);
1992
        }
1993
    }
1994
 
1995
  for (i = 0; i < n_iv_cands (data); i++)
1996
    {
1997
      cand = iv_cand (data, i);
1998
 
1999
      if (cand->pos != pos)
2000
        continue;
2001
 
2002
      if (cand->incremented_at != incremented_at)
2003
        continue;
2004
 
2005
      if (!cand->iv)
2006
        {
2007
          if (!base && !step)
2008
            break;
2009
 
2010
          continue;
2011
        }
2012
 
2013
      if (!base && !step)
2014
        continue;
2015
 
2016
      if (!operand_equal_p (base, cand->iv->base, 0))
2017
        continue;
2018
 
2019
      if (zero_p (cand->iv->step))
2020
        {
2021
          if (zero_p (step))
2022
            break;
2023
        }
2024
      else
2025
        {
2026
          if (step && operand_equal_p (step, cand->iv->step, 0))
2027
            break;
2028
        }
2029
    }
2030
 
2031
  if (i == n_iv_cands (data))
2032
    {
2033
      cand = XCNEW (struct iv_cand);
2034
      cand->id = i;
2035
 
2036
      if (!base && !step)
2037
        cand->iv = NULL;
2038
      else
2039
        cand->iv = alloc_iv (base, step);
2040
 
2041
      cand->pos = pos;
2042
      if (pos != IP_ORIGINAL && cand->iv)
2043
        {
2044
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2045
          cand->var_after = cand->var_before;
2046
        }
2047
      cand->important = important;
2048
      cand->incremented_at = incremented_at;
2049
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2050
 
2051
      if (step
2052
          && TREE_CODE (step) != INTEGER_CST)
2053
        {
2054
          fd_ivopts_data = data;
2055
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
2056
        }
2057
 
2058
      if (dump_file && (dump_flags & TDF_DETAILS))
2059
        dump_cand (dump_file, cand);
2060
    }
2061
 
2062
  if (important && !cand->important)
2063
    {
2064
      cand->important = true;
2065
      if (dump_file && (dump_flags & TDF_DETAILS))
2066
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
2067
    }
2068
 
2069
  if (use)
2070
    {
2071
      bitmap_set_bit (use->related_cands, i);
2072
      if (dump_file && (dump_flags & TDF_DETAILS))
2073
        fprintf (dump_file, "Candidate %d is related to use %d\n",
2074
                 cand->id, use->id);
2075
    }
2076
 
2077
  return cand;
2078
}
2079
 
2080
/* Returns true if incrementing the induction variable at the end of the LOOP
2081
   is allowed.
2082
 
2083
   The purpose is to avoid splitting latch edge with a biv increment, thus
2084
   creating a jump, possibly confusing other optimization passes and leaving
2085
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2086
   is not available (so we do not have a better alternative), or if the latch
2087
   edge is already nonempty.  */
2088
 
2089
static bool
2090
allow_ip_end_pos_p (struct loop *loop)
2091
{
2092
  if (!ip_normal_pos (loop))
2093
    return true;
2094
 
2095
  if (!empty_block_p (ip_end_pos (loop)))
2096
    return true;
2097
 
2098
  return false;
2099
}
2100
 
2101
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2102
   position to POS.  If USE is not NULL, the candidate is set as related to
2103
   it.  The candidate computation is scheduled on all available positions.  */
2104
 
2105
static void
2106
add_candidate (struct ivopts_data *data,
2107
               tree base, tree step, bool important, struct iv_use *use)
2108
{
2109
  if (ip_normal_pos (data->current_loop))
2110
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2111
  if (ip_end_pos (data->current_loop)
2112
      && allow_ip_end_pos_p (data->current_loop))
2113
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2114
}
2115
 
2116
/* Add a standard "0 + 1 * iteration" iv candidate for a
2117
   type with SIZE bits.  */
2118
 
2119
static void
2120
add_standard_iv_candidates_for_size (struct ivopts_data *data,
2121
                                     unsigned int size)
2122
{
2123
  tree type = lang_hooks.types.type_for_size (size, true);
2124
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2125
                 true, NULL);
2126
}
2127
 
2128
/* Adds standard iv candidates.  */
2129
 
2130
static void
2131
add_standard_iv_candidates (struct ivopts_data *data)
2132
{
2133
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2134
 
2135
  /* The same for a double-integer type if it is still fast enough.  */
2136
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2137
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2138
}
2139
 
2140
 
2141
/* Adds candidates bases on the old induction variable IV.  */
2142
 
2143
static void
2144
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2145
{
2146
  tree phi, def;
2147
  struct iv_cand *cand;
2148
 
2149
  add_candidate (data, iv->base, iv->step, true, NULL);
2150
 
2151
  /* The same, but with initial value zero.  */
2152
  add_candidate (data,
2153
                 build_int_cst (TREE_TYPE (iv->base), 0),
2154
                 iv->step, true, NULL);
2155
 
2156
  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2157
  if (TREE_CODE (phi) == PHI_NODE)
2158
    {
2159
      /* Additionally record the possibility of leaving the original iv
2160
         untouched.  */
2161
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2162
      cand = add_candidate_1 (data,
2163
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
2164
                              SSA_NAME_DEF_STMT (def));
2165
      cand->var_before = iv->ssa_name;
2166
      cand->var_after = def;
2167
    }
2168
}
2169
 
2170
/* Adds candidates based on the old induction variables.  */
2171
 
2172
static void
2173
add_old_ivs_candidates (struct ivopts_data *data)
2174
{
2175
  unsigned i;
2176
  struct iv *iv;
2177
  bitmap_iterator bi;
2178
 
2179
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2180
    {
2181
      iv = ver_info (data, i)->iv;
2182
      if (iv && iv->biv_p && !zero_p (iv->step))
2183
        add_old_iv_candidates (data, iv);
2184
    }
2185
}
2186
 
2187
/* Adds candidates based on the value of the induction variable IV and USE.  */
2188
 
2189
static void
2190
add_iv_value_candidates (struct ivopts_data *data,
2191
                         struct iv *iv, struct iv_use *use)
2192
{
2193
  unsigned HOST_WIDE_INT offset;
2194
  tree base;
2195
 
2196
  add_candidate (data, iv->base, iv->step, false, use);
2197
 
2198
  /* The same, but with initial value zero.  Make such variable important,
2199
     since it is generic enough so that possibly many uses may be based
2200
     on it.  */
2201
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2202
                 iv->step, true, use);
2203
 
2204
  /* Third, try removing the constant offset.  */
2205
  base = strip_offset (iv->base, &offset);
2206
  if (offset)
2207
    add_candidate (data, base, iv->step, false, use);
2208
}
2209
 
2210
/* Adds candidates based on the uses.  */
2211
 
2212
static void
2213
add_derived_ivs_candidates (struct ivopts_data *data)
2214
{
2215
  unsigned i;
2216
 
2217
  for (i = 0; i < n_iv_uses (data); i++)
2218
    {
2219
      struct iv_use *use = iv_use (data, i);
2220
 
2221
      if (!use)
2222
        continue;
2223
 
2224
      switch (use->type)
2225
        {
2226
        case USE_NONLINEAR_EXPR:
2227
        case USE_COMPARE:
2228
        case USE_ADDRESS:
2229
          /* Just add the ivs based on the value of the iv used here.  */
2230
          add_iv_value_candidates (data, use->iv, use);
2231
          break;
2232
 
2233
        default:
2234
          gcc_unreachable ();
2235
        }
2236
    }
2237
}
2238
 
2239
/* Record important candidates and add them to related_cands bitmaps
2240
   if needed.  */
2241
 
2242
static void
2243
record_important_candidates (struct ivopts_data *data)
2244
{
2245
  unsigned i;
2246
  struct iv_use *use;
2247
 
2248
  for (i = 0; i < n_iv_cands (data); i++)
2249
    {
2250
      struct iv_cand *cand = iv_cand (data, i);
2251
 
2252
      if (cand->important)
2253
        bitmap_set_bit (data->important_candidates, i);
2254
    }
2255
 
2256
  data->consider_all_candidates = (n_iv_cands (data)
2257
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);
2258
 
2259
  if (data->consider_all_candidates)
2260
    {
2261
      /* We will not need "related_cands" bitmaps in this case,
2262
         so release them to decrease peak memory consumption.  */
2263
      for (i = 0; i < n_iv_uses (data); i++)
2264
        {
2265
          use = iv_use (data, i);
2266
          BITMAP_FREE (use->related_cands);
2267
        }
2268
    }
2269
  else
2270
    {
2271
      /* Add important candidates to the related_cands bitmaps.  */
2272
      for (i = 0; i < n_iv_uses (data); i++)
2273
        bitmap_ior_into (iv_use (data, i)->related_cands,
2274
                         data->important_candidates);
2275
    }
2276
}
2277
 
2278
/* Finds the candidates for the induction variables.  */
2279
 
2280
static void
2281
find_iv_candidates (struct ivopts_data *data)
2282
{
2283
  /* Add commonly used ivs.  */
2284
  add_standard_iv_candidates (data);
2285
 
2286
  /* Add old induction variables.  */
2287
  add_old_ivs_candidates (data);
2288
 
2289
  /* Add induction variables derived from uses.  */
2290
  add_derived_ivs_candidates (data);
2291
 
2292
  /* Record the important candidates.  */
2293
  record_important_candidates (data);
2294
}
2295
 
2296
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298
   we allocate a simple list to every use.  */
2299
 
2300
static void
2301
alloc_use_cost_map (struct ivopts_data *data)
2302
{
2303
  unsigned i, size, s, j;
2304
 
2305
  for (i = 0; i < n_iv_uses (data); i++)
2306
    {
2307
      struct iv_use *use = iv_use (data, i);
2308
      bitmap_iterator bi;
2309
 
2310
      if (data->consider_all_candidates)
2311
        size = n_iv_cands (data);
2312
      else
2313
        {
2314
          s = 0;
2315
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2316
            {
2317
              s++;
2318
            }
2319
 
2320
          /* Round up to the power of two, so that moduling by it is fast.  */
2321
          for (size = 1; size < s; size <<= 1)
2322
            continue;
2323
        }
2324
 
2325
      use->n_map_members = size;
2326
      use->cost_map = XCNEWVEC (struct cost_pair, size);
2327
    }
2328
}
2329
 
2330
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331
   on invariants DEPENDS_ON and that the value used in expressing it
2332
   is VALUE.*/
2333
 
2334
static void
2335
set_use_iv_cost (struct ivopts_data *data,
2336
                 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2337
                 bitmap depends_on, tree value)
2338
{
2339
  unsigned i, s;
2340
 
2341
  if (cost == INFTY)
2342
    {
2343
      BITMAP_FREE (depends_on);
2344
      return;
2345
    }
2346
 
2347
  if (data->consider_all_candidates)
2348
    {
2349
      use->cost_map[cand->id].cand = cand;
2350
      use->cost_map[cand->id].cost = cost;
2351
      use->cost_map[cand->id].depends_on = depends_on;
2352
      use->cost_map[cand->id].value = value;
2353
      return;
2354
    }
2355
 
2356
  /* n_map_members is a power of two, so this computes modulo.  */
2357
  s = cand->id & (use->n_map_members - 1);
2358
  for (i = s; i < use->n_map_members; i++)
2359
    if (!use->cost_map[i].cand)
2360
      goto found;
2361
  for (i = 0; i < s; i++)
2362
    if (!use->cost_map[i].cand)
2363
      goto found;
2364
 
2365
  gcc_unreachable ();
2366
 
2367
found:
2368
  use->cost_map[i].cand = cand;
2369
  use->cost_map[i].cost = cost;
2370
  use->cost_map[i].depends_on = depends_on;
2371
  use->cost_map[i].value = value;
2372
}
2373
 
2374
/* Gets cost of (USE, CANDIDATE) pair.  */
2375
 
2376
static struct cost_pair *
2377
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2378
                 struct iv_cand *cand)
2379
{
2380
  unsigned i, s;
2381
  struct cost_pair *ret;
2382
 
2383
  if (!cand)
2384
    return NULL;
2385
 
2386
  if (data->consider_all_candidates)
2387
    {
2388
      ret = use->cost_map + cand->id;
2389
      if (!ret->cand)
2390
        return NULL;
2391
 
2392
      return ret;
2393
    }
2394
 
2395
  /* n_map_members is a power of two, so this computes modulo.  */
2396
  s = cand->id & (use->n_map_members - 1);
2397
  for (i = s; i < use->n_map_members; i++)
2398
    if (use->cost_map[i].cand == cand)
2399
      return use->cost_map + i;
2400
 
2401
  for (i = 0; i < s; i++)
2402
    if (use->cost_map[i].cand == cand)
2403
      return use->cost_map + i;
2404
 
2405
  return NULL;
2406
}
2407
 
2408
/* Returns estimate on cost of computing SEQ.  */
2409
 
2410
static unsigned
2411
seq_cost (rtx seq)
2412
{
2413
  unsigned cost = 0;
2414
  rtx set;
2415
 
2416
  for (; seq; seq = NEXT_INSN (seq))
2417
    {
2418
      set = single_set (seq);
2419
      if (set)
2420
        cost += rtx_cost (set, SET);
2421
      else
2422
        cost++;
2423
    }
2424
 
2425
  return cost;
2426
}
2427
 
2428
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2429
static rtx
2430
produce_memory_decl_rtl (tree obj, int *regno)
2431
{
2432
  rtx x;
2433
 
2434
  gcc_assert (obj);
2435
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2436
    {
2437
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2438
      x = gen_rtx_SYMBOL_REF (Pmode, name);
2439
    }
2440
  else
2441
    x = gen_raw_REG (Pmode, (*regno)++);
2442
 
2443
  return gen_rtx_MEM (DECL_MODE (obj), x);
2444
}
2445
 
2446
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2447
   walk_tree.  DATA contains the actual fake register number.  */
2448
 
2449
static tree
2450
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2451
{
2452
  tree obj = NULL_TREE;
2453
  rtx x = NULL_RTX;
2454
  int *regno = data;
2455
 
2456
  switch (TREE_CODE (*expr_p))
2457
    {
2458
    case ADDR_EXPR:
2459
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
2460
           handled_component_p (*expr_p);
2461
           expr_p = &TREE_OPERAND (*expr_p, 0))
2462
        continue;
2463
      obj = *expr_p;
2464
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2465
        x = produce_memory_decl_rtl (obj, regno);
2466
      break;
2467
 
2468
    case SSA_NAME:
2469
      *ws = 0;
2470
      obj = SSA_NAME_VAR (*expr_p);
2471
      if (!DECL_RTL_SET_P (obj))
2472
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2473
      break;
2474
 
2475
    case VAR_DECL:
2476
    case PARM_DECL:
2477
    case RESULT_DECL:
2478
      *ws = 0;
2479
      obj = *expr_p;
2480
 
2481
      if (DECL_RTL_SET_P (obj))
2482
        break;
2483
 
2484
      if (DECL_MODE (obj) == BLKmode)
2485
        x = produce_memory_decl_rtl (obj, regno);
2486
      else
2487
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2488
 
2489
      break;
2490
 
2491
    default:
2492
      break;
2493
    }
2494
 
2495
  if (x)
2496
    {
2497
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2498
      SET_DECL_RTL (obj, x);
2499
    }
2500
 
2501
  return NULL_TREE;
2502
}
2503
 
2504
/* Determines cost of the computation of EXPR.  */
2505
 
2506
static unsigned
2507
computation_cost (tree expr)
2508
{
2509
  rtx seq, rslt;
2510
  tree type = TREE_TYPE (expr);
2511
  unsigned cost;
2512
  /* Avoid using hard regs in ways which may be unsupported.  */
2513
  int regno = LAST_VIRTUAL_REGISTER + 1;
2514
 
2515
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2516
  start_sequence ();
2517
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2518
  seq = get_insns ();
2519
  end_sequence ();
2520
 
2521
  cost = seq_cost (seq);
2522
  if (MEM_P (rslt))
2523
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2524
 
2525
  return cost;
2526
}
2527
 
2528
/* Returns variable containing the value of candidate CAND at statement AT.  */
2529
 
2530
static tree
2531
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2532
{
2533
  if (stmt_after_increment (loop, cand, stmt))
2534
    return cand->var_after;
2535
  else
2536
    return cand->var_before;
2537
}
2538
 
2539
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2540
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2541
 
2542
int
2543
tree_int_cst_sign_bit (tree t)
2544
{
2545
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2546
  unsigned HOST_WIDE_INT w;
2547
 
2548
  if (bitno < HOST_BITS_PER_WIDE_INT)
2549
    w = TREE_INT_CST_LOW (t);
2550
  else
2551
    {
2552
      w = TREE_INT_CST_HIGH (t);
2553
      bitno -= HOST_BITS_PER_WIDE_INT;
2554
    }
2555
 
2556
  return (w >> bitno) & 1;
2557
}
2558
 
2559
/* If we can prove that TOP = cst * BOT for some constant cst,
2560
   store cst to MUL and return true.  Otherwise return false.
2561
   The returned value is always sign-extended, regardless of the
2562
   signedness of TOP and BOT.  */
2563
 
2564
static bool
2565
constant_multiple_of (tree top, tree bot, double_int *mul)
2566
{
2567
  tree mby;
2568
  enum tree_code code;
2569
  double_int res, p0, p1;
2570
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2571
 
2572
  STRIP_NOPS (top);
2573
  STRIP_NOPS (bot);
2574
 
2575
  if (operand_equal_p (top, bot, 0))
2576
    {
2577
      *mul = double_int_one;
2578
      return true;
2579
    }
2580
 
2581
  code = TREE_CODE (top);
2582
  switch (code)
2583
    {
2584
    case MULT_EXPR:
2585
      mby = TREE_OPERAND (top, 1);
2586
      if (TREE_CODE (mby) != INTEGER_CST)
2587
        return false;
2588
 
2589
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2590
        return false;
2591
 
2592
      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2593
                              precision);
2594
      return true;
2595
 
2596
    case PLUS_EXPR:
2597
    case MINUS_EXPR:
2598
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2599
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2600
        return false;
2601
 
2602
      if (code == MINUS_EXPR)
2603
        p1 = double_int_neg (p1);
2604
      *mul = double_int_sext (double_int_add (p0, p1), precision);
2605
      return true;
2606
 
2607
    case INTEGER_CST:
2608
      if (TREE_CODE (bot) != INTEGER_CST)
2609
        return false;
2610
 
2611
      p0 = double_int_sext (tree_to_double_int (bot), precision);
2612
      p1 = double_int_sext (tree_to_double_int (top), precision);
2613
      if (double_int_zero_p (p1))
2614
        return false;
2615
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2616
                              precision);
2617
      return double_int_zero_p (res);
2618
 
2619
    default:
2620
      return false;
2621
    }
2622
}
2623
 
2624
/* Sets COMB to CST.  */
2625
 
2626
static void
2627
aff_combination_const (struct affine_tree_combination *comb, tree type,
2628
                       unsigned HOST_WIDE_INT cst)
2629
{
2630
  unsigned prec = TYPE_PRECISION (type);
2631
 
2632
  comb->type = type;
2633
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2634
 
2635
  comb->n = 0;
2636
  comb->rest = NULL_TREE;
2637
  comb->offset = cst & comb->mask;
2638
}
2639
 
2640
/* Sets COMB to single element ELT.  */
2641
 
2642
static void
2643
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2644
{
2645
  unsigned prec = TYPE_PRECISION (type);
2646
 
2647
  comb->type = type;
2648
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2649
 
2650
  comb->n = 1;
2651
  comb->elts[0] = elt;
2652
  comb->coefs[0] = 1;
2653
  comb->rest = NULL_TREE;
2654
  comb->offset = 0;
2655
}
2656
 
2657
/* Scales COMB by SCALE.  */
2658
 
2659
static void
2660
aff_combination_scale (struct affine_tree_combination *comb,
2661
                       unsigned HOST_WIDE_INT scale)
2662
{
2663
  unsigned i, j;
2664
 
2665
  if (scale == 1)
2666
    return;
2667
 
2668
  if (scale == 0)
2669
    {
2670
      aff_combination_const (comb, comb->type, 0);
2671
      return;
2672
    }
2673
 
2674
  comb->offset = (scale * comb->offset) & comb->mask;
2675
  for (i = 0, j = 0; i < comb->n; i++)
2676
    {
2677
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2678
      comb->elts[j] = comb->elts[i];
2679
      if (comb->coefs[j] != 0)
2680
        j++;
2681
    }
2682
  comb->n = j;
2683
 
2684
  if (comb->rest)
2685
    {
2686
      if (comb->n < MAX_AFF_ELTS)
2687
        {
2688
          comb->coefs[comb->n] = scale;
2689
          comb->elts[comb->n] = comb->rest;
2690
          comb->rest = NULL_TREE;
2691
          comb->n++;
2692
        }
2693
      else
2694
        comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2695
                                  build_int_cst_type (comb->type, scale));
2696
    }
2697
}
2698
 
2699
/* Adds ELT * SCALE to COMB.  */
2700
 
2701
static void
2702
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2703
                         unsigned HOST_WIDE_INT scale)
2704
{
2705
  unsigned i;
2706
 
2707
  if (scale == 0)
2708
    return;
2709
 
2710
  for (i = 0; i < comb->n; i++)
2711
    if (operand_equal_p (comb->elts[i], elt, 0))
2712
      {
2713
        comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2714
        if (comb->coefs[i])
2715
          return;
2716
 
2717
        comb->n--;
2718
        comb->coefs[i] = comb->coefs[comb->n];
2719
        comb->elts[i] = comb->elts[comb->n];
2720
 
2721
        if (comb->rest)
2722
          {
2723
            gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2724
            comb->coefs[comb->n] = 1;
2725
            comb->elts[comb->n] = comb->rest;
2726
            comb->rest = NULL_TREE;
2727
            comb->n++;
2728
          }
2729
        return;
2730
      }
2731
  if (comb->n < MAX_AFF_ELTS)
2732
    {
2733
      comb->coefs[comb->n] = scale;
2734
      comb->elts[comb->n] = elt;
2735
      comb->n++;
2736
      return;
2737
    }
2738
 
2739
  if (scale == 1)
2740
    elt = fold_convert (comb->type, elt);
2741
  else
2742
    elt = fold_build2 (MULT_EXPR, comb->type,
2743
                       fold_convert (comb->type, elt),
2744
                       build_int_cst_type (comb->type, scale));
2745
 
2746
  if (comb->rest)
2747
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2748
  else
2749
    comb->rest = elt;
2750
}
2751
 
2752
/* Adds COMB2 to COMB1.  */
2753
 
2754
static void
2755
aff_combination_add (struct affine_tree_combination *comb1,
2756
                     struct affine_tree_combination *comb2)
2757
{
2758
  unsigned i;
2759
 
2760
  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2761
  for (i = 0; i < comb2->n; i++)
2762
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2763
  if (comb2->rest)
2764
    aff_combination_add_elt (comb1, comb2->rest, 1);
2765
}
2766
 
2767
/* Convert COMB to TYPE.  */
2768
 
2769
static void
2770
aff_combination_convert (tree type, struct affine_tree_combination *comb)
2771
{
2772
  unsigned prec = TYPE_PRECISION (type);
2773
  unsigned i;
2774
 
2775
  /* If the precision of both types is the same, it suffices to change the type
2776
     of the whole combination -- the elements are allowed to have another type
2777
     equivalent wrto STRIP_NOPS.  */
2778
  if (prec == TYPE_PRECISION (comb->type))
2779
    {
2780
      comb->type = type;
2781
      return;
2782
    }
2783
 
2784
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2785
  comb->offset = comb->offset & comb->mask;
2786
 
2787
  /* The type of the elements can be different from comb->type only as
2788
     much as what STRIP_NOPS would remove.  We can just directly cast
2789
     to TYPE.  */
2790
  for (i = 0; i < comb->n; i++)
2791
    comb->elts[i] = fold_convert (type, comb->elts[i]);
2792
  if (comb->rest)
2793
    comb->rest = fold_convert (type, comb->rest);
2794
 
2795
  comb->type = type;
2796
}
2797
 
2798
/* Splits EXPR into an affine combination of parts.  */
2799
 
2800
static void
2801
tree_to_aff_combination (tree expr, tree type,
2802
                         struct affine_tree_combination *comb)
2803
{
2804
  struct affine_tree_combination tmp;
2805
  enum tree_code code;
2806
  tree cst, core, toffset;
2807
  HOST_WIDE_INT bitpos, bitsize;
2808
  enum machine_mode mode;
2809
  int unsignedp, volatilep;
2810
 
2811
  STRIP_NOPS (expr);
2812
 
2813
  code = TREE_CODE (expr);
2814
  switch (code)
2815
    {
2816
    case INTEGER_CST:
2817
      aff_combination_const (comb, type, int_cst_value (expr));
2818
      return;
2819
 
2820
    case PLUS_EXPR:
2821
    case MINUS_EXPR:
2822
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2823
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2824
      if (code == MINUS_EXPR)
2825
        aff_combination_scale (&tmp, -1);
2826
      aff_combination_add (comb, &tmp);
2827
      return;
2828
 
2829
    case MULT_EXPR:
2830
      cst = TREE_OPERAND (expr, 1);
2831
      if (TREE_CODE (cst) != INTEGER_CST)
2832
        break;
2833
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2834
      aff_combination_scale (comb, int_cst_value (cst));
2835
      return;
2836
 
2837
    case NEGATE_EXPR:
2838
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839
      aff_combination_scale (comb, -1);
2840
      return;
2841
 
2842
    case ADDR_EXPR:
2843
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2844
                                  &toffset, &mode, &unsignedp, &volatilep,
2845
                                  false);
2846
      if (bitpos % BITS_PER_UNIT != 0)
2847
        break;
2848
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2849
      core = build_fold_addr_expr (core);
2850
      if (TREE_CODE (core) == ADDR_EXPR)
2851
        aff_combination_add_elt (comb, core, 1);
2852
      else
2853
        {
2854
          tree_to_aff_combination (core, type, &tmp);
2855
          aff_combination_add (comb, &tmp);
2856
        }
2857
      if (toffset)
2858
        {
2859
          tree_to_aff_combination (toffset, type, &tmp);
2860
          aff_combination_add (comb, &tmp);
2861
        }
2862
      return;
2863
 
2864
    default:
2865
      break;
2866
    }
2867
 
2868
  aff_combination_elt (comb, type, expr);
2869
}
2870
 
2871
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2872
 
2873
static tree
2874
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2875
                 unsigned HOST_WIDE_INT mask)
2876
{
2877
  enum tree_code code;
2878
 
2879
  scale &= mask;
2880
  elt = fold_convert (type, elt);
2881
 
2882
  if (scale == 1)
2883
    {
2884
      if (!expr)
2885
        return elt;
2886
 
2887
      return fold_build2 (PLUS_EXPR, type, expr, elt);
2888
    }
2889
 
2890
  if (scale == mask)
2891
    {
2892
      if (!expr)
2893
        return fold_build1 (NEGATE_EXPR, type, elt);
2894
 
2895
      return fold_build2 (MINUS_EXPR, type, expr, elt);
2896
    }
2897
 
2898
  if (!expr)
2899
    return fold_build2 (MULT_EXPR, type, elt,
2900
                        build_int_cst_type (type, scale));
2901
 
2902
  if ((scale | (mask >> 1)) == mask)
2903
    {
2904
      /* Scale is negative.  */
2905
      code = MINUS_EXPR;
2906
      scale = (-scale) & mask;
2907
    }
2908
  else
2909
    code = PLUS_EXPR;
2910
 
2911
  elt = fold_build2 (MULT_EXPR, type, elt,
2912
                     build_int_cst_type (type, scale));
2913
  return fold_build2 (code, type, expr, elt);
2914
}
2915
 
2916
/* Copies the tree elements of COMB to ensure that they are not shared.  */
2917
 
2918
static void
2919
unshare_aff_combination (struct affine_tree_combination *comb)
2920
{
2921
  unsigned i;
2922
 
2923
  for (i = 0; i < comb->n; i++)
2924
    comb->elts[i] = unshare_expr (comb->elts[i]);
2925
  if (comb->rest)
2926
    comb->rest = unshare_expr (comb->rest);
2927
}
2928
 
2929
/* Makes tree from the affine combination COMB.  */
2930
 
2931
static tree
2932
aff_combination_to_tree (struct affine_tree_combination *comb)
2933
{
2934
  tree type = comb->type;
2935
  tree expr = comb->rest;
2936
  unsigned i;
2937
  unsigned HOST_WIDE_INT off, sgn;
2938
 
2939
  if (comb->n == 0 && comb->offset == 0)
2940
    {
2941
      if (expr)
2942
        {
2943
          /* Handle the special case produced by get_computation_aff when
2944
             the type does not fit in HOST_WIDE_INT.  */
2945
          return fold_convert (type, expr);
2946
        }
2947
      else
2948
        return build_int_cst (type, 0);
2949
    }
2950
 
2951
  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2952
 
2953
  for (i = 0; i < comb->n; i++)
2954
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2955
                            comb->mask);
2956
 
2957
  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2958
    {
2959
      /* Offset is negative.  */
2960
      off = (-comb->offset) & comb->mask;
2961
      sgn = comb->mask;
2962
    }
2963
  else
2964
    {
2965
      off = comb->offset;
2966
      sgn = 1;
2967
    }
2968
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2969
                          comb->mask);
2970
}
2971
 
2972
/* Folds EXPR using the affine expressions framework.  */
2973
 
2974
static tree
2975
fold_affine_expr (tree expr)
2976
{
2977
  tree type = TREE_TYPE (expr);
2978
  struct affine_tree_combination comb;
2979
 
2980
  if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2981
    return expr;
2982
 
2983
  tree_to_aff_combination (expr, type, &comb);
2984
  return aff_combination_to_tree (&comb);
2985
}
2986
 
2987
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2988
   same precision that is at least as wide as the precision of TYPE, stores
2989
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2990
   type of A and B.  */
2991
 
2992
static tree
2993
determine_common_wider_type (tree *a, tree *b)
2994
{
2995
  tree wider_type = NULL;
2996
  tree suba, subb;
2997
  tree atype = TREE_TYPE (*a);
2998
 
2999
  if ((TREE_CODE (*a) == NOP_EXPR
3000
       || TREE_CODE (*a) == CONVERT_EXPR))
3001
    {
3002
      suba = TREE_OPERAND (*a, 0);
3003
      wider_type = TREE_TYPE (suba);
3004
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3005
        return atype;
3006
    }
3007
  else
3008
    return atype;
3009
 
3010
  if ((TREE_CODE (*b) == NOP_EXPR
3011
       || TREE_CODE (*b) == CONVERT_EXPR))
3012
    {
3013
      subb = TREE_OPERAND (*b, 0);
3014
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3015
        return atype;
3016
    }
3017
  else
3018
    return atype;
3019
 
3020
  *a = suba;
3021
  *b = subb;
3022
  return wider_type;
3023
}
3024
 
3025
/* Determines the expression by that USE is expressed from induction variable
3026
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
3027
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3028
 
3029
static bool
3030
get_computation_aff (struct loop *loop,
3031
                     struct iv_use *use, struct iv_cand *cand, tree at,
3032
                     struct affine_tree_combination *aff)
3033
{
3034
  tree ubase = use->iv->base;
3035
  tree ustep = use->iv->step;
3036
  tree cbase = cand->iv->base;
3037
  tree cstep = cand->iv->step;
3038
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3039
  tree common_type;
3040
  tree uutype;
3041
  tree expr, delta;
3042
  tree ratio;
3043
  unsigned HOST_WIDE_INT ustepi, cstepi;
3044
  HOST_WIDE_INT ratioi;
3045
  struct affine_tree_combination cbase_aff, expr_aff;
3046
  tree cstep_orig = cstep, ustep_orig = ustep;
3047
  double_int rat;
3048
 
3049
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3050
    {
3051
      /* We do not have a precision to express the values of use.  */
3052
      return false;
3053
    }
3054
 
3055
  expr = var_at_stmt (loop, cand, at);
3056
 
3057
  if (TREE_TYPE (expr) != ctype)
3058
    {
3059
      /* This may happen with the original ivs.  */
3060
      expr = fold_convert (ctype, expr);
3061
    }
3062
 
3063
  if (TYPE_UNSIGNED (utype))
3064
    uutype = utype;
3065
  else
3066
    {
3067
      uutype = unsigned_type_for (utype);
3068
      ubase = fold_convert (uutype, ubase);
3069
      ustep = fold_convert (uutype, ustep);
3070
    }
3071
 
3072
  if (uutype != ctype)
3073
    {
3074
      expr = fold_convert (uutype, expr);
3075
      cbase = fold_convert (uutype, cbase);
3076
      cstep = fold_convert (uutype, cstep);
3077
 
3078
      /* If the conversion is not noop, we must take it into account when
3079
         considering the value of the step.  */
3080
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3081
        cstep_orig = cstep;
3082
    }
3083
 
3084
  if (cst_and_fits_in_hwi (cstep_orig)
3085
      && cst_and_fits_in_hwi (ustep_orig))
3086
    {
3087
      ustepi = int_cst_value (ustep_orig);
3088
      cstepi = int_cst_value (cstep_orig);
3089
 
3090
      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3091
        {
3092
          /* TODO maybe consider case when ustep divides cstep and the ratio is
3093
             a power of 2 (so that the division is fast to execute)?  We would
3094
             need to be much more careful with overflows etc. then.  */
3095
          return false;
3096
        }
3097
 
3098
      ratio = build_int_cst_type (uutype, ratioi);
3099
    }
3100
  else
3101
    {
3102
      if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3103
        return false;
3104
      ratio = double_int_to_tree (uutype, rat);
3105
 
3106
      /* Ratioi is only used to detect special cases when the multiplicative
3107
         factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3108
         set it to 0.  */
3109
      if (double_int_fits_in_shwi_p (rat))
3110
        ratioi = double_int_to_shwi (rat);
3111
      else
3112
        ratioi = 0;
3113
    }
3114
 
3115
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3116
     type, we achieve better folding by computing their difference in this
3117
     wider type, and cast the result to UUTYPE.  We do not need to worry about
3118
     overflows, as all the arithmetics will in the end be performed in UUTYPE
3119
     anyway.  */
3120
  common_type = determine_common_wider_type (&ubase, &cbase);
3121
 
3122
  /* We may need to shift the value if we are after the increment.  */
3123
  if (stmt_after_increment (loop, cand, at))
3124
    {
3125
      if (uutype != common_type)
3126
        cstep = fold_convert (common_type, cstep);
3127
      cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3128
    }
3129
 
3130
  /* use = ubase - ratio * cbase + ratio * var.
3131
 
3132
     In general case ubase + ratio * (var - cbase) could be better (one less
3133
     multiplication), but often it is possible to eliminate redundant parts
3134
     of computations from (ubase - ratio * cbase) term, and if it does not
3135
     happen, fold is able to apply the distributive law to obtain this form
3136
     anyway.  */
3137
 
3138
  if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3139
    {
3140
      /* Let's compute in trees and just return the result in AFF.  This case
3141
         should not be very common, and fold itself is not that bad either,
3142
         so making the aff. functions more complicated to handle this case
3143
         is not that urgent.  */
3144
      if (ratioi == 1)
3145
        {
3146
          delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3147
          if (uutype != common_type)
3148
            delta = fold_convert (uutype, delta);
3149
          expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3150
        }
3151
      else if (ratioi == -1)
3152
        {
3153
          delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3154
          if (uutype != common_type)
3155
            delta = fold_convert (uutype, delta);
3156
          expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3157
        }
3158
      else
3159
        {
3160
          delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3161
          delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3162
          if (uutype != common_type)
3163
            delta = fold_convert (uutype, delta);
3164
          expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3165
          expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3166
        }
3167
 
3168
      aff->type = uutype;
3169
      aff->n = 0;
3170
      aff->offset = 0;
3171
      aff->mask = 0;
3172
      aff->rest = expr;
3173
      return true;
3174
    }
3175
 
3176
  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3177
     possible to compute ratioi.  */
3178
  gcc_assert (ratioi);
3179
 
3180
  tree_to_aff_combination (ubase, common_type, aff);
3181
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
3182
  tree_to_aff_combination (expr, uutype, &expr_aff);
3183
  aff_combination_scale (&cbase_aff, -ratioi);
3184
  aff_combination_scale (&expr_aff, ratioi);
3185
  aff_combination_add (aff, &cbase_aff);
3186
  if (common_type != uutype)
3187
    aff_combination_convert (uutype, aff);
3188
  aff_combination_add (aff, &expr_aff);
3189
 
3190
  return true;
3191
}
3192
 
3193
/* Determines the expression by that USE is expressed from induction variable
3194
   CAND at statement AT in LOOP.  The computation is unshared.  */
3195
 
3196
static tree
3197
get_computation_at (struct loop *loop,
3198
                    struct iv_use *use, struct iv_cand *cand, tree at)
3199
{
3200
  struct affine_tree_combination aff;
3201
  tree type = TREE_TYPE (use->iv->base);
3202
 
3203
  if (!get_computation_aff (loop, use, cand, at, &aff))
3204
    return NULL_TREE;
3205
  unshare_aff_combination (&aff);
3206
  return fold_convert (type, aff_combination_to_tree (&aff));
3207
}
3208
 
3209
/* Determines the expression by that USE is expressed from induction variable
3210
   CAND in LOOP.  The computation is unshared.  */
3211
 
3212
static tree
3213
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3214
{
3215
  return get_computation_at (loop, use, cand, use->stmt);
3216
}
3217
 
3218
/* Returns cost of addition in MODE.  */
3219
 
3220
static unsigned
3221
add_cost (enum machine_mode mode)
3222
{
3223
  static unsigned costs[NUM_MACHINE_MODES];
3224
  rtx seq;
3225
  unsigned cost;
3226
 
3227
  if (costs[mode])
3228
    return costs[mode];
3229
 
3230
  start_sequence ();
3231
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
3232
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3233
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3234
                 NULL_RTX);
3235
  seq = get_insns ();
3236
  end_sequence ();
3237
 
3238
  cost = seq_cost (seq);
3239
  if (!cost)
3240
    cost = 1;
3241
 
3242
  costs[mode] = cost;
3243
 
3244
  if (dump_file && (dump_flags & TDF_DETAILS))
3245
    fprintf (dump_file, "Addition in %s costs %d\n",
3246
             GET_MODE_NAME (mode), cost);
3247
  return cost;
3248
}
3249
 
3250
/* Entry in a hashtable of already known costs for multiplication.  */
3251
struct mbc_entry
3252
{
3253
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3254
  enum machine_mode mode;       /* In mode.  */
3255
  unsigned cost;                /* The cost.  */
3256
};
3257
 
3258
/* Counts hash value for the ENTRY.  */
3259
 
3260
static hashval_t
3261
mbc_entry_hash (const void *entry)
3262
{
3263
  const struct mbc_entry *e = entry;
3264
 
3265
  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3266
}
3267
 
3268
/* Compares the hash table entries ENTRY1 and ENTRY2.  */
3269
 
3270
static int
3271
mbc_entry_eq (const void *entry1, const void *entry2)
3272
{
3273
  const struct mbc_entry *e1 = entry1;
3274
  const struct mbc_entry *e2 = entry2;
3275
 
3276
  return (e1->mode == e2->mode
3277
          && e1->cst == e2->cst);
3278
}
3279
 
3280
/* Returns cost of multiplication by constant CST in MODE.  */
3281
 
3282
unsigned
3283
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3284
{
3285
  static htab_t costs;
3286
  struct mbc_entry **cached, act;
3287
  rtx seq;
3288
  unsigned cost;
3289
 
3290
  if (!costs)
3291
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3292
 
3293
  act.mode = mode;
3294
  act.cst = cst;
3295
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3296
  if (*cached)
3297
    return (*cached)->cost;
3298
 
3299
  *cached = XNEW (struct mbc_entry);
3300
  (*cached)->mode = mode;
3301
  (*cached)->cst = cst;
3302
 
3303
  start_sequence ();
3304
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3305
               gen_int_mode (cst, mode), NULL_RTX, 0);
3306
  seq = get_insns ();
3307
  end_sequence ();
3308
 
3309
  cost = seq_cost (seq);
3310
 
3311
  if (dump_file && (dump_flags & TDF_DETAILS))
3312
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3313
             (int) cst, GET_MODE_NAME (mode), cost);
3314
 
3315
  (*cached)->cost = cost;
3316
 
3317
  return cost;
3318
}
3319
 
3320
/* Returns true if multiplying by RATIO is allowed in address.  */
3321
 
3322
bool
3323
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3324
{
3325
#define MAX_RATIO 128
3326
  static sbitmap valid_mult;
3327
 
3328
  if (!valid_mult)
3329
    {
3330
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3331
      rtx addr;
3332
      HOST_WIDE_INT i;
3333
 
3334
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3335
      sbitmap_zero (valid_mult);
3336
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3337
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3338
        {
3339
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
3340
          if (memory_address_p (Pmode, addr))
3341
            SET_BIT (valid_mult, i + MAX_RATIO);
3342
        }
3343
 
3344
      if (dump_file && (dump_flags & TDF_DETAILS))
3345
        {
3346
          fprintf (dump_file, "  allowed multipliers:");
3347
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3348
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
3349
              fprintf (dump_file, " %d", (int) i);
3350
          fprintf (dump_file, "\n");
3351
          fprintf (dump_file, "\n");
3352
        }
3353
    }
3354
 
3355
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3356
    return false;
3357
 
3358
  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3359
}
3360
 
3361
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3362
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3363
   variable is omitted.  The created memory accesses MODE.
3364
 
3365
   TODO -- there must be some better way.  This all is quite crude.  */
3366
 
3367
static unsigned
3368
get_address_cost (bool symbol_present, bool var_present,
3369
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3370
{
3371
  static bool initialized = false;
3372
  static HOST_WIDE_INT rat, off;
3373
  static HOST_WIDE_INT min_offset, max_offset;
3374
  static unsigned costs[2][2][2][2];
3375
  unsigned cost, acost;
3376
  bool offset_p, ratio_p;
3377
  HOST_WIDE_INT s_offset;
3378
  unsigned HOST_WIDE_INT mask;
3379
  unsigned bits;
3380
 
3381
  if (!initialized)
3382
    {
3383
      HOST_WIDE_INT i;
3384
      int old_cse_not_expected;
3385
      unsigned sym_p, var_p, off_p, rat_p, add_c;
3386
      rtx seq, addr, base;
3387
      rtx reg0, reg1;
3388
 
3389
      initialized = true;
3390
 
3391
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3392
 
3393
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3394
      for (i = 1; i <= 1 << 20; i <<= 1)
3395
        {
3396
          XEXP (addr, 1) = gen_int_mode (i, Pmode);
3397
          if (!memory_address_p (Pmode, addr))
3398
            break;
3399
        }
3400
      max_offset = i >> 1;
3401
      off = max_offset;
3402
 
3403
      for (i = 1; i <= 1 << 20; i <<= 1)
3404
        {
3405
          XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3406
          if (!memory_address_p (Pmode, addr))
3407
            break;
3408
        }
3409
      min_offset = -(i >> 1);
3410
 
3411
      if (dump_file && (dump_flags & TDF_DETAILS))
3412
        {
3413
          fprintf (dump_file, "get_address_cost:\n");
3414
          fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3415
          fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3416
        }
3417
 
3418
      rat = 1;
3419
      for (i = 2; i <= MAX_RATIO; i++)
3420
        if (multiplier_allowed_in_address_p (i))
3421
          {
3422
            rat = i;
3423
            break;
3424
          }
3425
 
3426
      /* Compute the cost of various addressing modes.  */
3427
      acost = 0;
3428
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3429
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3430
 
3431
      for (i = 0; i < 16; i++)
3432
        {
3433
          sym_p = i & 1;
3434
          var_p = (i >> 1) & 1;
3435
          off_p = (i >> 2) & 1;
3436
          rat_p = (i >> 3) & 1;
3437
 
3438
          addr = reg0;
3439
          if (rat_p)
3440
            addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3441
 
3442
          if (var_p)
3443
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3444
 
3445
          if (sym_p)
3446
            {
3447
              base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3448
              if (off_p)
3449
                base = gen_rtx_fmt_e (CONST, Pmode,
3450
                                      gen_rtx_fmt_ee (PLUS, Pmode,
3451
                                                      base,
3452
                                                      gen_int_mode (off, Pmode)));
3453
            }
3454
          else if (off_p)
3455
            base = gen_int_mode (off, Pmode);
3456
          else
3457
            base = NULL_RTX;
3458
 
3459
          if (base)
3460
            addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3461
 
3462
          start_sequence ();
3463
          /* To avoid splitting addressing modes, pretend that no cse will
3464
             follow.  */
3465
          old_cse_not_expected = cse_not_expected;
3466
          cse_not_expected = true;
3467
          addr = memory_address (Pmode, addr);
3468
          cse_not_expected = old_cse_not_expected;
3469
          seq = get_insns ();
3470
          end_sequence ();
3471
 
3472
          acost = seq_cost (seq);
3473
          acost += address_cost (addr, Pmode);
3474
 
3475
          if (!acost)
3476
            acost = 1;
3477
          costs[sym_p][var_p][off_p][rat_p] = acost;
3478
        }
3479
 
3480
      /* On some targets, it is quite expensive to load symbol to a register,
3481
         which makes addresses that contain symbols look much more expensive.
3482
         However, the symbol will have to be loaded in any case before the
3483
         loop (and quite likely we have it in register already), so it does not
3484
         make much sense to penalize them too heavily.  So make some final
3485
         tweaks for the SYMBOL_PRESENT modes:
3486
 
3487
         If VAR_PRESENT is false, and the mode obtained by changing symbol to
3488
         var is cheaper, use this mode with small penalty.
3489
         If VAR_PRESENT is true, try whether the mode with
3490
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3491
         if this is the case, use it.  */
3492
      add_c = add_cost (Pmode);
3493
      for (i = 0; i < 8; i++)
3494
        {
3495
          var_p = i & 1;
3496
          off_p = (i >> 1) & 1;
3497
          rat_p = (i >> 2) & 1;
3498
 
3499
          acost = costs[0][1][off_p][rat_p] + 1;
3500
          if (var_p)
3501
            acost += add_c;
3502
 
3503
          if (acost < costs[1][var_p][off_p][rat_p])
3504
            costs[1][var_p][off_p][rat_p] = acost;
3505
        }
3506
 
3507
      if (dump_file && (dump_flags & TDF_DETAILS))
3508
        {
3509
          fprintf (dump_file, "Address costs:\n");
3510
 
3511
          for (i = 0; i < 16; i++)
3512
            {
3513
              sym_p = i & 1;
3514
              var_p = (i >> 1) & 1;
3515
              off_p = (i >> 2) & 1;
3516
              rat_p = (i >> 3) & 1;
3517
 
3518
              fprintf (dump_file, "  ");
3519
              if (sym_p)
3520
                fprintf (dump_file, "sym + ");
3521
              if (var_p)
3522
                fprintf (dump_file, "var + ");
3523
              if (off_p)
3524
                fprintf (dump_file, "cst + ");
3525
              if (rat_p)
3526
                fprintf (dump_file, "rat * ");
3527
 
3528
              acost = costs[sym_p][var_p][off_p][rat_p];
3529
              fprintf (dump_file, "index costs %d\n", acost);
3530
            }
3531
          fprintf (dump_file, "\n");
3532
        }
3533
    }
3534
 
3535
  bits = GET_MODE_BITSIZE (Pmode);
3536
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3537
  offset &= mask;
3538
  if ((offset >> (bits - 1) & 1))
3539
    offset |= ~mask;
3540
  s_offset = offset;
3541
 
3542
  cost = 0;
3543
  offset_p = (s_offset != 0
3544
              && min_offset <= s_offset && s_offset <= max_offset);
3545
  ratio_p = (ratio != 1
3546
             && multiplier_allowed_in_address_p (ratio));
3547
 
3548
  if (ratio != 1 && !ratio_p)
3549
    cost += multiply_by_cost (ratio, Pmode);
3550
 
3551
  if (s_offset && !offset_p && !symbol_present)
3552
    {
3553
      cost += add_cost (Pmode);
3554
      var_present = true;
3555
    }
3556
 
3557
  acost = costs[symbol_present][var_present][offset_p][ratio_p];
3558
  return cost + acost;
3559
}
3560
 
3561
/* Estimates cost of forcing expression EXPR into a variable.  */
3562
 
3563
unsigned
3564
force_expr_to_var_cost (tree expr)
3565
{
3566
  static bool costs_initialized = false;
3567
  static unsigned integer_cost;
3568
  static unsigned symbol_cost;
3569
  static unsigned address_cost;
3570
  tree op0, op1;
3571
  unsigned cost0, cost1, cost;
3572
  enum machine_mode mode;
3573
 
3574
  if (!costs_initialized)
3575
    {
3576
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
3577
      rtx x = gen_rtx_MEM (DECL_MODE (var),
3578
                           gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3579
      tree addr;
3580
      tree type = build_pointer_type (integer_type_node);
3581
 
3582
      integer_cost = computation_cost (build_int_cst (integer_type_node,
3583
                                                      2000));
3584
 
3585
      SET_DECL_RTL (var, x);
3586
      TREE_STATIC (var) = 1;
3587
      addr = build1 (ADDR_EXPR, type, var);
3588
      symbol_cost = computation_cost (addr) + 1;
3589
 
3590
      address_cost
3591
        = computation_cost (build2 (PLUS_EXPR, type,
3592
                                    addr,
3593
                                    build_int_cst (type, 2000))) + 1;
3594
      if (dump_file && (dump_flags & TDF_DETAILS))
3595
        {
3596
          fprintf (dump_file, "force_expr_to_var_cost:\n");
3597
          fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3598
          fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3599
          fprintf (dump_file, "  address %d\n", (int) address_cost);
3600
          fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3601
          fprintf (dump_file, "\n");
3602
        }
3603
 
3604
      costs_initialized = true;
3605
    }
3606
 
3607
  STRIP_NOPS (expr);
3608
 
3609
  if (SSA_VAR_P (expr))
3610
    return 0;
3611
 
3612
  if (TREE_INVARIANT (expr))
3613
    {
3614
      if (TREE_CODE (expr) == INTEGER_CST)
3615
        return integer_cost;
3616
 
3617
      if (TREE_CODE (expr) == ADDR_EXPR)
3618
        {
3619
          tree obj = TREE_OPERAND (expr, 0);
3620
 
3621
          if (TREE_CODE (obj) == VAR_DECL
3622
              || TREE_CODE (obj) == PARM_DECL
3623
              || TREE_CODE (obj) == RESULT_DECL)
3624
            return symbol_cost;
3625
        }
3626
 
3627
      return address_cost;
3628
    }
3629
 
3630
  switch (TREE_CODE (expr))
3631
    {
3632
    case PLUS_EXPR:
3633
    case MINUS_EXPR:
3634
    case MULT_EXPR:
3635
      op0 = TREE_OPERAND (expr, 0);
3636
      op1 = TREE_OPERAND (expr, 1);
3637
      STRIP_NOPS (op0);
3638
      STRIP_NOPS (op1);
3639
 
3640
      if (is_gimple_val (op0))
3641
        cost0 = 0;
3642
      else
3643
        cost0 = force_expr_to_var_cost (op0);
3644
 
3645
      if (is_gimple_val (op1))
3646
        cost1 = 0;
3647
      else
3648
        cost1 = force_expr_to_var_cost (op1);
3649
 
3650
      break;
3651
 
3652
    default:
3653
      /* Just an arbitrary value, FIXME.  */
3654
      return target_spill_cost;
3655
    }
3656
 
3657
  mode = TYPE_MODE (TREE_TYPE (expr));
3658
  switch (TREE_CODE (expr))
3659
    {
3660
    case PLUS_EXPR:
3661
    case MINUS_EXPR:
3662
      cost = add_cost (mode);
3663
      break;
3664
 
3665
    case MULT_EXPR:
3666
      if (cst_and_fits_in_hwi (op0))
3667
        cost = multiply_by_cost (int_cst_value (op0), mode);
3668
      else if (cst_and_fits_in_hwi (op1))
3669
        cost = multiply_by_cost (int_cst_value (op1), mode);
3670
      else
3671
        return target_spill_cost;
3672
      break;
3673
 
3674
    default:
3675
      gcc_unreachable ();
3676
    }
3677
 
3678
  cost += cost0;
3679
  cost += cost1;
3680
 
3681
  /* Bound the cost by target_spill_cost.  The parts of complicated
3682
     computations often are either loop invariant or at least can
3683
     be shared between several iv uses, so letting this grow without
3684
     limits would not give reasonable results.  */
3685
  return cost < target_spill_cost ? cost : target_spill_cost;
3686
}
3687
 
3688
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3689
   invariants the computation depends on.  */
3690
 
3691
static unsigned
3692
force_var_cost (struct ivopts_data *data,
3693
                tree expr, bitmap *depends_on)
3694
{
3695
  if (depends_on)
3696
    {
3697
      fd_ivopts_data = data;
3698
      walk_tree (&expr, find_depends, depends_on, NULL);
3699
    }
3700
 
3701
  return force_expr_to_var_cost (expr);
3702
}
3703
 
3704
/* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3705
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3706
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3707
   invariants the computation depends on.  */
3708
 
3709
static unsigned
3710
split_address_cost (struct ivopts_data *data,
3711
                    tree addr, bool *symbol_present, bool *var_present,
3712
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3713
{
3714
  tree core;
3715
  HOST_WIDE_INT bitsize;
3716
  HOST_WIDE_INT bitpos;
3717
  tree toffset;
3718
  enum machine_mode mode;
3719
  int unsignedp, volatilep;
3720
 
3721
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3722
                              &unsignedp, &volatilep, false);
3723
 
3724
  if (toffset != 0
3725
      || bitpos % BITS_PER_UNIT != 0
3726
      || TREE_CODE (core) != VAR_DECL)
3727
    {
3728
      *symbol_present = false;
3729
      *var_present = true;
3730
      fd_ivopts_data = data;
3731
      walk_tree (&addr, find_depends, depends_on, NULL);
3732
      return target_spill_cost;
3733
    }
3734
 
3735
  *offset += bitpos / BITS_PER_UNIT;
3736
  if (TREE_STATIC (core)
3737
      || DECL_EXTERNAL (core))
3738
    {
3739
      *symbol_present = true;
3740
      *var_present = false;
3741
      return 0;
3742
    }
3743
 
3744
  *symbol_present = false;
3745
  *var_present = true;
3746
  return 0;
3747
}
3748
 
3749
/* Estimates cost of expressing difference of addresses E1 - E2 as
3750
   var + symbol + offset.  The value of offset is added to OFFSET,
3751
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3752
   part is missing.  DEPENDS_ON is a set of the invariants the computation
3753
   depends on.  */
3754
 
3755
static unsigned
3756
ptr_difference_cost (struct ivopts_data *data,
3757
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
3758
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3759
{
3760
  HOST_WIDE_INT diff = 0;
3761
  unsigned cost;
3762
 
3763
  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3764
 
3765
  if (ptr_difference_const (e1, e2, &diff))
3766
    {
3767
      *offset += diff;
3768
      *symbol_present = false;
3769
      *var_present = false;
3770
      return 0;
3771
    }
3772
 
3773
  if (e2 == integer_zero_node)
3774
    return split_address_cost (data, TREE_OPERAND (e1, 0),
3775
                               symbol_present, var_present, offset, depends_on);
3776
 
3777
  *symbol_present = false;
3778
  *var_present = true;
3779
 
3780
  cost = force_var_cost (data, e1, depends_on);
3781
  cost += force_var_cost (data, e2, depends_on);
3782
  cost += add_cost (Pmode);
3783
 
3784
  return cost;
3785
}
3786
 
3787
/* Estimates cost of expressing difference E1 - E2 as
3788
   var + symbol + offset.  The value of offset is added to OFFSET,
3789
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3790
   part is missing.  DEPENDS_ON is a set of the invariants the computation
3791
   depends on.  */
3792
 
3793
static unsigned
3794
difference_cost (struct ivopts_data *data,
3795
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
3796
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3797
{
3798
  unsigned cost;
3799
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3800
  unsigned HOST_WIDE_INT off1, off2;
3801
 
3802
  e1 = strip_offset (e1, &off1);
3803
  e2 = strip_offset (e2, &off2);
3804
  *offset += off1 - off2;
3805
 
3806
  STRIP_NOPS (e1);
3807
  STRIP_NOPS (e2);
3808
 
3809
  if (TREE_CODE (e1) == ADDR_EXPR)
3810
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3811
                                depends_on);
3812
  *symbol_present = false;
3813
 
3814
  if (operand_equal_p (e1, e2, 0))
3815
    {
3816
      *var_present = false;
3817
      return 0;
3818
    }
3819
  *var_present = true;
3820
  if (zero_p (e2))
3821
    return force_var_cost (data, e1, depends_on);
3822
 
3823
  if (zero_p (e1))
3824
    {
3825
      cost = force_var_cost (data, e2, depends_on);
3826
      cost += multiply_by_cost (-1, mode);
3827
 
3828
      return cost;
3829
    }
3830
 
3831
  cost = force_var_cost (data, e1, depends_on);
3832
  cost += force_var_cost (data, e2, depends_on);
3833
  cost += add_cost (mode);
3834
 
3835
  return cost;
3836
}
3837
 
3838
/* Determines the cost of the computation by that USE is expressed
3839
   from induction variable CAND.  If ADDRESS_P is true, we just need
3840
   to create an address from it, otherwise we want to get it into
3841
   register.  A set of invariants we depend on is stored in
3842
   DEPENDS_ON.  AT is the statement at that the value is computed.  */
3843
 
3844
static unsigned
3845
get_computation_cost_at (struct ivopts_data *data,
3846
                         struct iv_use *use, struct iv_cand *cand,
3847
                         bool address_p, bitmap *depends_on, tree at)
3848
{
3849
  tree ubase = use->iv->base, ustep = use->iv->step;
3850
  tree cbase, cstep;
3851
  tree utype = TREE_TYPE (ubase), ctype;
3852
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3853
  HOST_WIDE_INT ratio, aratio;
3854
  bool var_present, symbol_present;
3855
  unsigned cost = 0, n_sums;
3856
 
3857
  *depends_on = NULL;
3858
 
3859
  /* Only consider real candidates.  */
3860
  if (!cand->iv)
3861
    return INFTY;
3862
 
3863
  cbase = cand->iv->base;
3864
  cstep = cand->iv->step;
3865
  ctype = TREE_TYPE (cbase);
3866
 
3867
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3868
    {
3869
      /* We do not have a precision to express the values of use.  */
3870
      return INFTY;
3871
    }
3872
 
3873
  if (address_p)
3874
    {
3875
      /* Do not try to express address of an object with computation based
3876
         on address of a different object.  This may cause problems in rtl
3877
         level alias analysis (that does not expect this to be happening,
3878
         as this is illegal in C), and would be unlikely to be useful
3879
         anyway.  */
3880
      if (use->iv->base_object
3881
          && cand->iv->base_object
3882
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3883
        return INFTY;
3884
    }
3885
 
3886
  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3887
    {
3888
      /* TODO -- add direct handling of this case.  */
3889
      goto fallback;
3890
    }
3891
 
3892
  /* CSTEPI is removed from the offset in case statement is after the
3893
     increment.  If the step is not constant, we use zero instead.
3894
     This is a bit imprecise (there is the extra addition), but
3895
     redundancy elimination is likely to transform the code so that
3896
     it uses value of the variable before increment anyway,
3897
     so it is not that much unrealistic.  */
3898
  if (cst_and_fits_in_hwi (cstep))
3899
    cstepi = int_cst_value (cstep);
3900
  else
3901
    cstepi = 0;
3902
 
3903
  if (cst_and_fits_in_hwi (ustep)
3904
      && cst_and_fits_in_hwi (cstep))
3905
    {
3906
      ustepi = int_cst_value (ustep);
3907
 
3908
      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3909
        return INFTY;
3910
    }
3911
  else
3912
    {
3913
      double_int rat;
3914
 
3915
      if (!constant_multiple_of (ustep, cstep, &rat))
3916
        return INFTY;
3917
 
3918
      if (double_int_fits_in_shwi_p (rat))
3919
        ratio = double_int_to_shwi (rat);
3920
      else
3921
        return INFTY;
3922
    }
3923
 
3924
  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3925
     or ratio == 1, it is better to handle this like
3926
 
3927
     ubase - ratio * cbase + ratio * var
3928
 
3929
     (also holds in the case ratio == -1, TODO.  */
3930
 
3931
  if (cst_and_fits_in_hwi (cbase))
3932
    {
3933
      offset = - ratio * int_cst_value (cbase);
3934
      cost += difference_cost (data,
3935
                               ubase, integer_zero_node,
3936
                               &symbol_present, &var_present, &offset,
3937
                               depends_on);
3938
    }
3939
  else if (ratio == 1)
3940
    {
3941
      cost += difference_cost (data,
3942
                               ubase, cbase,
3943
                               &symbol_present, &var_present, &offset,
3944
                               depends_on);
3945
    }
3946
  else
3947
    {
3948
      cost += force_var_cost (data, cbase, depends_on);
3949
      cost += add_cost (TYPE_MODE (ctype));
3950
      cost += difference_cost (data,
3951
                               ubase, integer_zero_node,
3952
                               &symbol_present, &var_present, &offset,
3953
                               depends_on);
3954
    }
3955
 
3956
  /* If we are after the increment, the value of the candidate is higher by
3957
     one iteration.  */
3958
  if (stmt_after_increment (data->current_loop, cand, at))
3959
    offset -= ratio * cstepi;
3960
 
3961
  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3962
     (symbol/var/const parts may be omitted).  If we are looking for an address,
3963
     find the cost of addressing this.  */
3964
  if (address_p)
3965
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3966
 
3967
  /* Otherwise estimate the costs for computing the expression.  */
3968
  aratio = ratio > 0 ? ratio : -ratio;
3969
  if (!symbol_present && !var_present && !offset)
3970
    {
3971
      if (ratio != 1)
3972
        cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3973
 
3974
      return cost;
3975
    }
3976
 
3977
  if (aratio != 1)
3978
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3979
 
3980
  n_sums = 1;
3981
  if (var_present
3982
      /* Symbol + offset should be compile-time computable.  */
3983
      && (symbol_present || offset))
3984
    n_sums++;
3985
 
3986
  return cost + n_sums * add_cost (TYPE_MODE (ctype));
3987
 
3988
fallback:
3989
  {
3990
    /* Just get the expression, expand it and measure the cost.  */
3991
    tree comp = get_computation_at (data->current_loop, use, cand, at);
3992
 
3993
    if (!comp)
3994
      return INFTY;
3995
 
3996
    if (address_p)
3997
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3998
 
3999
    return computation_cost (comp);
4000
  }
4001
}
4002
 
4003
/* Determines the cost of the computation by that USE is expressed
4004
   from induction variable CAND.  If ADDRESS_P is true, we just need
4005
   to create an address from it, otherwise we want to get it into
4006
   register.  A set of invariants we depend on is stored in
4007
   DEPENDS_ON.  */
4008
 
4009
static unsigned
4010
get_computation_cost (struct ivopts_data *data,
4011
                      struct iv_use *use, struct iv_cand *cand,
4012
                      bool address_p, bitmap *depends_on)
4013
{
4014
  return get_computation_cost_at (data,
4015
                                  use, cand, address_p, depends_on, use->stmt);
4016
}
4017
 
4018
/* Determines cost of basing replacement of USE on CAND in a generic
4019
   expression.  */
4020
 
4021
static bool
4022
determine_use_iv_cost_generic (struct ivopts_data *data,
4023
                               struct iv_use *use, struct iv_cand *cand)
4024
{
4025
  bitmap depends_on;
4026
  unsigned cost;
4027
 
4028
  /* The simple case first -- if we need to express value of the preserved
4029
     original biv, the cost is 0.  This also prevents us from counting the
4030
     cost of increment twice -- once at this use and once in the cost of
4031
     the candidate.  */
4032
  if (cand->pos == IP_ORIGINAL
4033
      && cand->incremented_at == use->stmt)
4034
    {
4035
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4036
      return true;
4037
    }
4038
 
4039
  cost = get_computation_cost (data, use, cand, false, &depends_on);
4040
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4041
 
4042
  return cost != INFTY;
4043
}
4044
 
4045
/* Determines cost of basing replacement of USE on CAND in an address.  */
4046
 
4047
static bool
4048
determine_use_iv_cost_address (struct ivopts_data *data,
4049
                               struct iv_use *use, struct iv_cand *cand)
4050
{
4051
  bitmap depends_on;
4052
  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4053
 
4054
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4055
 
4056
  return cost != INFTY;
4057
}
4058
 
4059
/* Computes value of induction variable IV in iteration NITER.  */
4060
 
4061
static tree
4062
iv_value (struct iv *iv, tree niter)
4063
{
4064
  tree val;
4065
  tree type = TREE_TYPE (iv->base);
4066
 
4067
  niter = fold_convert (type, niter);
4068
  val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4069
 
4070
  return fold_build2 (PLUS_EXPR, type, iv->base, val);
4071
}
4072
 
4073
/* Computes value of candidate CAND at position AT in iteration NITER.  */
4074
 
4075
static tree
4076
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4077
{
4078
  tree val = iv_value (cand->iv, niter);
4079
  tree type = TREE_TYPE (cand->iv->base);
4080
 
4081
  if (stmt_after_increment (loop, cand, at))
4082
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4083
 
4084
  return val;
4085
}
4086
 
4087
/* Returns period of induction variable iv.  */
4088
 
4089
static tree
4090
iv_period (struct iv *iv)
4091
{
4092
  tree step = iv->step, period, type;
4093
  tree pow2div;
4094
 
4095
  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4096
 
4097
  /* Period of the iv is gcd (step, type range).  Since type range is power
4098
     of two, it suffices to determine the maximum power of two that divides
4099
     step.  */
4100
  pow2div = num_ending_zeros (step);
4101
  type = unsigned_type_for (TREE_TYPE (step));
4102
 
4103
  period = build_low_bits_mask (type,
4104
                                (TYPE_PRECISION (type)
4105
                                 - tree_low_cst (pow2div, 1)));
4106
 
4107
  return period;
4108
}
4109
 
4110
/* Returns the comparison operator used when eliminating the iv USE.  */
4111
 
4112
static enum tree_code
4113
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4114
{
4115
  struct loop *loop = data->current_loop;
4116
  basic_block ex_bb;
4117
  edge exit;
4118
 
4119
  ex_bb = bb_for_stmt (use->stmt);
4120
  exit = EDGE_SUCC (ex_bb, 0);
4121
  if (flow_bb_inside_loop_p (loop, exit->dest))
4122
    exit = EDGE_SUCC (ex_bb, 1);
4123
 
4124
  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4125
}
4126
 
4127
/* Check whether it is possible to express the condition in USE by comparison
4128
   of candidate CAND.  If so, store the value compared with to BOUND.  */
4129
 
4130
static bool
4131
may_eliminate_iv (struct ivopts_data *data,
4132
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
4133
{
4134
  basic_block ex_bb;
4135
  edge exit;
4136
  tree nit, nit_type;
4137
  tree wider_type, period, per_type;
4138
  struct loop *loop = data->current_loop;
4139
 
4140
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4141
    return false;
4142
 
4143
  /* For now works only for exits that dominate the loop latch.  TODO -- extend
4144
     for other conditions inside loop body.  */
4145
  ex_bb = bb_for_stmt (use->stmt);
4146
  if (use->stmt != last_stmt (ex_bb)
4147
      || TREE_CODE (use->stmt) != COND_EXPR)
4148
    return false;
4149
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4150
    return false;
4151
 
4152
  exit = EDGE_SUCC (ex_bb, 0);
4153
  if (flow_bb_inside_loop_p (loop, exit->dest))
4154
    exit = EDGE_SUCC (ex_bb, 1);
4155
  if (flow_bb_inside_loop_p (loop, exit->dest))
4156
    return false;
4157
 
4158
  nit = niter_for_exit (data, exit);
4159
  if (!nit)
4160
    return false;
4161
 
4162
  nit_type = TREE_TYPE (nit);
4163
 
4164
  /* Determine whether we may use the variable to test whether niter iterations
4165
     elapsed.  This is the case iff the period of the induction variable is
4166
     greater than the number of iterations.  */
4167
  period = iv_period (cand->iv);
4168
  if (!period)
4169
    return false;
4170
  per_type = TREE_TYPE (period);
4171
 
4172
  wider_type = TREE_TYPE (period);
4173
  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4174
    wider_type = per_type;
4175
  else
4176
    wider_type = nit_type;
4177
 
4178
  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4179
                                      fold_convert (wider_type, period),
4180
                                      fold_convert (wider_type, nit))))
4181
    return false;
4182
 
4183
  *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4184
  return true;
4185
}
4186
 
4187
/* Determines cost of basing replacement of USE on CAND in a condition.  */
4188
 
4189
static bool
4190
determine_use_iv_cost_condition (struct ivopts_data *data,
4191
                                 struct iv_use *use, struct iv_cand *cand)
4192
{
4193
  tree bound = NULL_TREE, op, cond;
4194
  bitmap depends_on = NULL;
4195
  unsigned cost;
4196
 
4197
  /* Only consider real candidates.  */
4198
  if (!cand->iv)
4199
    {
4200
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4201
      return false;
4202
    }
4203
 
4204
  if (may_eliminate_iv (data, use, cand, &bound))
4205
    {
4206
      cost = force_var_cost (data, bound, &depends_on);
4207
 
4208
      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4209
      return cost != INFTY;
4210
    }
4211
 
4212
  /* The induction variable elimination failed; just express the original
4213
     giv.  If it is compared with an invariant, note that we cannot get
4214
     rid of it.  */
4215
  cost = get_computation_cost (data, use, cand, false, &depends_on);
4216
 
4217
  cond = *use->op_p;
4218
  if (TREE_CODE (cond) != SSA_NAME)
4219
    {
4220
      op = TREE_OPERAND (cond, 0);
4221
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4222
        op = TREE_OPERAND (cond, 1);
4223
      if (TREE_CODE (op) == SSA_NAME)
4224
        {
4225
          op = get_iv (data, op)->base;
4226
          fd_ivopts_data = data;
4227
          walk_tree (&op, find_depends, &depends_on, NULL);
4228
        }
4229
    }
4230
 
4231
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4232
  return cost != INFTY;
4233
}
4234
 
4235
/* Determines cost of basing replacement of USE on CAND.  Returns false
4236
   if USE cannot be based on CAND.  */
4237
 
4238
static bool
4239
determine_use_iv_cost (struct ivopts_data *data,
4240
                       struct iv_use *use, struct iv_cand *cand)
4241
{
4242
  switch (use->type)
4243
    {
4244
    case USE_NONLINEAR_EXPR:
4245
      return determine_use_iv_cost_generic (data, use, cand);
4246
 
4247
    case USE_ADDRESS:
4248
      return determine_use_iv_cost_address (data, use, cand);
4249
 
4250
    case USE_COMPARE:
4251
      return determine_use_iv_cost_condition (data, use, cand);
4252
 
4253
    default:
4254
      gcc_unreachable ();
4255
    }
4256
}
4257
 
4258
/* Determines costs of basing the use of the iv on an iv candidate.  */
4259
 
4260
static void
4261
determine_use_iv_costs (struct ivopts_data *data)
4262
{
4263
  unsigned i, j;
4264
  struct iv_use *use;
4265
  struct iv_cand *cand;
4266
  bitmap to_clear = BITMAP_ALLOC (NULL);
4267
 
4268
  alloc_use_cost_map (data);
4269
 
4270
  for (i = 0; i < n_iv_uses (data); i++)
4271
    {
4272
      use = iv_use (data, i);
4273
 
4274
      if (data->consider_all_candidates)
4275
        {
4276
          for (j = 0; j < n_iv_cands (data); j++)
4277
            {
4278
              cand = iv_cand (data, j);
4279
              determine_use_iv_cost (data, use, cand);
4280
            }
4281
        }
4282
      else
4283
        {
4284
          bitmap_iterator bi;
4285
 
4286
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4287
            {
4288
              cand = iv_cand (data, j);
4289
              if (!determine_use_iv_cost (data, use, cand))
4290
                bitmap_set_bit (to_clear, j);
4291
            }
4292
 
4293
          /* Remove the candidates for that the cost is infinite from
4294
             the list of related candidates.  */
4295
          bitmap_and_compl_into (use->related_cands, to_clear);
4296
          bitmap_clear (to_clear);
4297
        }
4298
    }
4299
 
4300
  BITMAP_FREE (to_clear);
4301
 
4302
  if (dump_file && (dump_flags & TDF_DETAILS))
4303
    {
4304
      fprintf (dump_file, "Use-candidate costs:\n");
4305
 
4306
      for (i = 0; i < n_iv_uses (data); i++)
4307
        {
4308
          use = iv_use (data, i);
4309
 
4310
          fprintf (dump_file, "Use %d:\n", i);
4311
          fprintf (dump_file, "  cand\tcost\tdepends on\n");
4312
          for (j = 0; j < use->n_map_members; j++)
4313
            {
4314
              if (!use->cost_map[j].cand
4315
                  || use->cost_map[j].cost == INFTY)
4316
                continue;
4317
 
4318
              fprintf (dump_file, "  %d\t%d\t",
4319
                       use->cost_map[j].cand->id,
4320
                       use->cost_map[j].cost);
4321
              if (use->cost_map[j].depends_on)
4322
                bitmap_print (dump_file,
4323
                              use->cost_map[j].depends_on, "","");
4324
              fprintf (dump_file, "\n");
4325
            }
4326
 
4327
          fprintf (dump_file, "\n");
4328
        }
4329
      fprintf (dump_file, "\n");
4330
    }
4331
}
4332
 
4333
/* Determines cost of the candidate CAND.  */
4334
 
4335
static void
4336
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4337
{
4338
  unsigned cost_base, cost_step;
4339
  tree base;
4340
 
4341
  if (!cand->iv)
4342
    {
4343
      cand->cost = 0;
4344
      return;
4345
    }
4346
 
4347
  /* There are two costs associated with the candidate -- its increment
4348
     and its initialization.  The second is almost negligible for any loop
4349
     that rolls enough, so we take it just very little into account.  */
4350
 
4351
  base = cand->iv->base;
4352
  cost_base = force_var_cost (data, base, NULL);
4353
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4354
 
4355
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4356
 
4357
  /* Prefer the original iv unless we may gain something by replacing it;
4358
     this is not really relevant for artificial ivs created by other
4359
     passes.  */
4360
  if (cand->pos == IP_ORIGINAL
4361
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4362
    cand->cost--;
4363
 
4364
  /* Prefer not to insert statements into latch unless there are some
4365
     already (so that we do not create unnecessary jumps).  */
4366
  if (cand->pos == IP_END
4367
      && empty_block_p (ip_end_pos (data->current_loop)))
4368
    cand->cost++;
4369
}
4370
 
4371
/* Determines costs of computation of the candidates.  */
4372
 
4373
static void
4374
determine_iv_costs (struct ivopts_data *data)
4375
{
4376
  unsigned i;
4377
 
4378
  if (dump_file && (dump_flags & TDF_DETAILS))
4379
    {
4380
      fprintf (dump_file, "Candidate costs:\n");
4381
      fprintf (dump_file, "  cand\tcost\n");
4382
    }
4383
 
4384
  for (i = 0; i < n_iv_cands (data); i++)
4385
    {
4386
      struct iv_cand *cand = iv_cand (data, i);
4387
 
4388
      determine_iv_cost (data, cand);
4389
 
4390
      if (dump_file && (dump_flags & TDF_DETAILS))
4391
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4392
    }
4393
 
4394
if (dump_file && (dump_flags & TDF_DETAILS))
4395
      fprintf (dump_file, "\n");
4396
}
4397
 
4398
/* Calculates cost for having SIZE induction variables.  */
4399
 
4400
static unsigned
4401
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4402
{
4403
  return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4404
}
4405
 
4406
/* For each size of the induction variable set determine the penalty.  */
4407
 
4408
static void
4409
determine_set_costs (struct ivopts_data *data)
4410
{
4411
  unsigned j, n;
4412
  tree phi, op;
4413
  struct loop *loop = data->current_loop;
4414
  bitmap_iterator bi;
4415
 
4416
  /* We use the following model (definitely improvable, especially the
4417
     cost function -- TODO):
4418
 
4419
     We estimate the number of registers available (using MD data), name it A.
4420
 
4421
     We estimate the number of registers used by the loop, name it U.  This
4422
     number is obtained as the number of loop phi nodes (not counting virtual
4423
     registers and bivs) + the number of variables from outside of the loop.
4424
 
4425
     We set a reserve R (free regs that are used for temporary computations,
4426
     etc.).  For now the reserve is a constant 3.
4427
 
4428
     Let I be the number of induction variables.
4429
 
4430
     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4431
        make a lot of ivs without a reason).
4432
     -- if A - R < U + I <= A, the cost is I * PRES_COST
4433
     -- if U + I > A, the cost is I * PRES_COST and
4434
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4435
 
4436
  if (dump_file && (dump_flags & TDF_DETAILS))
4437
    {
4438
      fprintf (dump_file, "Global costs:\n");
4439
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4440
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4441
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4442
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4443
    }
4444
 
4445
  n = 0;
4446
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4447
    {
4448
      op = PHI_RESULT (phi);
4449
 
4450
      if (!is_gimple_reg (op))
4451
        continue;
4452
 
4453
      if (get_iv (data, op))
4454
        continue;
4455
 
4456
      n++;
4457
    }
4458
 
4459
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4460
    {
4461
      struct version_info *info = ver_info (data, j);
4462
 
4463
      if (info->inv_id && info->has_nonlin_use)
4464
        n++;
4465
    }
4466
 
4467
  data->regs_used = n;
4468
  if (dump_file && (dump_flags & TDF_DETAILS))
4469
    fprintf (dump_file, "  regs_used %d\n", n);
4470
 
4471
  if (dump_file && (dump_flags & TDF_DETAILS))
4472
    {
4473
      fprintf (dump_file, "  cost for size:\n");
4474
      fprintf (dump_file, "  ivs\tcost\n");
4475
      for (j = 0; j <= 2 * target_avail_regs; j++)
4476
        fprintf (dump_file, "  %d\t%d\n", j,
4477
                 ivopts_global_cost_for_size (data, j));
4478
      fprintf (dump_file, "\n");
4479
    }
4480
}
4481
 
4482
/* Returns true if A is a cheaper cost pair than B.  */
4483
 
4484
static bool
4485
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4486
{
4487
  if (!a)
4488
    return false;
4489
 
4490
  if (!b)
4491
    return true;
4492
 
4493
  if (a->cost < b->cost)
4494
    return true;
4495
 
4496
  if (a->cost > b->cost)
4497
    return false;
4498
 
4499
  /* In case the costs are the same, prefer the cheaper candidate.  */
4500
  if (a->cand->cost < b->cand->cost)
4501
    return true;
4502
 
4503
  return false;
4504
}
4505
 
4506
/* Computes the cost field of IVS structure.  */
4507
 
4508
static void
4509
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4510
{
4511
  unsigned cost = 0;
4512
 
4513
  cost += ivs->cand_use_cost;
4514
  cost += ivs->cand_cost;
4515
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4516
 
4517
  ivs->cost = cost;
4518
}
4519
 
4520
/* Remove invariants in set INVS to set IVS.  */
4521
 
4522
static void
4523
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4524
{
4525
  bitmap_iterator bi;
4526
  unsigned iid;
4527
 
4528
  if (!invs)
4529
    return;
4530
 
4531
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4532
    {
4533
      ivs->n_invariant_uses[iid]--;
4534
      if (ivs->n_invariant_uses[iid] == 0)
4535
        ivs->n_regs--;
4536
    }
4537
}
4538
 
4539
/* Set USE not to be expressed by any candidate in IVS.  */
4540
 
4541
static void
4542
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4543
                 struct iv_use *use)
4544
{
4545
  unsigned uid = use->id, cid;
4546
  struct cost_pair *cp;
4547
 
4548
  cp = ivs->cand_for_use[uid];
4549
  if (!cp)
4550
    return;
4551
  cid = cp->cand->id;
4552
 
4553
  ivs->bad_uses++;
4554
  ivs->cand_for_use[uid] = NULL;
4555
  ivs->n_cand_uses[cid]--;
4556
 
4557
  if (ivs->n_cand_uses[cid] == 0)
4558
    {
4559
      bitmap_clear_bit (ivs->cands, cid);
4560
      /* Do not count the pseudocandidates.  */
4561
      if (cp->cand->iv)
4562
        ivs->n_regs--;
4563
      ivs->n_cands--;
4564
      ivs->cand_cost -= cp->cand->cost;
4565
 
4566
      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4567
    }
4568
 
4569
  ivs->cand_use_cost -= cp->cost;
4570
 
4571
  iv_ca_set_remove_invariants (ivs, cp->depends_on);
4572
  iv_ca_recount_cost (data, ivs);
4573
}
4574
 
4575
/* Add invariants in set INVS to set IVS.  */
4576
 
4577
static void
4578
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4579
{
4580
  bitmap_iterator bi;
4581
  unsigned iid;
4582
 
4583
  if (!invs)
4584
    return;
4585
 
4586
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4587
    {
4588
      ivs->n_invariant_uses[iid]++;
4589
      if (ivs->n_invariant_uses[iid] == 1)
4590
        ivs->n_regs++;
4591
    }
4592
}
4593
 
4594
/* Set cost pair for USE in set IVS to CP.  */
4595
 
4596
static void
4597
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4598
              struct iv_use *use, struct cost_pair *cp)
4599
{
4600
  unsigned uid = use->id, cid;
4601
 
4602
  if (ivs->cand_for_use[uid] == cp)
4603
    return;
4604
 
4605
  if (ivs->cand_for_use[uid])
4606
    iv_ca_set_no_cp (data, ivs, use);
4607
 
4608
  if (cp)
4609
    {
4610
      cid = cp->cand->id;
4611
 
4612
      ivs->bad_uses--;
4613
      ivs->cand_for_use[uid] = cp;
4614
      ivs->n_cand_uses[cid]++;
4615
      if (ivs->n_cand_uses[cid] == 1)
4616
        {
4617
          bitmap_set_bit (ivs->cands, cid);
4618
          /* Do not count the pseudocandidates.  */
4619
          if (cp->cand->iv)
4620
            ivs->n_regs++;
4621
          ivs->n_cands++;
4622
          ivs->cand_cost += cp->cand->cost;
4623
 
4624
          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4625
        }
4626
 
4627
      ivs->cand_use_cost += cp->cost;
4628
      iv_ca_set_add_invariants (ivs, cp->depends_on);
4629
      iv_ca_recount_cost (data, ivs);
4630
    }
4631
}
4632
 
4633
/* Extend set IVS by expressing USE by some of the candidates in it
4634
   if possible.  */
4635
 
4636
static void
4637
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4638
               struct iv_use *use)
4639
{
4640
  struct cost_pair *best_cp = NULL, *cp;
4641
  bitmap_iterator bi;
4642
  unsigned i;
4643
 
4644
  gcc_assert (ivs->upto >= use->id);
4645
 
4646
  if (ivs->upto == use->id)
4647
    {
4648
      ivs->upto++;
4649
      ivs->bad_uses++;
4650
    }
4651
 
4652
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4653
    {
4654
      cp = get_use_iv_cost (data, use, iv_cand (data, i));
4655
 
4656
      if (cheaper_cost_pair (cp, best_cp))
4657
        best_cp = cp;
4658
    }
4659
 
4660
  iv_ca_set_cp (data, ivs, use, best_cp);
4661
}
4662
 
4663
/* Get cost for assignment IVS.  */
4664
 
4665
static unsigned
4666
iv_ca_cost (struct iv_ca *ivs)
4667
{
4668
  return (ivs->bad_uses ? INFTY : ivs->cost);
4669
}
4670
 
4671
/* Returns true if all dependences of CP are among invariants in IVS.  */
4672
 
4673
static bool
4674
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4675
{
4676
  unsigned i;
4677
  bitmap_iterator bi;
4678
 
4679
  if (!cp->depends_on)
4680
    return true;
4681
 
4682
  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4683
    {
4684
      if (ivs->n_invariant_uses[i] == 0)
4685
        return false;
4686
    }
4687
 
4688
  return true;
4689
}
4690
 
4691
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4692
   it before NEXT_CHANGE.  */
4693
 
4694
static struct iv_ca_delta *
4695
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4696
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4697
{
4698
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4699
 
4700
  change->use = use;
4701
  change->old_cp = old_cp;
4702
  change->new_cp = new_cp;
4703
  change->next_change = next_change;
4704
 
4705
  return change;
4706
}
4707
 
4708
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
4709
   are rewritten.  */
4710
 
4711
static struct iv_ca_delta *
4712
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4713
{
4714
  struct iv_ca_delta *last;
4715
 
4716
  if (!l2)
4717
    return l1;
4718
 
4719
  if (!l1)
4720
    return l2;
4721
 
4722
  for (last = l1; last->next_change; last = last->next_change)
4723
    continue;
4724
  last->next_change = l2;
4725
 
4726
  return l1;
4727
}
4728
 
4729
/* Returns candidate by that USE is expressed in IVS.  */
4730
 
4731
static struct cost_pair *
4732
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4733
{
4734
  return ivs->cand_for_use[use->id];
4735
}
4736
 
4737
/* Reverse the list of changes DELTA, forming the inverse to it.  */
4738
 
4739
static struct iv_ca_delta *
4740
iv_ca_delta_reverse (struct iv_ca_delta *delta)
4741
{
4742
  struct iv_ca_delta *act, *next, *prev = NULL;
4743
  struct cost_pair *tmp;
4744
 
4745
  for (act = delta; act; act = next)
4746
    {
4747
      next = act->next_change;
4748
      act->next_change = prev;
4749
      prev = act;
4750
 
4751
      tmp = act->old_cp;
4752
      act->old_cp = act->new_cp;
4753
      act->new_cp = tmp;
4754
    }
4755
 
4756
  return prev;
4757
}
4758
 
4759
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4760
   reverted instead.  */
4761
 
4762
static void
4763
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4764
                    struct iv_ca_delta *delta, bool forward)
4765
{
4766
  struct cost_pair *from, *to;
4767
  struct iv_ca_delta *act;
4768
 
4769
  if (!forward)
4770
    delta = iv_ca_delta_reverse (delta);
4771
 
4772
  for (act = delta; act; act = act->next_change)
4773
    {
4774
      from = act->old_cp;
4775
      to = act->new_cp;
4776
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4777
      iv_ca_set_cp (data, ivs, act->use, to);
4778
    }
4779
 
4780
  if (!forward)
4781
    iv_ca_delta_reverse (delta);
4782
}
4783
 
4784
/* Returns true if CAND is used in IVS.  */
4785
 
4786
static bool
4787
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4788
{
4789
  return ivs->n_cand_uses[cand->id] > 0;
4790
}
4791
 
4792
/* Returns number of induction variable candidates in the set IVS.  */
4793
 
4794
static unsigned
4795
iv_ca_n_cands (struct iv_ca *ivs)
4796
{
4797
  return ivs->n_cands;
4798
}
4799
 
4800
/* Free the list of changes DELTA.  */
4801
 
4802
static void
4803
iv_ca_delta_free (struct iv_ca_delta **delta)
4804
{
4805
  struct iv_ca_delta *act, *next;
4806
 
4807
  for (act = *delta; act; act = next)
4808
    {
4809
      next = act->next_change;
4810
      free (act);
4811
    }
4812
 
4813
  *delta = NULL;
4814
}
4815
 
4816
/* Allocates new iv candidates assignment.  */
4817
 
4818
static struct iv_ca *
4819
iv_ca_new (struct ivopts_data *data)
4820
{
4821
  struct iv_ca *nw = XNEW (struct iv_ca);
4822
 
4823
  nw->upto = 0;
4824
  nw->bad_uses = 0;
4825
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4826
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4827
  nw->cands = BITMAP_ALLOC (NULL);
4828
  nw->n_cands = 0;
4829
  nw->n_regs = 0;
4830
  nw->cand_use_cost = 0;
4831
  nw->cand_cost = 0;
4832
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4833
  nw->cost = 0;
4834
 
4835
  return nw;
4836
}
4837
 
4838
/* Free memory occupied by the set IVS.  */
4839
 
4840
static void
4841
iv_ca_free (struct iv_ca **ivs)
4842
{
4843
  free ((*ivs)->cand_for_use);
4844
  free ((*ivs)->n_cand_uses);
4845
  BITMAP_FREE ((*ivs)->cands);
4846
  free ((*ivs)->n_invariant_uses);
4847
  free (*ivs);
4848
  *ivs = NULL;
4849
}
4850
 
4851
/* Dumps IVS to FILE.  */
4852
 
4853
static void
4854
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4855
{
4856
  const char *pref = "  invariants ";
4857
  unsigned i;
4858
 
4859
  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4860
  bitmap_print (file, ivs->cands, "  candidates ","\n");
4861
 
4862
  for (i = 1; i <= data->max_inv_id; i++)
4863
    if (ivs->n_invariant_uses[i])
4864
      {
4865
        fprintf (file, "%s%d", pref, i);
4866
        pref = ", ";
4867
      }
4868
  fprintf (file, "\n");
4869
}
4870
 
4871
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
4872
   new set, and store differences in DELTA.  Number of induction variables
4873
   in the new set is stored to N_IVS.  */
4874
 
4875
static unsigned
4876
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4877
              struct iv_cand *cand, struct iv_ca_delta **delta,
4878
              unsigned *n_ivs)
4879
{
4880
  unsigned i, cost;
4881
  struct iv_use *use;
4882
  struct cost_pair *old_cp, *new_cp;
4883
 
4884
  *delta = NULL;
4885
  for (i = 0; i < ivs->upto; i++)
4886
    {
4887
      use = iv_use (data, i);
4888
      old_cp = iv_ca_cand_for_use (ivs, use);
4889
 
4890
      if (old_cp
4891
          && old_cp->cand == cand)
4892
        continue;
4893
 
4894
      new_cp = get_use_iv_cost (data, use, cand);
4895
      if (!new_cp)
4896
        continue;
4897
 
4898
      if (!iv_ca_has_deps (ivs, new_cp))
4899
        continue;
4900
 
4901
      if (!cheaper_cost_pair (new_cp, old_cp))
4902
        continue;
4903
 
4904
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4905
    }
4906
 
4907
  iv_ca_delta_commit (data, ivs, *delta, true);
4908
  cost = iv_ca_cost (ivs);
4909
  if (n_ivs)
4910
    *n_ivs = iv_ca_n_cands (ivs);
4911
  iv_ca_delta_commit (data, ivs, *delta, false);
4912
 
4913
  return cost;
4914
}
4915
 
4916
/* Try narrowing set IVS by removing CAND.  Return the cost of
4917
   the new set and store the differences in DELTA.  */
4918
 
4919
static unsigned
4920
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4921
              struct iv_cand *cand, struct iv_ca_delta **delta)
4922
{
4923
  unsigned i, ci;
4924
  struct iv_use *use;
4925
  struct cost_pair *old_cp, *new_cp, *cp;
4926
  bitmap_iterator bi;
4927
  struct iv_cand *cnd;
4928
  unsigned cost;
4929
 
4930
  *delta = NULL;
4931
  for (i = 0; i < n_iv_uses (data); i++)
4932
    {
4933
      use = iv_use (data, i);
4934
 
4935
      old_cp = iv_ca_cand_for_use (ivs, use);
4936
      if (old_cp->cand != cand)
4937
        continue;
4938
 
4939
      new_cp = NULL;
4940
 
4941
      if (data->consider_all_candidates)
4942
        {
4943
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4944
            {
4945
              if (ci == cand->id)
4946
                continue;
4947
 
4948
              cnd = iv_cand (data, ci);
4949
 
4950
              cp = get_use_iv_cost (data, use, cnd);
4951
              if (!cp)
4952
                continue;
4953
              if (!iv_ca_has_deps (ivs, cp))
4954
                continue;
4955
 
4956
              if (!cheaper_cost_pair (cp, new_cp))
4957
                continue;
4958
 
4959
              new_cp = cp;
4960
            }
4961
        }
4962
      else
4963
        {
4964
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4965
            {
4966
              if (ci == cand->id)
4967
                continue;
4968
 
4969
              cnd = iv_cand (data, ci);
4970
 
4971
              cp = get_use_iv_cost (data, use, cnd);
4972
              if (!cp)
4973
                continue;
4974
              if (!iv_ca_has_deps (ivs, cp))
4975
                continue;
4976
 
4977
              if (!cheaper_cost_pair (cp, new_cp))
4978
                continue;
4979
 
4980
              new_cp = cp;
4981
            }
4982
        }
4983
 
4984
      if (!new_cp)
4985
        {
4986
          iv_ca_delta_free (delta);
4987
          return INFTY;
4988
        }
4989
 
4990
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4991
    }
4992
 
4993
  iv_ca_delta_commit (data, ivs, *delta, true);
4994
  cost = iv_ca_cost (ivs);
4995
  iv_ca_delta_commit (data, ivs, *delta, false);
4996
 
4997
  return cost;
4998
}
4999
 
5000
/* Try optimizing the set of candidates IVS by removing candidates different
5001
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
5002
   differences in DELTA.  */
5003
 
5004
static unsigned
5005
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5006
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
5007
{
5008
  bitmap_iterator bi;
5009
  struct iv_ca_delta *act_delta, *best_delta;
5010
  unsigned i, best_cost, acost;
5011
  struct iv_cand *cand;
5012
 
5013
  best_delta = NULL;
5014
  best_cost = iv_ca_cost (ivs);
5015
 
5016
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5017
    {
5018
      cand = iv_cand (data, i);
5019
 
5020
      if (cand == except_cand)
5021
        continue;
5022
 
5023
      acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5024
 
5025
      if (acost < best_cost)
5026
        {
5027
          best_cost = acost;
5028
          iv_ca_delta_free (&best_delta);
5029
          best_delta = act_delta;
5030
        }
5031
      else
5032
        iv_ca_delta_free (&act_delta);
5033
    }
5034
 
5035
  if (!best_delta)
5036
    {
5037
      *delta = NULL;
5038
      return best_cost;
5039
    }
5040
 
5041
  /* Recurse to possibly remove other unnecessary ivs.  */
5042
  iv_ca_delta_commit (data, ivs, best_delta, true);
5043
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5044
  iv_ca_delta_commit (data, ivs, best_delta, false);
5045
  *delta = iv_ca_delta_join (best_delta, *delta);
5046
  return best_cost;
5047
}
5048
 
5049
/* Tries to extend the sets IVS in the best possible way in order
5050
   to express the USE.  */
5051
 
5052
static bool
5053
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5054
                  struct iv_use *use)
5055
{
5056
  unsigned best_cost, act_cost;
5057
  unsigned i;
5058
  bitmap_iterator bi;
5059
  struct iv_cand *cand;
5060
  struct iv_ca_delta *best_delta = NULL, *act_delta;
5061
  struct cost_pair *cp;
5062
 
5063
  iv_ca_add_use (data, ivs, use);
5064
  best_cost = iv_ca_cost (ivs);
5065
 
5066
  cp = iv_ca_cand_for_use (ivs, use);
5067
  if (cp)
5068
    {
5069
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5070
      iv_ca_set_no_cp (data, ivs, use);
5071
    }
5072
 
5073
  /* First try important candidates.  Only if it fails, try the specific ones.
5074
     Rationale -- in loops with many variables the best choice often is to use
5075
     just one generic biv.  If we added here many ivs specific to the uses,
5076
     the optimization algorithm later would be likely to get stuck in a local
5077
     minimum, thus causing us to create too many ivs.  The approach from
5078
     few ivs to more seems more likely to be successful -- starting from few
5079
     ivs, replacing an expensive use by a specific iv should always be a
5080
     win.  */
5081
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5082
    {
5083
      cand = iv_cand (data, i);
5084
 
5085
      if (iv_ca_cand_used_p (ivs, cand))
5086
        continue;
5087
 
5088
      cp = get_use_iv_cost (data, use, cand);
5089
      if (!cp)
5090
        continue;
5091
 
5092
      iv_ca_set_cp (data, ivs, use, cp);
5093
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5094
      iv_ca_set_no_cp (data, ivs, use);
5095
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5096
 
5097
      if (act_cost < best_cost)
5098
        {
5099
          best_cost = act_cost;
5100
 
5101
          iv_ca_delta_free (&best_delta);
5102
          best_delta = act_delta;
5103
        }
5104
      else
5105
        iv_ca_delta_free (&act_delta);
5106
    }
5107
 
5108
  if (best_cost == INFTY)
5109
    {
5110
      for (i = 0; i < use->n_map_members; i++)
5111
        {
5112
          cp = use->cost_map + i;
5113
          cand = cp->cand;
5114
          if (!cand)
5115
            continue;
5116
 
5117
          /* Already tried this.  */
5118
          if (cand->important)
5119
            continue;
5120
 
5121
          if (iv_ca_cand_used_p (ivs, cand))
5122
            continue;
5123
 
5124
          act_delta = NULL;
5125
          iv_ca_set_cp (data, ivs, use, cp);
5126
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5127
          iv_ca_set_no_cp (data, ivs, use);
5128
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5129
                                       cp, act_delta);
5130
 
5131
          if (act_cost < best_cost)
5132
            {
5133
              best_cost = act_cost;
5134
 
5135
              if (best_delta)
5136
                iv_ca_delta_free (&best_delta);
5137
              best_delta = act_delta;
5138
            }
5139
          else
5140
            iv_ca_delta_free (&act_delta);
5141
        }
5142
    }
5143
 
5144
  iv_ca_delta_commit (data, ivs, best_delta, true);
5145
  iv_ca_delta_free (&best_delta);
5146
 
5147
  return (best_cost != INFTY);
5148
}
5149
 
5150
/* Finds an initial assignment of candidates to uses.  */
5151
 
5152
static struct iv_ca *
5153
get_initial_solution (struct ivopts_data *data)
5154
{
5155
  struct iv_ca *ivs = iv_ca_new (data);
5156
  unsigned i;
5157
 
5158
  for (i = 0; i < n_iv_uses (data); i++)
5159
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5160
      {
5161
        iv_ca_free (&ivs);
5162
        return NULL;
5163
      }
5164
 
5165
  return ivs;
5166
}
5167
 
5168
/* Tries to improve set of induction variables IVS.  */
5169
 
5170
static bool
5171
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5172
{
5173
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5174
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5175
  struct iv_cand *cand;
5176
 
5177
  /* Try extending the set of induction variables by one.  */
5178
  for (i = 0; i < n_iv_cands (data); i++)
5179
    {
5180
      cand = iv_cand (data, i);
5181
 
5182
      if (iv_ca_cand_used_p (ivs, cand))
5183
        continue;
5184
 
5185
      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5186
      if (!act_delta)
5187
        continue;
5188
 
5189
      /* If we successfully added the candidate and the set is small enough,
5190
         try optimizing it by removing other candidates.  */
5191
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5192
        {
5193
          iv_ca_delta_commit (data, ivs, act_delta, true);
5194
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5195
          iv_ca_delta_commit (data, ivs, act_delta, false);
5196
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5197
        }
5198
 
5199
      if (acost < best_cost)
5200
        {
5201
          best_cost = acost;
5202
          iv_ca_delta_free (&best_delta);
5203
          best_delta = act_delta;
5204
        }
5205
      else
5206
        iv_ca_delta_free (&act_delta);
5207
    }
5208
 
5209
  if (!best_delta)
5210
    {
5211
      /* Try removing the candidates from the set instead.  */
5212
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5213
 
5214
      /* Nothing more we can do.  */
5215
      if (!best_delta)
5216
        return false;
5217
    }
5218
 
5219
  iv_ca_delta_commit (data, ivs, best_delta, true);
5220
  gcc_assert (best_cost == iv_ca_cost (ivs));
5221
  iv_ca_delta_free (&best_delta);
5222
  return true;
5223
}
5224
 
5225
/* Attempts to find the optimal set of induction variables.  We do simple
5226
   greedy heuristic -- we try to replace at most one candidate in the selected
5227
   solution and remove the unused ivs while this improves the cost.  */
5228
 
5229
static struct iv_ca *
5230
find_optimal_iv_set (struct ivopts_data *data)
5231
{
5232
  unsigned i;
5233
  struct iv_ca *set;
5234
  struct iv_use *use;
5235
 
5236
  /* Get the initial solution.  */
5237
  set = get_initial_solution (data);
5238
  if (!set)
5239
    {
5240
      if (dump_file && (dump_flags & TDF_DETAILS))
5241
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5242
      return NULL;
5243
    }
5244
 
5245
  if (dump_file && (dump_flags & TDF_DETAILS))
5246
    {
5247
      fprintf (dump_file, "Initial set of candidates:\n");
5248
      iv_ca_dump (data, dump_file, set);
5249
    }
5250
 
5251
  while (try_improve_iv_set (data, set))
5252
    {
5253
      if (dump_file && (dump_flags & TDF_DETAILS))
5254
        {
5255
          fprintf (dump_file, "Improved to:\n");
5256
          iv_ca_dump (data, dump_file, set);
5257
        }
5258
    }
5259
 
5260
  if (dump_file && (dump_flags & TDF_DETAILS))
5261
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5262
 
5263
  for (i = 0; i < n_iv_uses (data); i++)
5264
    {
5265
      use = iv_use (data, i);
5266
      use->selected = iv_ca_cand_for_use (set, use)->cand;
5267
    }
5268
 
5269
  return set;
5270
}
5271
 
5272
/* Creates a new induction variable corresponding to CAND.  */
5273
 
5274
static void
5275
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5276
{
5277
  block_stmt_iterator incr_pos;
5278
  tree base;
5279
  bool after = false;
5280
 
5281
  if (!cand->iv)
5282
    return;
5283
 
5284
  switch (cand->pos)
5285
    {
5286
    case IP_NORMAL:
5287
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5288
      break;
5289
 
5290
    case IP_END:
5291
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
5292
      after = true;
5293
      break;
5294
 
5295
    case IP_ORIGINAL:
5296
      /* Mark that the iv is preserved.  */
5297
      name_info (data, cand->var_before)->preserve_biv = true;
5298
      name_info (data, cand->var_after)->preserve_biv = true;
5299
 
5300
      /* Rewrite the increment so that it uses var_before directly.  */
5301
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
5302
 
5303
      return;
5304
    }
5305
 
5306
  gimple_add_tmp_var (cand->var_before);
5307
  add_referenced_var (cand->var_before);
5308
 
5309
  base = unshare_expr (cand->iv->base);
5310
 
5311
  create_iv (base, unshare_expr (cand->iv->step),
5312
             cand->var_before, data->current_loop,
5313
             &incr_pos, after, &cand->var_before, &cand->var_after);
5314
}
5315
 
5316
/* Creates new induction variables described in SET.  */
5317
 
5318
static void
5319
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5320
{
5321
  unsigned i;
5322
  struct iv_cand *cand;
5323
  bitmap_iterator bi;
5324
 
5325
  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5326
    {
5327
      cand = iv_cand (data, i);
5328
      create_new_iv (data, cand);
5329
    }
5330
}
5331
 
5332
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5333
   is true, remove also the ssa name defined by the statement.  */
5334
 
5335
static void
5336
remove_statement (tree stmt, bool including_defined_name)
5337
{
5338
  if (TREE_CODE (stmt) == PHI_NODE)
5339
    {
5340
      if (!including_defined_name)
5341
        {
5342
          /* Prevent the ssa name defined by the statement from being removed.  */
5343
          SET_PHI_RESULT (stmt, NULL);
5344
        }
5345
      remove_phi_node (stmt, NULL_TREE);
5346
    }
5347
  else
5348
    {
5349
      block_stmt_iterator bsi = bsi_for_stmt (stmt);
5350
 
5351
      bsi_remove (&bsi, true);
5352
    }
5353
}
5354
 
5355
/* Rewrites USE (definition of iv used in a nonlinear expression)
5356
   using candidate CAND.  */
5357
 
5358
static void
5359
rewrite_use_nonlinear_expr (struct ivopts_data *data,
5360
                            struct iv_use *use, struct iv_cand *cand)
5361
{
5362
  tree comp;
5363
  tree op, stmts, tgt, ass;
5364
  block_stmt_iterator bsi, pbsi;
5365
 
5366
  /* An important special case -- if we are asked to express value of
5367
     the original iv by itself, just exit; there is no need to
5368
     introduce a new computation (that might also need casting the
5369
     variable to unsigned and back).  */
5370
  if (cand->pos == IP_ORIGINAL
5371
      && cand->incremented_at == use->stmt)
5372
    {
5373
      tree step, ctype, utype;
5374
      enum tree_code incr_code = PLUS_EXPR;
5375
 
5376
      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5377
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5378
 
5379
      step = cand->iv->step;
5380
      ctype = TREE_TYPE (step);
5381
      utype = TREE_TYPE (cand->var_after);
5382
      if (TREE_CODE (step) == NEGATE_EXPR)
5383
        {
5384
          incr_code = MINUS_EXPR;
5385
          step = TREE_OPERAND (step, 0);
5386
        }
5387
 
5388
      /* Check whether we may leave the computation unchanged.
5389
         This is the case only if it does not rely on other
5390
         computations in the loop -- otherwise, the computation
5391
         we rely upon may be removed in remove_unused_ivs,
5392
         thus leading to ICE.  */
5393
      op = TREE_OPERAND (use->stmt, 1);
5394
      if (TREE_CODE (op) == PLUS_EXPR
5395
          || TREE_CODE (op) == MINUS_EXPR)
5396
        {
5397
          if (TREE_OPERAND (op, 0) == cand->var_before)
5398
            op = TREE_OPERAND (op, 1);
5399
          else if (TREE_CODE (op) == PLUS_EXPR
5400
                   && TREE_OPERAND (op, 1) == cand->var_before)
5401
            op = TREE_OPERAND (op, 0);
5402
          else
5403
            op = NULL_TREE;
5404
        }
5405
      else
5406
        op = NULL_TREE;
5407
 
5408
      if (op
5409
          && (TREE_CODE (op) == INTEGER_CST
5410
              || operand_equal_p (op, step, 0)))
5411
        return;
5412
 
5413
      /* Otherwise, add the necessary computations to express
5414
         the iv.  */
5415
      op = fold_convert (ctype, cand->var_before);
5416
      comp = fold_convert (utype,
5417
                           build2 (incr_code, ctype, op,
5418
                                   unshare_expr (step)));
5419
    }
5420
  else
5421
    comp = get_computation (data->current_loop, use, cand);
5422
 
5423
  switch (TREE_CODE (use->stmt))
5424
    {
5425
    case PHI_NODE:
5426
      tgt = PHI_RESULT (use->stmt);
5427
 
5428
      /* If we should keep the biv, do not replace it.  */
5429
      if (name_info (data, tgt)->preserve_biv)
5430
        return;
5431
 
5432
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5433
      while (!bsi_end_p (pbsi)
5434
             && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5435
        {
5436
          bsi = pbsi;
5437
          bsi_next (&pbsi);
5438
        }
5439
      break;
5440
 
5441
    case MODIFY_EXPR:
5442
      tgt = TREE_OPERAND (use->stmt, 0);
5443
      bsi = bsi_for_stmt (use->stmt);
5444
      break;
5445
 
5446
    default:
5447
      gcc_unreachable ();
5448
    }
5449
 
5450
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5451
 
5452
  if (TREE_CODE (use->stmt) == PHI_NODE)
5453
    {
5454
      if (stmts)
5455
        bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5456
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5457
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5458
      remove_statement (use->stmt, false);
5459
      SSA_NAME_DEF_STMT (tgt) = ass;
5460
    }
5461
  else
5462
    {
5463
      if (stmts)
5464
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5465
      TREE_OPERAND (use->stmt, 1) = op;
5466
    }
5467
}
5468
 
5469
/* Replaces ssa name in index IDX by its basic variable.  Callback for
5470
   for_each_index.  */
5471
 
5472
static bool
5473
idx_remove_ssa_names (tree base, tree *idx,
5474
                      void *data ATTRIBUTE_UNUSED)
5475
{
5476
  tree *op;
5477
 
5478
  if (TREE_CODE (*idx) == SSA_NAME)
5479
    *idx = SSA_NAME_VAR (*idx);
5480
 
5481
  if (TREE_CODE (base) == ARRAY_REF)
5482
    {
5483
      op = &TREE_OPERAND (base, 2);
5484
      if (*op
5485
          && TREE_CODE (*op) == SSA_NAME)
5486
        *op = SSA_NAME_VAR (*op);
5487
      op = &TREE_OPERAND (base, 3);
5488
      if (*op
5489
          && TREE_CODE (*op) == SSA_NAME)
5490
        *op = SSA_NAME_VAR (*op);
5491
    }
5492
 
5493
  return true;
5494
}
5495
 
5496
/* Unshares REF and replaces ssa names inside it by their basic variables.  */
5497
 
5498
static tree
5499
unshare_and_remove_ssa_names (tree ref)
5500
{
5501
  ref = unshare_expr (ref);
5502
  for_each_index (&ref, idx_remove_ssa_names, NULL);
5503
 
5504
  return ref;
5505
}
5506
 
5507
/* Extract the alias analysis info for the memory reference REF.  There are
5508
   several ways how this information may be stored and what precisely is
5509
   its semantics depending on the type of the reference, but there always is
5510
   somewhere hidden one _DECL node that is used to determine the set of
5511
   virtual operands for the reference.  The code below deciphers this jungle
5512
   and extracts this single useful piece of information.  */
5513
 
5514
static tree
5515
get_ref_tag (tree ref, tree orig)
5516
{
5517
  tree var = get_base_address (ref);
5518
  tree aref = NULL_TREE, tag, sv;
5519
  HOST_WIDE_INT offset, size, maxsize;
5520
 
5521
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5522
    {
5523
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5524
      if (ref)
5525
        break;
5526
    }
5527
 
5528
  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5529
    return unshare_expr (sv);
5530
 
5531
  if (!var)
5532
    return NULL_TREE;
5533
 
5534
  if (TREE_CODE (var) == INDIRECT_REF)
5535
    {
5536
      /* If the base is a dereference of a pointer, first check its name memory
5537
         tag.  If it does not have one, use its symbol memory tag.  */
5538
      var = TREE_OPERAND (var, 0);
5539
      if (TREE_CODE (var) != SSA_NAME)
5540
        return NULL_TREE;
5541
 
5542
      if (SSA_NAME_PTR_INFO (var))
5543
        {
5544
          tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5545
          if (tag)
5546
            return tag;
5547
        }
5548
 
5549
      var = SSA_NAME_VAR (var);
5550
      tag = var_ann (var)->symbol_mem_tag;
5551
      gcc_assert (tag != NULL_TREE);
5552
      return tag;
5553
    }
5554
  else
5555
    {
5556
      if (!DECL_P (var))
5557
        return NULL_TREE;
5558
 
5559
      tag = var_ann (var)->symbol_mem_tag;
5560
      if (tag)
5561
        return tag;
5562
 
5563
      return var;
5564
    }
5565
}
5566
 
5567
/* Copies the reference information from OLD_REF to NEW_REF.  */
5568
 
5569
static void
5570
copy_ref_info (tree new_ref, tree old_ref)
5571
{
5572
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5573
    copy_mem_ref_info (new_ref, old_ref);
5574
  else
5575
    {
5576
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5577
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5578
    }
5579
}
5580
 
5581
/* Rewrites USE (address that is an iv) using candidate CAND.  */
5582
 
5583
static void
5584
rewrite_use_address (struct ivopts_data *data,
5585
                     struct iv_use *use, struct iv_cand *cand)
5586
{
5587
  struct affine_tree_combination aff;
5588
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5589
  tree ref;
5590
 
5591
  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5592
  unshare_aff_combination (&aff);
5593
 
5594
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5595
  copy_ref_info (ref, *use->op_p);
5596
  *use->op_p = ref;
5597
}
5598
 
5599
/* Rewrites USE (the condition such that one of the arguments is an iv) using
5600
   candidate CAND.  */
5601
 
5602
static void
5603
rewrite_use_compare (struct ivopts_data *data,
5604
                     struct iv_use *use, struct iv_cand *cand)
5605
{
5606
  tree comp;
5607
  tree *op_p, cond, op, stmts, bound;
5608
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5609
  enum tree_code compare;
5610
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5611
 
5612
  bound = cp->value;
5613
  if (bound)
5614
    {
5615
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5616
      tree var_type = TREE_TYPE (var);
5617
 
5618
      compare = iv_elimination_compare (data, use);
5619
      bound = fold_convert (var_type, bound);
5620
      op = force_gimple_operand (unshare_expr (bound), &stmts,
5621
                                 true, NULL_TREE);
5622
 
5623
      if (stmts)
5624
        bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5625
 
5626
      *use->op_p = build2 (compare, boolean_type_node, var, op);
5627
      update_stmt (use->stmt);
5628
      return;
5629
    }
5630
 
5631
  /* The induction variable elimination failed; just express the original
5632
     giv.  */
5633
  comp = get_computation (data->current_loop, use, cand);
5634
 
5635
  cond = *use->op_p;
5636
  op_p = &TREE_OPERAND (cond, 0);
5637
  if (TREE_CODE (*op_p) != SSA_NAME
5638
      || zero_p (get_iv (data, *op_p)->step))
5639
    op_p = &TREE_OPERAND (cond, 1);
5640
 
5641
  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5642
  if (stmts)
5643
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5644
 
5645
  *op_p = op;
5646
}
5647
 
5648
/* Rewrites USE using candidate CAND.  */
5649
 
5650
static void
5651
rewrite_use (struct ivopts_data *data,
5652
             struct iv_use *use, struct iv_cand *cand)
5653
{
5654
  switch (use->type)
5655
    {
5656
      case USE_NONLINEAR_EXPR:
5657
        rewrite_use_nonlinear_expr (data, use, cand);
5658
        break;
5659
 
5660
      case USE_ADDRESS:
5661
        rewrite_use_address (data, use, cand);
5662
        break;
5663
 
5664
      case USE_COMPARE:
5665
        rewrite_use_compare (data, use, cand);
5666
        break;
5667
 
5668
      default:
5669
        gcc_unreachable ();
5670
    }
5671
  mark_new_vars_to_rename (use->stmt);
5672
}
5673
 
5674
/* Rewrite the uses using the selected induction variables.  */
5675
 
5676
static void
5677
rewrite_uses (struct ivopts_data *data)
5678
{
5679
  unsigned i;
5680
  struct iv_cand *cand;
5681
  struct iv_use *use;
5682
 
5683
  for (i = 0; i < n_iv_uses (data); i++)
5684
    {
5685
      use = iv_use (data, i);
5686
      cand = use->selected;
5687
      gcc_assert (cand);
5688
 
5689
      rewrite_use (data, use, cand);
5690
    }
5691
}
5692
 
5693
/* Removes the ivs that are not used after rewriting.  */
5694
 
5695
static void
5696
remove_unused_ivs (struct ivopts_data *data)
5697
{
5698
  unsigned j;
5699
  bitmap_iterator bi;
5700
 
5701
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5702
    {
5703
      struct version_info *info;
5704
 
5705
      info = ver_info (data, j);
5706
      if (info->iv
5707
          && !zero_p (info->iv->step)
5708
          && !info->inv_id
5709
          && !info->iv->have_use_for
5710
          && !info->preserve_biv)
5711
        remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5712
    }
5713
}
5714
 
5715
/* Frees data allocated by the optimization of a single loop.  */
5716
 
5717
static void
5718
free_loop_data (struct ivopts_data *data)
5719
{
5720
  unsigned i, j;
5721
  bitmap_iterator bi;
5722
  tree obj;
5723
 
5724
  htab_empty (data->niters);
5725
 
5726
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5727
    {
5728
      struct version_info *info;
5729
 
5730
      info = ver_info (data, i);
5731
      if (info->iv)
5732
        free (info->iv);
5733
      info->iv = NULL;
5734
      info->has_nonlin_use = false;
5735
      info->preserve_biv = false;
5736
      info->inv_id = 0;
5737
    }
5738
  bitmap_clear (data->relevant);
5739
  bitmap_clear (data->important_candidates);
5740
 
5741
  for (i = 0; i < n_iv_uses (data); i++)
5742
    {
5743
      struct iv_use *use = iv_use (data, i);
5744
 
5745
      free (use->iv);
5746
      BITMAP_FREE (use->related_cands);
5747
      for (j = 0; j < use->n_map_members; j++)
5748
        if (use->cost_map[j].depends_on)
5749
          BITMAP_FREE (use->cost_map[j].depends_on);
5750
      free (use->cost_map);
5751
      free (use);
5752
    }
5753
  VEC_truncate (iv_use_p, data->iv_uses, 0);
5754
 
5755
  for (i = 0; i < n_iv_cands (data); i++)
5756
    {
5757
      struct iv_cand *cand = iv_cand (data, i);
5758
 
5759
      if (cand->iv)
5760
        free (cand->iv);
5761
      if (cand->depends_on)
5762
        BITMAP_FREE (cand->depends_on);
5763
      free (cand);
5764
    }
5765
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5766
 
5767
  if (data->version_info_size < num_ssa_names)
5768
    {
5769
      data->version_info_size = 2 * num_ssa_names;
5770
      free (data->version_info);
5771
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5772
    }
5773
 
5774
  data->max_inv_id = 0;
5775
 
5776
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5777
    SET_DECL_RTL (obj, NULL_RTX);
5778
 
5779
  VEC_truncate (tree, decl_rtl_to_reset, 0);
5780
}
5781
 
5782
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5783
   loop tree.  */
5784
 
5785
static void
5786
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5787
{
5788
  free_loop_data (data);
5789
  free (data->version_info);
5790
  BITMAP_FREE (data->relevant);
5791
  BITMAP_FREE (data->important_candidates);
5792
  htab_delete (data->niters);
5793
 
5794
  VEC_free (tree, heap, decl_rtl_to_reset);
5795
  VEC_free (iv_use_p, heap, data->iv_uses);
5796
  VEC_free (iv_cand_p, heap, data->iv_candidates);
5797
}
5798
 
5799
/* Optimizes the LOOP.  Returns true if anything changed.  */
5800
 
5801
static bool
5802
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5803
{
5804
  bool changed = false;
5805
  struct iv_ca *iv_ca;
5806
  edge exit;
5807
 
5808
  data->current_loop = loop;
5809
 
5810
  if (dump_file && (dump_flags & TDF_DETAILS))
5811
    {
5812
      fprintf (dump_file, "Processing loop %d\n", loop->num);
5813
 
5814
      exit = single_dom_exit (loop);
5815
      if (exit)
5816
        {
5817
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5818
                   exit->src->index, exit->dest->index);
5819
          print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5820
          fprintf (dump_file, "\n");
5821
        }
5822
 
5823
      fprintf (dump_file, "\n");
5824
    }
5825
 
5826
  /* For each ssa name determines whether it behaves as an induction variable
5827
     in some loop.  */
5828
  if (!find_induction_variables (data))
5829
    goto finish;
5830
 
5831
  /* Finds interesting uses (item 1).  */
5832
  find_interesting_uses (data);
5833
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5834
    goto finish;
5835
 
5836
  /* Finds candidates for the induction variables (item 2).  */
5837
  find_iv_candidates (data);
5838
 
5839
  /* Calculates the costs (item 3, part 1).  */
5840
  determine_use_iv_costs (data);
5841
  determine_iv_costs (data);
5842
  determine_set_costs (data);
5843
 
5844
  /* Find the optimal set of induction variables (item 3, part 2).  */
5845
  iv_ca = find_optimal_iv_set (data);
5846
  if (!iv_ca)
5847
    goto finish;
5848
  changed = true;
5849
 
5850
  /* Create the new induction variables (item 4, part 1).  */
5851
  create_new_ivs (data, iv_ca);
5852
  iv_ca_free (&iv_ca);
5853
 
5854
  /* Rewrite the uses (item 4, part 2).  */
5855
  rewrite_uses (data);
5856
 
5857
  /* Remove the ivs that are unused after rewriting.  */
5858
  remove_unused_ivs (data);
5859
 
5860
  /* We have changed the structure of induction variables; it might happen
5861
     that definitions in the scev database refer to some of them that were
5862
     eliminated.  */
5863
  scev_reset ();
5864
 
5865
finish:
5866
  free_loop_data (data);
5867
 
5868
  return changed;
5869
}
5870
 
5871
/* Main entry point.  Optimizes induction variables in LOOPS.  */
5872
 
5873
void
5874
tree_ssa_iv_optimize (struct loops *loops)
5875
{
5876
  struct loop *loop;
5877
  struct ivopts_data data;
5878
 
5879
  tree_ssa_iv_optimize_init (&data);
5880
 
5881
  /* Optimize the loops starting with the innermost ones.  */
5882
  loop = loops->tree_root;
5883
  while (loop->inner)
5884
    loop = loop->inner;
5885
 
5886
  /* Scan the loops, inner ones first.  */
5887
  while (loop != loops->tree_root)
5888
    {
5889
      if (dump_file && (dump_flags & TDF_DETAILS))
5890
        flow_loop_dump (loop, dump_file, NULL, 1);
5891
 
5892
      tree_ssa_iv_optimize_loop (&data, loop);
5893
 
5894
      if (loop->next)
5895
        {
5896
          loop = loop->next;
5897
          while (loop->inner)
5898
            loop = loop->inner;
5899
        }
5900
      else
5901
        loop = loop->outer;
5902
    }
5903
 
5904
  tree_ssa_iv_optimize_finalize (&data);
5905
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.