OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [tree-ssa-loop-ivopts.c] - Blame information for rev 861

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Induction variable optimizations.
2
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it
8
under the terms of the GNU General Public License as published by the
9
Free Software Foundation; either version 3, or (at your option) any
10
later version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
/* This pass tries to find the optimal set of induction variables for the loop.
22
   It optimizes just the basic linear induction variables (although adding
23
   support for other types should not be too hard).  It includes the
24
   optimizations commonly known as strength reduction, induction variable
25
   coalescing and induction variable elimination.  It does it in the
26
   following steps:
27
 
28
   1) The interesting uses of induction variables are found.  This includes
29
 
30
      -- uses of induction variables in non-linear expressions
31
      -- addresses of arrays
32
      -- comparisons of induction variables
33
 
34
   2) Candidates for the induction variables are found.  This includes
35
 
36
      -- old induction variables
37
      -- the variables defined by expressions derived from the "interesting
38
         uses" above
39
 
40
   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
41
      cost function assigns a cost to sets of induction variables and consists
42
      of three parts:
43
 
44
      -- The use costs.  Each of the interesting uses chooses the best induction
45
         variable in the set and adds its cost to the sum.  The cost reflects
46
         the time spent on modifying the induction variables value to be usable
47
         for the given purpose (adding base and offset for arrays, etc.).
48
      -- The variable costs.  Each of the variables has a cost assigned that
49
         reflects the costs associated with incrementing the value of the
50
         variable.  The original variables are somewhat preferred.
51
      -- The set cost.  Depending on the size of the set, extra cost may be
52
         added to reflect register pressure.
53
 
54
      All the costs are defined in a machine-specific way, using the target
55
      hooks and machine descriptions to determine them.
56
 
57
   4) The trees are transformed to use the new variables, the dead code is
58
      removed.
59
 
60
   All of this is done loop by loop.  Doing it globally is theoretically
61
   possible, it might give a better performance and it might enable us
62
   to decide costs more precisely, but getting all the interactions right
63
   would be complicated.  */
64
 
65
#include "config.h"
66
#include "system.h"
67
#include "coretypes.h"
68
#include "tm.h"
69
#include "tree.h"
70
#include "rtl.h"
71
#include "tm_p.h"
72
#include "hard-reg-set.h"
73
#include "basic-block.h"
74
#include "output.h"
75
#include "diagnostic.h"
76
#include "tree-flow.h"
77
#include "tree-dump.h"
78
#include "timevar.h"
79
#include "cfgloop.h"
80
#include "varray.h"
81
#include "expr.h"
82
#include "tree-pass.h"
83
#include "ggc.h"
84
#include "insn-config.h"
85
#include "recog.h"
86
#include "pointer-set.h"
87
#include "hashtab.h"
88
#include "tree-chrec.h"
89
#include "tree-scalar-evolution.h"
90
#include "cfgloop.h"
91
#include "params.h"
92
#include "langhooks.h"
93
#include "tree-affine.h"
94
#include "target.h"
95
 
96
/* The infinite cost.  */
97
#define INFTY 10000000
98
 
99
/* The expected number of loop iterations.  TODO -- use profiling instead of
100
   this.  */
101
#define AVG_LOOP_NITER(LOOP) 5
102
 
103
 
104
/* Representation of the induction variable.  */
105
struct iv
106
{
107
  tree base;            /* Initial value of the iv.  */
108
  tree base_object;     /* A memory object to that the induction variable points.  */
109
  tree step;            /* Step of the iv (constant only).  */
110
  tree ssa_name;        /* The ssa name with the value.  */
111
  bool biv_p;           /* Is it a biv?  */
112
  bool have_use_for;    /* Do we already have a use for it?  */
113
  unsigned use_id;      /* The identifier in the use if it is the case.  */
114
};
115
 
116
/* Per-ssa version information (induction variable descriptions, etc.).  */
117
struct version_info
118
{
119
  tree name;            /* The ssa name.  */
120
  struct iv *iv;        /* Induction variable description.  */
121
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
122
                           an expression that is not an induction variable.  */
123
  unsigned inv_id;      /* Id of an invariant.  */
124
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
125
};
126
 
127
/* Types of uses.  */
128
enum use_type
129
{
130
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
131
  USE_ADDRESS,          /* Use in an address.  */
132
  USE_COMPARE           /* Use is a compare.  */
133
};
134
 
135
/* Cost of a computation.  */
136
typedef struct
137
{
138
  int cost;             /* The runtime cost.  */
139
  unsigned complexity;  /* The estimate of the complexity of the code for
140
                           the computation (in no concrete units --
141
                           complexity field should be larger for more
142
                           complex expressions and addressing modes).  */
143
} comp_cost;
144
 
145
static const comp_cost zero_cost = {0, 0};
146
static const comp_cost infinite_cost = {INFTY, INFTY};
147
 
148
/* The candidate - cost pair.  */
149
struct cost_pair
150
{
151
  struct iv_cand *cand; /* The candidate.  */
152
  comp_cost cost;       /* The cost.  */
153
  bitmap depends_on;    /* The list of invariants that have to be
154
                           preserved.  */
155
  tree value;           /* For final value elimination, the expression for
156
                           the final value of the iv.  For iv elimination,
157
                           the new bound to compare with.  */
158
};
159
 
160
/* Use.  */
161
struct iv_use
162
{
163
  unsigned id;          /* The id of the use.  */
164
  enum use_type type;   /* Type of the use.  */
165
  struct iv *iv;        /* The induction variable it is based on.  */
166
  gimple stmt;          /* Statement in that it occurs.  */
167
  tree *op_p;           /* The place where it occurs.  */
168
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
169
                           important ones.  */
170
 
171
  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
172
  struct cost_pair *cost_map;
173
                        /* The costs wrto the iv candidates.  */
174
 
175
  struct iv_cand *selected;
176
                        /* The selected candidate.  */
177
};
178
 
179
/* The position where the iv is computed.  */
180
enum iv_position
181
{
182
  IP_NORMAL,            /* At the end, just before the exit condition.  */
183
  IP_END,               /* At the end of the latch block.  */
184
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
185
  IP_AFTER_USE,         /* Immediately after a specific use.  */
186
  IP_ORIGINAL           /* The original biv.  */
187
};
188
 
189
/* The induction variable candidate.  */
190
struct iv_cand
191
{
192
  unsigned id;          /* The number of the candidate.  */
193
  bool important;       /* Whether this is an "important" candidate, i.e. such
194
                           that it should be considered by all uses.  */
195
  enum iv_position pos; /* Where it is computed.  */
196
  gimple incremented_at;/* For original biv, the statement where it is
197
                           incremented.  */
198
  tree var_before;      /* The variable used for it before increment.  */
199
  tree var_after;       /* The variable used for it after increment.  */
200
  struct iv *iv;        /* The value of the candidate.  NULL for
201
                           "pseudocandidate" used to indicate the possibility
202
                           to replace the final value of an iv by direct
203
                           computation of the value.  */
204
  unsigned cost;        /* Cost of the candidate.  */
205
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
206
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207
                              where it is incremented.  */
208
  bitmap depends_on;    /* The list of invariants that are used in step of the
209
                           biv.  */
210
};
211
 
212
/* The data used by the induction variable optimizations.  */
213
 
214
typedef struct iv_use *iv_use_p;
215
DEF_VEC_P(iv_use_p);
216
DEF_VEC_ALLOC_P(iv_use_p,heap);
217
 
218
typedef struct iv_cand *iv_cand_p;
219
DEF_VEC_P(iv_cand_p);
220
DEF_VEC_ALLOC_P(iv_cand_p,heap);
221
 
222
struct ivopts_data
223
{
224
  /* The currently optimized loop.  */
225
  struct loop *current_loop;
226
 
227
  /* Numbers of iterations for all exits of the current loop.  */
228
  struct pointer_map_t *niters;
229
 
230
  /* Number of registers used in it.  */
231
  unsigned regs_used;
232
 
233
  /* The size of version_info array allocated.  */
234
  unsigned version_info_size;
235
 
236
  /* The array of information for the ssa names.  */
237
  struct version_info *version_info;
238
 
239
  /* The bitmap of indices in version_info whose value was changed.  */
240
  bitmap relevant;
241
 
242
  /* The uses of induction variables.  */
243
  VEC(iv_use_p,heap) *iv_uses;
244
 
245
  /* The candidates.  */
246
  VEC(iv_cand_p,heap) *iv_candidates;
247
 
248
  /* A bitmap of important candidates.  */
249
  bitmap important_candidates;
250
 
251
  /* The maximum invariant id.  */
252
  unsigned max_inv_id;
253
 
254
  /* Whether to consider just related and important candidates when replacing a
255
     use.  */
256
  bool consider_all_candidates;
257
 
258
  /* Are we optimizing for speed?  */
259
  bool speed;
260
};
261
 
262
/* An assignment of iv candidates to uses.  */
263
 
264
struct iv_ca
265
{
266
  /* The number of uses covered by the assignment.  */
267
  unsigned upto;
268
 
269
  /* Number of uses that cannot be expressed by the candidates in the set.  */
270
  unsigned bad_uses;
271
 
272
  /* Candidate assigned to a use, together with the related costs.  */
273
  struct cost_pair **cand_for_use;
274
 
275
  /* Number of times each candidate is used.  */
276
  unsigned *n_cand_uses;
277
 
278
  /* The candidates used.  */
279
  bitmap cands;
280
 
281
  /* The number of candidates in the set.  */
282
  unsigned n_cands;
283
 
284
  /* Total number of registers needed.  */
285
  unsigned n_regs;
286
 
287
  /* Total cost of expressing uses.  */
288
  comp_cost cand_use_cost;
289
 
290
  /* Total cost of candidates.  */
291
  unsigned cand_cost;
292
 
293
  /* Number of times each invariant is used.  */
294
  unsigned *n_invariant_uses;
295
 
296
  /* Total cost of the assignment.  */
297
  comp_cost cost;
298
};
299
 
300
/* Difference of two iv candidate assignments.  */
301
 
302
struct iv_ca_delta
303
{
304
  /* Changed use.  */
305
  struct iv_use *use;
306
 
307
  /* An old assignment (for rollback purposes).  */
308
  struct cost_pair *old_cp;
309
 
310
  /* A new assignment.  */
311
  struct cost_pair *new_cp;
312
 
313
  /* Next change in the list.  */
314
  struct iv_ca_delta *next_change;
315
};
316
 
317
/* Bound on number of candidates below that all candidates are considered.  */
318
 
319
#define CONSIDER_ALL_CANDIDATES_BOUND \
320
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321
 
322
/* If there are more iv occurrences, we just give up (it is quite unlikely that
323
   optimizing such a loop would help, and it would take ages).  */
324
 
325
#define MAX_CONSIDERED_USES \
326
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
327
 
328
/* If there are at most this number of ivs in the set, try removing unnecessary
329
   ivs from the set always.  */
330
 
331
#define ALWAYS_PRUNE_CAND_SET_BOUND \
332
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
333
 
334
/* The list of trees for that the decl_rtl field must be reset is stored
335
   here.  */
336
 
337
static VEC(tree,heap) *decl_rtl_to_reset;
338
 
339
/* Number of uses recorded in DATA.  */
340
 
341
static inline unsigned
342
n_iv_uses (struct ivopts_data *data)
343
{
344
  return VEC_length (iv_use_p, data->iv_uses);
345
}
346
 
347
/* Ith use recorded in DATA.  */
348
 
349
static inline struct iv_use *
350
iv_use (struct ivopts_data *data, unsigned i)
351
{
352
  return VEC_index (iv_use_p, data->iv_uses, i);
353
}
354
 
355
/* Number of candidates recorded in DATA.  */
356
 
357
static inline unsigned
358
n_iv_cands (struct ivopts_data *data)
359
{
360
  return VEC_length (iv_cand_p, data->iv_candidates);
361
}
362
 
363
/* Ith candidate recorded in DATA.  */
364
 
365
static inline struct iv_cand *
366
iv_cand (struct ivopts_data *data, unsigned i)
367
{
368
  return VEC_index (iv_cand_p, data->iv_candidates, i);
369
}
370
 
371
/* The single loop exit if it dominates the latch, NULL otherwise.  */
372
 
373
edge
374
single_dom_exit (struct loop *loop)
375
{
376
  edge exit = single_exit (loop);
377
 
378
  if (!exit)
379
    return NULL;
380
 
381
  if (!just_once_each_iteration_p (loop, exit->src))
382
    return NULL;
383
 
384
  return exit;
385
}
386
 
387
/* Dumps information about the induction variable IV to FILE.  */
388
 
389
extern void dump_iv (FILE *, struct iv *);
390
void
391
dump_iv (FILE *file, struct iv *iv)
392
{
393
  if (iv->ssa_name)
394
    {
395
      fprintf (file, "ssa name ");
396
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
397
      fprintf (file, "\n");
398
    }
399
 
400
  fprintf (file, "  type ");
401
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
402
  fprintf (file, "\n");
403
 
404
  if (iv->step)
405
    {
406
      fprintf (file, "  base ");
407
      print_generic_expr (file, iv->base, TDF_SLIM);
408
      fprintf (file, "\n");
409
 
410
      fprintf (file, "  step ");
411
      print_generic_expr (file, iv->step, TDF_SLIM);
412
      fprintf (file, "\n");
413
    }
414
  else
415
    {
416
      fprintf (file, "  invariant ");
417
      print_generic_expr (file, iv->base, TDF_SLIM);
418
      fprintf (file, "\n");
419
    }
420
 
421
  if (iv->base_object)
422
    {
423
      fprintf (file, "  base object ");
424
      print_generic_expr (file, iv->base_object, TDF_SLIM);
425
      fprintf (file, "\n");
426
    }
427
 
428
  if (iv->biv_p)
429
    fprintf (file, "  is a biv\n");
430
}
431
 
432
/* Dumps information about the USE to FILE.  */
433
 
434
extern void dump_use (FILE *, struct iv_use *);
435
void
436
dump_use (FILE *file, struct iv_use *use)
437
{
438
  fprintf (file, "use %d\n", use->id);
439
 
440
  switch (use->type)
441
    {
442
    case USE_NONLINEAR_EXPR:
443
      fprintf (file, "  generic\n");
444
      break;
445
 
446
    case USE_ADDRESS:
447
      fprintf (file, "  address\n");
448
      break;
449
 
450
    case USE_COMPARE:
451
      fprintf (file, "  compare\n");
452
      break;
453
 
454
    default:
455
      gcc_unreachable ();
456
    }
457
 
458
  fprintf (file, "  in statement ");
459
  print_gimple_stmt (file, use->stmt, 0, 0);
460
  fprintf (file, "\n");
461
 
462
  fprintf (file, "  at position ");
463
  if (use->op_p)
464
    print_generic_expr (file, *use->op_p, TDF_SLIM);
465
  fprintf (file, "\n");
466
 
467
  dump_iv (file, use->iv);
468
 
469
  if (use->related_cands)
470
    {
471
      fprintf (file, "  related candidates ");
472
      dump_bitmap (file, use->related_cands);
473
    }
474
}
475
 
476
/* Dumps information about the uses to FILE.  */
477
 
478
extern void dump_uses (FILE *, struct ivopts_data *);
479
void
480
dump_uses (FILE *file, struct ivopts_data *data)
481
{
482
  unsigned i;
483
  struct iv_use *use;
484
 
485
  for (i = 0; i < n_iv_uses (data); i++)
486
    {
487
      use = iv_use (data, i);
488
 
489
      dump_use (file, use);
490
      fprintf (file, "\n");
491
    }
492
}
493
 
494
/* Dumps information about induction variable candidate CAND to FILE.  */
495
 
496
extern void dump_cand (FILE *, struct iv_cand *);
497
void
498
dump_cand (FILE *file, struct iv_cand *cand)
499
{
500
  struct iv *iv = cand->iv;
501
 
502
  fprintf (file, "candidate %d%s\n",
503
           cand->id, cand->important ? " (important)" : "");
504
 
505
  if (cand->depends_on)
506
    {
507
      fprintf (file, "  depends on ");
508
      dump_bitmap (file, cand->depends_on);
509
    }
510
 
511
  if (!iv)
512
    {
513
      fprintf (file, "  final value replacement\n");
514
      return;
515
    }
516
 
517
  switch (cand->pos)
518
    {
519
    case IP_NORMAL:
520
      fprintf (file, "  incremented before exit test\n");
521
      break;
522
 
523
    case IP_BEFORE_USE:
524
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
525
      break;
526
 
527
    case IP_AFTER_USE:
528
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
529
      break;
530
 
531
    case IP_END:
532
      fprintf (file, "  incremented at end\n");
533
      break;
534
 
535
    case IP_ORIGINAL:
536
      fprintf (file, "  original biv\n");
537
      break;
538
    }
539
 
540
  dump_iv (file, iv);
541
}
542
 
543
/* Returns the info for ssa version VER.  */
544
 
545
static inline struct version_info *
546
ver_info (struct ivopts_data *data, unsigned ver)
547
{
548
  return data->version_info + ver;
549
}
550
 
551
/* Returns the info for ssa name NAME.  */
552
 
553
static inline struct version_info *
554
name_info (struct ivopts_data *data, tree name)
555
{
556
  return ver_info (data, SSA_NAME_VERSION (name));
557
}
558
 
559
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560
   emitted in LOOP.  */
561
 
562
static bool
563
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564
{
565
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
566
 
567
  gcc_assert (bb);
568
 
569
  if (sbb == loop->latch)
570
    return true;
571
 
572
  if (sbb != bb)
573
    return false;
574
 
575
  return stmt == last_stmt (bb);
576
}
577
 
578
/* Returns true if STMT if after the place where the original induction
579
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
580
   if the positions are identical.  */
581
 
582
static bool
583
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584
{
585
  basic_block cand_bb = gimple_bb (cand->incremented_at);
586
  basic_block stmt_bb = gimple_bb (stmt);
587
 
588
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589
    return false;
590
 
591
  if (stmt_bb != cand_bb)
592
    return true;
593
 
594
  if (true_if_equal
595
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596
    return true;
597
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
598
}
599
 
600
/* Returns true if STMT if after the place where the induction variable
601
   CAND is incremented in LOOP.  */
602
 
603
static bool
604
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
605
{
606
  switch (cand->pos)
607
    {
608
    case IP_END:
609
      return false;
610
 
611
    case IP_NORMAL:
612
      return stmt_after_ip_normal_pos (loop, stmt);
613
 
614
    case IP_ORIGINAL:
615
    case IP_AFTER_USE:
616
      return stmt_after_inc_pos (cand, stmt, false);
617
 
618
    case IP_BEFORE_USE:
619
      return stmt_after_inc_pos (cand, stmt, true);
620
 
621
    default:
622
      gcc_unreachable ();
623
    }
624
}
625
 
626
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
627
 
628
static bool
629
abnormal_ssa_name_p (tree exp)
630
{
631
  if (!exp)
632
    return false;
633
 
634
  if (TREE_CODE (exp) != SSA_NAME)
635
    return false;
636
 
637
  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
638
}
639
 
640
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
641
   abnormal phi node.  Callback for for_each_index.  */
642
 
643
static bool
644
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645
                                  void *data ATTRIBUTE_UNUSED)
646
{
647
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648
    {
649
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650
        return false;
651
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652
        return false;
653
    }
654
 
655
  return !abnormal_ssa_name_p (*index);
656
}
657
 
658
/* Returns true if EXPR contains a ssa name that occurs in an
659
   abnormal phi node.  */
660
 
661
bool
662
contains_abnormal_ssa_name_p (tree expr)
663
{
664
  enum tree_code code;
665
  enum tree_code_class codeclass;
666
 
667
  if (!expr)
668
    return false;
669
 
670
  code = TREE_CODE (expr);
671
  codeclass = TREE_CODE_CLASS (code);
672
 
673
  if (code == SSA_NAME)
674
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675
 
676
  if (code == INTEGER_CST
677
      || is_gimple_min_invariant (expr))
678
    return false;
679
 
680
  if (code == ADDR_EXPR)
681
    return !for_each_index (&TREE_OPERAND (expr, 0),
682
                            idx_contains_abnormal_ssa_name_p,
683
                            NULL);
684
 
685
  switch (codeclass)
686
    {
687
    case tcc_binary:
688
    case tcc_comparison:
689
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690
        return true;
691
 
692
      /* Fallthru.  */
693
    case tcc_unary:
694
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695
        return true;
696
 
697
      break;
698
 
699
    default:
700
      gcc_unreachable ();
701
    }
702
 
703
  return false;
704
}
705
 
706
/*  Returns tree describing number of iterations determined from
707
    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
708
 
709
static tree
710
niter_for_exit (struct ivopts_data *data, edge exit)
711
{
712
  struct tree_niter_desc desc;
713
  tree niter;
714
  void **slot;
715
 
716
  if (!data->niters)
717
    {
718
      data->niters = pointer_map_create ();
719
      slot = NULL;
720
    }
721
  else
722
    slot = pointer_map_contains (data->niters, exit);
723
 
724
  if (!slot)
725
    {
726
      /* Try to determine number of iterations.  We must know it
727
         unconditionally (i.e., without possibility of # of iterations
728
         being zero).  Also, we cannot safely work with ssa names that
729
         appear in phi nodes on abnormal edges, so that we do not create
730
         overlapping life ranges for them (PR 27283).  */
731
      if (number_of_iterations_exit (data->current_loop,
732
                                     exit, &desc, true)
733
          && integer_zerop (desc.may_be_zero)
734
          && !contains_abnormal_ssa_name_p (desc.niter))
735
        niter = desc.niter;
736
      else
737
        niter = NULL_TREE;
738
 
739
      *pointer_map_insert (data->niters, exit) = niter;
740
    }
741
  else
742
    niter = (tree) *slot;
743
 
744
  return niter;
745
}
746
 
747
/* Returns tree describing number of iterations determined from
748
   single dominating exit of DATA->current_loop, or NULL if something
749
   goes wrong.  */
750
 
751
static tree
752
niter_for_single_dom_exit (struct ivopts_data *data)
753
{
754
  edge exit = single_dom_exit (data->current_loop);
755
 
756
  if (!exit)
757
    return NULL;
758
 
759
  return niter_for_exit (data, exit);
760
}
761
 
762
/* Initializes data structures used by the iv optimization pass, stored
763
   in DATA.  */
764
 
765
static void
766
tree_ssa_iv_optimize_init (struct ivopts_data *data)
767
{
768
  data->version_info_size = 2 * num_ssa_names;
769
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770
  data->relevant = BITMAP_ALLOC (NULL);
771
  data->important_candidates = BITMAP_ALLOC (NULL);
772
  data->max_inv_id = 0;
773
  data->niters = NULL;
774
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
777
}
778
 
779
/* Returns a memory object to that EXPR points.  In case we are able to
780
   determine that it does not point to any such object, NULL is returned.  */
781
 
782
static tree
783
determine_base_object (tree expr)
784
{
785
  enum tree_code code = TREE_CODE (expr);
786
  tree base, obj;
787
 
788
  /* If this is a pointer casted to any type, we need to determine
789
     the base object for the pointer; so handle conversions before
790
     throwing away non-pointer expressions.  */
791
  if (CONVERT_EXPR_P (expr))
792
    return determine_base_object (TREE_OPERAND (expr, 0));
793
 
794
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
795
    return NULL_TREE;
796
 
797
  switch (code)
798
    {
799
    case INTEGER_CST:
800
      return NULL_TREE;
801
 
802
    case ADDR_EXPR:
803
      obj = TREE_OPERAND (expr, 0);
804
      base = get_base_address (obj);
805
 
806
      if (!base)
807
        return expr;
808
 
809
      if (TREE_CODE (base) == INDIRECT_REF)
810
        return determine_base_object (TREE_OPERAND (base, 0));
811
 
812
      return fold_convert (ptr_type_node,
813
                           build_fold_addr_expr (base));
814
 
815
    case POINTER_PLUS_EXPR:
816
      return determine_base_object (TREE_OPERAND (expr, 0));
817
 
818
    case PLUS_EXPR:
819
    case MINUS_EXPR:
820
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
821
      gcc_unreachable ();
822
 
823
    default:
824
      return fold_convert (ptr_type_node, expr);
825
    }
826
}
827
 
828
/* Allocates an induction variable with given initial value BASE and step STEP
829
   for loop LOOP.  */
830
 
831
static struct iv *
832
alloc_iv (tree base, tree step)
833
{
834
  struct iv *iv = XCNEW (struct iv);
835
  gcc_assert (step != NULL_TREE);
836
 
837
  iv->base = base;
838
  iv->base_object = determine_base_object (base);
839
  iv->step = step;
840
  iv->biv_p = false;
841
  iv->have_use_for = false;
842
  iv->use_id = 0;
843
  iv->ssa_name = NULL_TREE;
844
 
845
  return iv;
846
}
847
 
848
/* Sets STEP and BASE for induction variable IV.  */
849
 
850
static void
851
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
852
{
853
  struct version_info *info = name_info (data, iv);
854
 
855
  gcc_assert (!info->iv);
856
 
857
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
858
  info->iv = alloc_iv (base, step);
859
  info->iv->ssa_name = iv;
860
}
861
 
862
/* Finds induction variable declaration for VAR.  */
863
 
864
static struct iv *
865
get_iv (struct ivopts_data *data, tree var)
866
{
867
  basic_block bb;
868
  tree type = TREE_TYPE (var);
869
 
870
  if (!POINTER_TYPE_P (type)
871
      && !INTEGRAL_TYPE_P (type))
872
    return NULL;
873
 
874
  if (!name_info (data, var)->iv)
875
    {
876
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));
877
 
878
      if (!bb
879
          || !flow_bb_inside_loop_p (data->current_loop, bb))
880
        set_iv (data, var, var, build_int_cst (type, 0));
881
    }
882
 
883
  return name_info (data, var)->iv;
884
}
885
 
886
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
887
   not define a simple affine biv with nonzero step.  */
888
 
889
static tree
890
determine_biv_step (gimple phi)
891
{
892
  struct loop *loop = gimple_bb (phi)->loop_father;
893
  tree name = PHI_RESULT (phi);
894
  affine_iv iv;
895
 
896
  if (!is_gimple_reg (name))
897
    return NULL_TREE;
898
 
899
  if (!simple_iv (loop, loop, name, &iv, true))
900
    return NULL_TREE;
901
 
902
  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
903
}
904
 
905
/* Finds basic ivs.  */
906
 
907
static bool
908
find_bivs (struct ivopts_data *data)
909
{
910
  gimple phi;
911
  tree step, type, base;
912
  bool found = false;
913
  struct loop *loop = data->current_loop;
914
  gimple_stmt_iterator psi;
915
 
916
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
917
    {
918
      phi = gsi_stmt (psi);
919
 
920
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921
        continue;
922
 
923
      step = determine_biv_step (phi);
924
      if (!step)
925
        continue;
926
 
927
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
928
      base = expand_simple_operations (base);
929
      if (contains_abnormal_ssa_name_p (base)
930
          || contains_abnormal_ssa_name_p (step))
931
        continue;
932
 
933
      type = TREE_TYPE (PHI_RESULT (phi));
934
      base = fold_convert (type, base);
935
      if (step)
936
        {
937
          if (POINTER_TYPE_P (type))
938
            step = fold_convert (sizetype, step);
939
          else
940
            step = fold_convert (type, step);
941
        }
942
 
943
      set_iv (data, PHI_RESULT (phi), base, step);
944
      found = true;
945
    }
946
 
947
  return found;
948
}
949
 
950
/* Marks basic ivs.  */
951
 
952
static void
953
mark_bivs (struct ivopts_data *data)
954
{
955
  gimple phi;
956
  tree var;
957
  struct iv *iv, *incr_iv;
958
  struct loop *loop = data->current_loop;
959
  basic_block incr_bb;
960
  gimple_stmt_iterator psi;
961
 
962
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
963
    {
964
      phi = gsi_stmt (psi);
965
 
966
      iv = get_iv (data, PHI_RESULT (phi));
967
      if (!iv)
968
        continue;
969
 
970
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
971
      incr_iv = get_iv (data, var);
972
      if (!incr_iv)
973
        continue;
974
 
975
      /* If the increment is in the subloop, ignore it.  */
976
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
977
      if (incr_bb->loop_father != data->current_loop
978
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
979
        continue;
980
 
981
      iv->biv_p = true;
982
      incr_iv->biv_p = true;
983
    }
984
}
985
 
986
/* Checks whether STMT defines a linear induction variable and stores its
987
   parameters to IV.  */
988
 
989
static bool
990
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
991
{
992
  tree lhs;
993
  struct loop *loop = data->current_loop;
994
 
995
  iv->base = NULL_TREE;
996
  iv->step = NULL_TREE;
997
 
998
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
999
    return false;
1000
 
1001
  lhs = gimple_assign_lhs (stmt);
1002
  if (TREE_CODE (lhs) != SSA_NAME)
1003
    return false;
1004
 
1005
  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1006
    return false;
1007
  iv->base = expand_simple_operations (iv->base);
1008
 
1009
  if (contains_abnormal_ssa_name_p (iv->base)
1010
      || contains_abnormal_ssa_name_p (iv->step))
1011
    return false;
1012
 
1013
  return true;
1014
}
1015
 
1016
/* Finds general ivs in statement STMT.  */
1017
 
1018
static void
1019
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1020
{
1021
  affine_iv iv;
1022
 
1023
  if (!find_givs_in_stmt_scev (data, stmt, &iv))
1024
    return;
1025
 
1026
  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1027
}
1028
 
1029
/* Finds general ivs in basic block BB.  */
1030
 
1031
static void
1032
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1033
{
1034
  gimple_stmt_iterator bsi;
1035
 
1036
  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037
    find_givs_in_stmt (data, gsi_stmt (bsi));
1038
}
1039
 
1040
/* Finds general ivs.  */
1041
 
1042
static void
1043
find_givs (struct ivopts_data *data)
1044
{
1045
  struct loop *loop = data->current_loop;
1046
  basic_block *body = get_loop_body_in_dom_order (loop);
1047
  unsigned i;
1048
 
1049
  for (i = 0; i < loop->num_nodes; i++)
1050
    find_givs_in_bb (data, body[i]);
1051
  free (body);
1052
}
1053
 
1054
/* For each ssa name defined in LOOP determines whether it is an induction
1055
   variable and if so, its initial value and step.  */
1056
 
1057
static bool
1058
find_induction_variables (struct ivopts_data *data)
1059
{
1060
  unsigned i;
1061
  bitmap_iterator bi;
1062
 
1063
  if (!find_bivs (data))
1064
    return false;
1065
 
1066
  find_givs (data);
1067
  mark_bivs (data);
1068
 
1069
  if (dump_file && (dump_flags & TDF_DETAILS))
1070
    {
1071
      tree niter = niter_for_single_dom_exit (data);
1072
 
1073
      if (niter)
1074
        {
1075
          fprintf (dump_file, "  number of iterations ");
1076
          print_generic_expr (dump_file, niter, TDF_SLIM);
1077
          fprintf (dump_file, "\n\n");
1078
        };
1079
 
1080
      fprintf (dump_file, "Induction variables:\n\n");
1081
 
1082
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1083
        {
1084
          if (ver_info (data, i)->iv)
1085
            dump_iv (dump_file, ver_info (data, i)->iv);
1086
        }
1087
    }
1088
 
1089
  return true;
1090
}
1091
 
1092
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1093
 
1094
static struct iv_use *
1095
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1096
            gimple stmt, enum use_type use_type)
1097
{
1098
  struct iv_use *use = XCNEW (struct iv_use);
1099
 
1100
  use->id = n_iv_uses (data);
1101
  use->type = use_type;
1102
  use->iv = iv;
1103
  use->stmt = stmt;
1104
  use->op_p = use_p;
1105
  use->related_cands = BITMAP_ALLOC (NULL);
1106
 
1107
  /* To avoid showing ssa name in the dumps, if it was not reset by the
1108
     caller.  */
1109
  iv->ssa_name = NULL_TREE;
1110
 
1111
  if (dump_file && (dump_flags & TDF_DETAILS))
1112
    dump_use (dump_file, use);
1113
 
1114
  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1115
 
1116
  return use;
1117
}
1118
 
1119
/* Checks whether OP is a loop-level invariant and if so, records it.
1120
   NONLINEAR_USE is true if the invariant is used in a way we do not
1121
   handle specially.  */
1122
 
1123
static void
1124
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1125
{
1126
  basic_block bb;
1127
  struct version_info *info;
1128
 
1129
  if (TREE_CODE (op) != SSA_NAME
1130
      || !is_gimple_reg (op))
1131
    return;
1132
 
1133
  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1134
  if (bb
1135
      && flow_bb_inside_loop_p (data->current_loop, bb))
1136
    return;
1137
 
1138
  info = name_info (data, op);
1139
  info->name = op;
1140
  info->has_nonlin_use |= nonlinear_use;
1141
  if (!info->inv_id)
1142
    info->inv_id = ++data->max_inv_id;
1143
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1144
}
1145
 
1146
/* Checks whether the use OP is interesting and if so, records it.  */
1147
 
1148
static struct iv_use *
1149
find_interesting_uses_op (struct ivopts_data *data, tree op)
1150
{
1151
  struct iv *iv;
1152
  struct iv *civ;
1153
  gimple stmt;
1154
  struct iv_use *use;
1155
 
1156
  if (TREE_CODE (op) != SSA_NAME)
1157
    return NULL;
1158
 
1159
  iv = get_iv (data, op);
1160
  if (!iv)
1161
    return NULL;
1162
 
1163
  if (iv->have_use_for)
1164
    {
1165
      use = iv_use (data, iv->use_id);
1166
 
1167
      gcc_assert (use->type == USE_NONLINEAR_EXPR);
1168
      return use;
1169
    }
1170
 
1171
  if (integer_zerop (iv->step))
1172
    {
1173
      record_invariant (data, op, true);
1174
      return NULL;
1175
    }
1176
  iv->have_use_for = true;
1177
 
1178
  civ = XNEW (struct iv);
1179
  *civ = *iv;
1180
 
1181
  stmt = SSA_NAME_DEF_STMT (op);
1182
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1183
              || is_gimple_assign (stmt));
1184
 
1185
  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1186
  iv->use_id = use->id;
1187
 
1188
  return use;
1189
}
1190
 
1191
/* Given a condition in statement STMT, checks whether it is a compare
1192
   of an induction variable and an invariant.  If this is the case,
1193
   CONTROL_VAR is set to location of the iv, BOUND to the location of
1194
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195
   induction variable descriptions, and true is returned.  If this is not
1196
   the case, CONTROL_VAR and BOUND are set to the arguments of the
1197
   condition and false is returned.  */
1198
 
1199
static bool
1200
extract_cond_operands (struct ivopts_data *data, gimple stmt,
1201
                       tree **control_var, tree **bound,
1202
                       struct iv **iv_var, struct iv **iv_bound)
1203
{
1204
  /* The objects returned when COND has constant operands.  */
1205
  static struct iv const_iv;
1206
  static tree zero;
1207
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209
  bool ret = false;
1210
 
1211
  if (gimple_code (stmt) == GIMPLE_COND)
1212
    {
1213
      op0 = gimple_cond_lhs_ptr (stmt);
1214
      op1 = gimple_cond_rhs_ptr (stmt);
1215
    }
1216
  else
1217
    {
1218
      op0 = gimple_assign_rhs1_ptr (stmt);
1219
      op1 = gimple_assign_rhs2_ptr (stmt);
1220
    }
1221
 
1222
  zero = integer_zero_node;
1223
  const_iv.step = integer_zero_node;
1224
 
1225
  if (TREE_CODE (*op0) == SSA_NAME)
1226
    iv0 = get_iv (data, *op0);
1227
  if (TREE_CODE (*op1) == SSA_NAME)
1228
    iv1 = get_iv (data, *op1);
1229
 
1230
  /* Exactly one of the compared values must be an iv, and the other one must
1231
     be an invariant.  */
1232
  if (!iv0 || !iv1)
1233
    goto end;
1234
 
1235
  if (integer_zerop (iv0->step))
1236
    {
1237
      /* Control variable may be on the other side.  */
1238
      tmp_op = op0; op0 = op1; op1 = tmp_op;
1239
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1240
    }
1241
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1242
 
1243
end:
1244
  if (control_var)
1245
    *control_var = op0;;
1246
  if (iv_var)
1247
    *iv_var = iv0;;
1248
  if (bound)
1249
    *bound = op1;
1250
  if (iv_bound)
1251
    *iv_bound = iv1;
1252
 
1253
  return ret;
1254
}
1255
 
1256
/* Checks whether the condition in STMT is interesting and if so,
1257
   records it.  */
1258
 
1259
static void
1260
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1261
{
1262
  tree *var_p, *bound_p;
1263
  struct iv *var_iv, *civ;
1264
 
1265
  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1266
    {
1267
      find_interesting_uses_op (data, *var_p);
1268
      find_interesting_uses_op (data, *bound_p);
1269
      return;
1270
    }
1271
 
1272
  civ = XNEW (struct iv);
1273
  *civ = *var_iv;
1274
  record_use (data, NULL, civ, stmt, USE_COMPARE);
1275
}
1276
 
1277
/* Returns true if expression EXPR is obviously invariant in LOOP,
1278
   i.e. if all its operands are defined outside of the LOOP.  LOOP
1279
   should not be the function body.  */
1280
 
1281
bool
1282
expr_invariant_in_loop_p (struct loop *loop, tree expr)
1283
{
1284
  basic_block def_bb;
1285
  unsigned i, len;
1286
 
1287
  gcc_assert (loop_depth (loop) > 0);
1288
 
1289
  if (is_gimple_min_invariant (expr))
1290
    return true;
1291
 
1292
  if (TREE_CODE (expr) == SSA_NAME)
1293
    {
1294
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295
      if (def_bb
1296
          && flow_bb_inside_loop_p (loop, def_bb))
1297
        return false;
1298
 
1299
      return true;
1300
    }
1301
 
1302
  if (!EXPR_P (expr))
1303
    return false;
1304
 
1305
  len = TREE_OPERAND_LENGTH (expr);
1306
  for (i = 0; i < len; i++)
1307
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308
      return false;
1309
 
1310
  return true;
1311
}
1312
 
1313
/* Returns true if statement STMT is obviously invariant in LOOP,
1314
   i.e. if all its operands on the RHS are defined outside of the LOOP.
1315
   LOOP should not be the function body.  */
1316
 
1317
bool
1318
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1319
{
1320
  unsigned i;
1321
  tree lhs;
1322
 
1323
  gcc_assert (loop_depth (loop) > 0);
1324
 
1325
  lhs = gimple_get_lhs (stmt);
1326
  for (i = 0; i < gimple_num_ops (stmt); i++)
1327
    {
1328
      tree op = gimple_op (stmt, i);
1329
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330
        return false;
1331
    }
1332
 
1333
  return true;
1334
}
1335
 
1336
/* Cumulates the steps of indices into DATA and replaces their values with the
1337
   initial ones.  Returns false when the value of the index cannot be determined.
1338
   Callback for for_each_index.  */
1339
 
1340
struct ifs_ivopts_data
1341
{
1342
  struct ivopts_data *ivopts_data;
1343
  gimple stmt;
1344
  tree step;
1345
};
1346
 
1347
static bool
1348
idx_find_step (tree base, tree *idx, void *data)
1349
{
1350
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351
  struct iv *iv;
1352
  tree step, iv_base, iv_step, lbound, off;
1353
  struct loop *loop = dta->ivopts_data->current_loop;
1354
 
1355
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357
    return false;
1358
 
1359
  /* If base is a component ref, require that the offset of the reference
1360
     be invariant.  */
1361
  if (TREE_CODE (base) == COMPONENT_REF)
1362
    {
1363
      off = component_ref_field_offset (base);
1364
      return expr_invariant_in_loop_p (loop, off);
1365
    }
1366
 
1367
  /* If base is array, first check whether we will be able to move the
1368
     reference out of the loop (in order to take its address in strength
1369
     reduction).  In order for this to work we need both lower bound
1370
     and step to be loop invariants.  */
1371
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1372
    {
1373
      /* Moreover, for a range, the size needs to be invariant as well.  */
1374
      if (TREE_CODE (base) == ARRAY_RANGE_REF
1375
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376
        return false;
1377
 
1378
      step = array_ref_element_size (base);
1379
      lbound = array_ref_low_bound (base);
1380
 
1381
      if (!expr_invariant_in_loop_p (loop, step)
1382
          || !expr_invariant_in_loop_p (loop, lbound))
1383
        return false;
1384
    }
1385
 
1386
  if (TREE_CODE (*idx) != SSA_NAME)
1387
    return true;
1388
 
1389
  iv = get_iv (dta->ivopts_data, *idx);
1390
  if (!iv)
1391
    return false;
1392
 
1393
  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1394
          *&x[0], which is not folded and does not trigger the
1395
          ARRAY_REF path below.  */
1396
  *idx = iv->base;
1397
 
1398
  if (integer_zerop (iv->step))
1399
    return true;
1400
 
1401
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1402
    {
1403
      step = array_ref_element_size (base);
1404
 
1405
      /* We only handle addresses whose step is an integer constant.  */
1406
      if (TREE_CODE (step) != INTEGER_CST)
1407
        return false;
1408
    }
1409
  else
1410
    /* The step for pointer arithmetics already is 1 byte.  */
1411
    step = build_int_cst (sizetype, 1);
1412
 
1413
  iv_base = iv->base;
1414
  iv_step = iv->step;
1415
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416
                            sizetype, &iv_base, &iv_step, dta->stmt,
1417
                            false))
1418
    {
1419
      /* The index might wrap.  */
1420
      return false;
1421
    }
1422
 
1423
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1424
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1425
 
1426
  return true;
1427
}
1428
 
1429
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1430
   object is passed to it in DATA.  */
1431
 
1432
static bool
1433
idx_record_use (tree base, tree *idx,
1434
                void *vdata)
1435
{
1436
  struct ivopts_data *data = (struct ivopts_data *) vdata;
1437
  find_interesting_uses_op (data, *idx);
1438
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1439
    {
1440
      find_interesting_uses_op (data, array_ref_element_size (base));
1441
      find_interesting_uses_op (data, array_ref_low_bound (base));
1442
    }
1443
  return true;
1444
}
1445
 
1446
/* If we can prove that TOP = cst * BOT for some constant cst,
1447
   store cst to MUL and return true.  Otherwise return false.
1448
   The returned value is always sign-extended, regardless of the
1449
   signedness of TOP and BOT.  */
1450
 
1451
static bool
1452
constant_multiple_of (tree top, tree bot, double_int *mul)
1453
{
1454
  tree mby;
1455
  enum tree_code code;
1456
  double_int res, p0, p1;
1457
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1458
 
1459
  STRIP_NOPS (top);
1460
  STRIP_NOPS (bot);
1461
 
1462
  if (operand_equal_p (top, bot, 0))
1463
    {
1464
      *mul = double_int_one;
1465
      return true;
1466
    }
1467
 
1468
  code = TREE_CODE (top);
1469
  switch (code)
1470
    {
1471
    case MULT_EXPR:
1472
      mby = TREE_OPERAND (top, 1);
1473
      if (TREE_CODE (mby) != INTEGER_CST)
1474
        return false;
1475
 
1476
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477
        return false;
1478
 
1479
      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480
                              precision);
1481
      return true;
1482
 
1483
    case PLUS_EXPR:
1484
    case MINUS_EXPR:
1485
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487
        return false;
1488
 
1489
      if (code == MINUS_EXPR)
1490
        p1 = double_int_neg (p1);
1491
      *mul = double_int_sext (double_int_add (p0, p1), precision);
1492
      return true;
1493
 
1494
    case INTEGER_CST:
1495
      if (TREE_CODE (bot) != INTEGER_CST)
1496
        return false;
1497
 
1498
      p0 = double_int_sext (tree_to_double_int (top), precision);
1499
      p1 = double_int_sext (tree_to_double_int (bot), precision);
1500
      if (double_int_zero_p (p1))
1501
        return false;
1502
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503
                              precision);
1504
      return double_int_zero_p (res);
1505
 
1506
    default:
1507
      return false;
1508
    }
1509
}
1510
 
1511
/* Returns true if memory reference REF with step STEP may be unaligned.  */
1512
 
1513
static bool
1514
may_be_unaligned_p (tree ref, tree step)
1515
{
1516
  tree base;
1517
  tree base_type;
1518
  HOST_WIDE_INT bitsize;
1519
  HOST_WIDE_INT bitpos;
1520
  tree toffset;
1521
  enum machine_mode mode;
1522
  int unsignedp, volatilep;
1523
  unsigned base_align;
1524
 
1525
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526
     thus they are not misaligned.  */
1527
  if (TREE_CODE (ref) == TARGET_MEM_REF)
1528
    return false;
1529
 
1530
  /* The test below is basically copy of what expr.c:normal_inner_ref
1531
     does to check whether the object must be loaded by parts when
1532
     STRICT_ALIGNMENT is true.  */
1533
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534
                              &unsignedp, &volatilep, true);
1535
  base_type = TREE_TYPE (base);
1536
  base_align = TYPE_ALIGN (base_type);
1537
 
1538
  if (mode != BLKmode)
1539
    {
1540
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1541
 
1542
      if (base_align < mode_align
1543
          || (bitpos % mode_align) != 0
1544
          || (bitpos % BITS_PER_UNIT) != 0)
1545
        return true;
1546
 
1547
      if (toffset
1548
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1549
        return true;
1550
 
1551
      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1552
        return true;
1553
    }
1554
 
1555
  return false;
1556
}
1557
 
1558
/* Return true if EXPR may be non-addressable.   */
1559
 
1560 378 julius
bool
1561 280 jeremybenn
may_be_nonaddressable_p (tree expr)
1562
{
1563
  switch (TREE_CODE (expr))
1564
    {
1565
    case TARGET_MEM_REF:
1566
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1567
         target, thus they are always addressable.  */
1568
      return false;
1569
 
1570
    case COMPONENT_REF:
1571
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1572
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1573
 
1574
    case VIEW_CONVERT_EXPR:
1575
      /* This kind of view-conversions may wrap non-addressable objects
1576
         and make them look addressable.  After some processing the
1577
         non-addressability may be uncovered again, causing ADDR_EXPRs
1578
         of inappropriate objects to be built.  */
1579
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
1580
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1581
        return true;
1582
 
1583
      /* ... fall through ... */
1584
 
1585
    case ARRAY_REF:
1586
    case ARRAY_RANGE_REF:
1587
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1588
 
1589
    CASE_CONVERT:
1590
      return true;
1591
 
1592
    default:
1593
      break;
1594
    }
1595
 
1596
  return false;
1597
}
1598
 
1599
/* Finds addresses in *OP_P inside STMT.  */
1600
 
1601
static void
1602
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1603
{
1604
  tree base = *op_p, step = build_int_cst (sizetype, 0);
1605
  struct iv *civ;
1606
  struct ifs_ivopts_data ifs_ivopts_data;
1607
 
1608
  /* Do not play with volatile memory references.  A bit too conservative,
1609
     perhaps, but safe.  */
1610
  if (gimple_has_volatile_ops (stmt))
1611
    goto fail;
1612
 
1613
  /* Ignore bitfields for now.  Not really something terribly complicated
1614
     to handle.  TODO.  */
1615
  if (TREE_CODE (base) == BIT_FIELD_REF)
1616
    goto fail;
1617
 
1618
  base = unshare_expr (base);
1619
 
1620
  if (TREE_CODE (base) == TARGET_MEM_REF)
1621
    {
1622
      tree type = build_pointer_type (TREE_TYPE (base));
1623
      tree astep;
1624
 
1625
      if (TMR_BASE (base)
1626
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1627
        {
1628
          civ = get_iv (data, TMR_BASE (base));
1629
          if (!civ)
1630
            goto fail;
1631
 
1632
          TMR_BASE (base) = civ->base;
1633
          step = civ->step;
1634
        }
1635
      if (TMR_INDEX (base)
1636
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1637
        {
1638
          civ = get_iv (data, TMR_INDEX (base));
1639
          if (!civ)
1640
            goto fail;
1641
 
1642
          TMR_INDEX (base) = civ->base;
1643
          astep = civ->step;
1644
 
1645
          if (astep)
1646
            {
1647
              if (TMR_STEP (base))
1648
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1649
 
1650
              step = fold_build2 (PLUS_EXPR, type, step, astep);
1651
            }
1652
        }
1653
 
1654
      if (integer_zerop (step))
1655
        goto fail;
1656
      base = tree_mem_ref_addr (type, base);
1657
    }
1658
  else
1659
    {
1660
      ifs_ivopts_data.ivopts_data = data;
1661
      ifs_ivopts_data.stmt = stmt;
1662
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1663
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1664
          || integer_zerop (ifs_ivopts_data.step))
1665
        goto fail;
1666
      step = ifs_ivopts_data.step;
1667
 
1668
      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1669
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1670
 
1671
      /* Check that the base expression is addressable.  This needs
1672
         to be done after substituting bases of IVs into it.  */
1673
      if (may_be_nonaddressable_p (base))
1674
        goto fail;
1675
 
1676
      /* Moreover, on strict alignment platforms, check that it is
1677
         sufficiently aligned.  */
1678
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1679
        goto fail;
1680
 
1681
      base = build_fold_addr_expr (base);
1682
 
1683
      /* Substituting bases of IVs into the base expression might
1684
         have caused folding opportunities.  */
1685
      if (TREE_CODE (base) == ADDR_EXPR)
1686
        {
1687
          tree *ref = &TREE_OPERAND (base, 0);
1688
          while (handled_component_p (*ref))
1689
            ref = &TREE_OPERAND (*ref, 0);
1690
          if (TREE_CODE (*ref) == INDIRECT_REF)
1691
            {
1692
              tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1693
              if (tem)
1694
                *ref = tem;
1695
            }
1696
        }
1697
    }
1698
 
1699
  civ = alloc_iv (base, step);
1700
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
1701
  return;
1702
 
1703
fail:
1704
  for_each_index (op_p, idx_record_use, data);
1705
}
1706
 
1707
/* Finds and records invariants used in STMT.  */
1708
 
1709
static void
1710
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1711
{
1712
  ssa_op_iter iter;
1713
  use_operand_p use_p;
1714
  tree op;
1715
 
1716
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1717
    {
1718
      op = USE_FROM_PTR (use_p);
1719
      record_invariant (data, op, false);
1720
    }
1721
}
1722
 
1723
/* Finds interesting uses of induction variables in the statement STMT.  */
1724
 
1725
static void
1726
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1727
{
1728
  struct iv *iv;
1729
  tree op, *lhs, *rhs;
1730
  ssa_op_iter iter;
1731
  use_operand_p use_p;
1732
  enum tree_code code;
1733
 
1734
  find_invariants_stmt (data, stmt);
1735
 
1736
  if (gimple_code (stmt) == GIMPLE_COND)
1737
    {
1738
      find_interesting_uses_cond (data, stmt);
1739
      return;
1740
    }
1741
 
1742
  if (is_gimple_assign (stmt))
1743
    {
1744
      lhs = gimple_assign_lhs_ptr (stmt);
1745
      rhs = gimple_assign_rhs1_ptr (stmt);
1746
 
1747
      if (TREE_CODE (*lhs) == SSA_NAME)
1748
        {
1749
          /* If the statement defines an induction variable, the uses are not
1750
             interesting by themselves.  */
1751
 
1752
          iv = get_iv (data, *lhs);
1753
 
1754
          if (iv && !integer_zerop (iv->step))
1755
            return;
1756
        }
1757
 
1758
      code = gimple_assign_rhs_code (stmt);
1759
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1760
          && (REFERENCE_CLASS_P (*rhs)
1761
              || is_gimple_val (*rhs)))
1762
        {
1763
          if (REFERENCE_CLASS_P (*rhs))
1764
            find_interesting_uses_address (data, stmt, rhs);
1765
          else
1766
            find_interesting_uses_op (data, *rhs);
1767
 
1768
          if (REFERENCE_CLASS_P (*lhs))
1769
            find_interesting_uses_address (data, stmt, lhs);
1770
          return;
1771
        }
1772
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
1773
        {
1774
          find_interesting_uses_cond (data, stmt);
1775
          return;
1776
        }
1777
 
1778
      /* TODO -- we should also handle address uses of type
1779
 
1780
         memory = call (whatever);
1781
 
1782
         and
1783
 
1784
         call (memory).  */
1785
    }
1786
 
1787
  if (gimple_code (stmt) == GIMPLE_PHI
1788
      && gimple_bb (stmt) == data->current_loop->header)
1789
    {
1790
      iv = get_iv (data, PHI_RESULT (stmt));
1791
 
1792
      if (iv && !integer_zerop (iv->step))
1793
        return;
1794
    }
1795
 
1796
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1797
    {
1798
      op = USE_FROM_PTR (use_p);
1799
 
1800
      if (TREE_CODE (op) != SSA_NAME)
1801
        continue;
1802
 
1803
      iv = get_iv (data, op);
1804
      if (!iv)
1805
        continue;
1806
 
1807
      find_interesting_uses_op (data, op);
1808
    }
1809
}
1810
 
1811
/* Finds interesting uses of induction variables outside of loops
1812
   on loop exit edge EXIT.  */
1813
 
1814
static void
1815
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1816
{
1817
  gimple phi;
1818
  gimple_stmt_iterator psi;
1819
  tree def;
1820
 
1821
  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1822
    {
1823
      phi = gsi_stmt (psi);
1824
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1825
      if (is_gimple_reg (def))
1826
        find_interesting_uses_op (data, def);
1827
    }
1828
}
1829
 
1830
/* Finds uses of the induction variables that are interesting.  */
1831
 
1832
static void
1833
find_interesting_uses (struct ivopts_data *data)
1834
{
1835
  basic_block bb;
1836
  gimple_stmt_iterator bsi;
1837
  basic_block *body = get_loop_body (data->current_loop);
1838
  unsigned i;
1839
  struct version_info *info;
1840
  edge e;
1841
 
1842
  if (dump_file && (dump_flags & TDF_DETAILS))
1843
    fprintf (dump_file, "Uses:\n\n");
1844
 
1845
  for (i = 0; i < data->current_loop->num_nodes; i++)
1846
    {
1847
      edge_iterator ei;
1848
      bb = body[i];
1849
 
1850
      FOR_EACH_EDGE (e, ei, bb->succs)
1851
        if (e->dest != EXIT_BLOCK_PTR
1852
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1853
          find_interesting_uses_outside (data, e);
1854
 
1855
      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1856
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
1857
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1858
        if (!is_gimple_debug (gsi_stmt (bsi)))
1859
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
1860
    }
1861
 
1862
  if (dump_file && (dump_flags & TDF_DETAILS))
1863
    {
1864
      bitmap_iterator bi;
1865
 
1866
      fprintf (dump_file, "\n");
1867
 
1868
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1869
        {
1870
          info = ver_info (data, i);
1871
          if (info->inv_id)
1872
            {
1873
              fprintf (dump_file, "  ");
1874
              print_generic_expr (dump_file, info->name, TDF_SLIM);
1875
              fprintf (dump_file, " is invariant (%d)%s\n",
1876
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1877
            }
1878
        }
1879
 
1880
      fprintf (dump_file, "\n");
1881
    }
1882
 
1883
  free (body);
1884
}
1885
 
1886
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1887
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1888
   we are at the top-level of the processed address.  */
1889
 
1890
static tree
1891
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1892
                unsigned HOST_WIDE_INT *offset)
1893
{
1894
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1895
  enum tree_code code;
1896
  tree type, orig_type = TREE_TYPE (expr);
1897
  unsigned HOST_WIDE_INT off0, off1, st;
1898
  tree orig_expr = expr;
1899
 
1900
  STRIP_NOPS (expr);
1901
 
1902
  type = TREE_TYPE (expr);
1903
  code = TREE_CODE (expr);
1904
  *offset = 0;
1905
 
1906
  switch (code)
1907
    {
1908
    case INTEGER_CST:
1909
      if (!cst_and_fits_in_hwi (expr)
1910
          || integer_zerop (expr))
1911
        return orig_expr;
1912
 
1913
      *offset = int_cst_value (expr);
1914
      return build_int_cst (orig_type, 0);
1915
 
1916
    case POINTER_PLUS_EXPR:
1917
    case PLUS_EXPR:
1918
    case MINUS_EXPR:
1919
      op0 = TREE_OPERAND (expr, 0);
1920
      op1 = TREE_OPERAND (expr, 1);
1921
 
1922
      op0 = strip_offset_1 (op0, false, false, &off0);
1923
      op1 = strip_offset_1 (op1, false, false, &off1);
1924
 
1925
      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1926
      if (op0 == TREE_OPERAND (expr, 0)
1927
          && op1 == TREE_OPERAND (expr, 1))
1928
        return orig_expr;
1929
 
1930
      if (integer_zerop (op1))
1931
        expr = op0;
1932
      else if (integer_zerop (op0))
1933
        {
1934
          if (code == MINUS_EXPR)
1935
            expr = fold_build1 (NEGATE_EXPR, type, op1);
1936
          else
1937
            expr = op1;
1938
        }
1939
      else
1940
        expr = fold_build2 (code, type, op0, op1);
1941
 
1942
      return fold_convert (orig_type, expr);
1943
 
1944
    case MULT_EXPR:
1945
      op1 = TREE_OPERAND (expr, 1);
1946
      if (!cst_and_fits_in_hwi (op1))
1947
        return orig_expr;
1948
 
1949
      op0 = TREE_OPERAND (expr, 0);
1950
      op0 = strip_offset_1 (op0, false, false, &off0);
1951
      if (op0 == TREE_OPERAND (expr, 0))
1952
        return orig_expr;
1953
 
1954
      *offset = off0 * int_cst_value (op1);
1955
      if (integer_zerop (op0))
1956
        expr = op0;
1957
      else
1958
        expr = fold_build2 (MULT_EXPR, type, op0, op1);
1959
 
1960
      return fold_convert (orig_type, expr);
1961
 
1962
    case ARRAY_REF:
1963
    case ARRAY_RANGE_REF:
1964
      if (!inside_addr)
1965
        return orig_expr;
1966
 
1967
      step = array_ref_element_size (expr);
1968
      if (!cst_and_fits_in_hwi (step))
1969
        break;
1970
 
1971
      st = int_cst_value (step);
1972
      op1 = TREE_OPERAND (expr, 1);
1973
      op1 = strip_offset_1 (op1, false, false, &off1);
1974
      *offset = off1 * st;
1975
 
1976
      if (top_compref
1977
          && integer_zerop (op1))
1978
        {
1979
          /* Strip the component reference completely.  */
1980
          op0 = TREE_OPERAND (expr, 0);
1981
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1982
          *offset += off0;
1983
          return op0;
1984
        }
1985
      break;
1986
 
1987
    case COMPONENT_REF:
1988
      if (!inside_addr)
1989
        return orig_expr;
1990
 
1991
      tmp = component_ref_field_offset (expr);
1992
      if (top_compref
1993
          && cst_and_fits_in_hwi (tmp))
1994
        {
1995
          /* Strip the component reference completely.  */
1996
          op0 = TREE_OPERAND (expr, 0);
1997
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1998
          *offset = off0 + int_cst_value (tmp);
1999
          return op0;
2000
        }
2001
      break;
2002
 
2003
    case ADDR_EXPR:
2004
      op0 = TREE_OPERAND (expr, 0);
2005
      op0 = strip_offset_1 (op0, true, true, &off0);
2006
      *offset += off0;
2007
 
2008
      if (op0 == TREE_OPERAND (expr, 0))
2009
        return orig_expr;
2010
 
2011
      expr = build_fold_addr_expr (op0);
2012
      return fold_convert (orig_type, expr);
2013
 
2014
    case INDIRECT_REF:
2015
      inside_addr = false;
2016
      break;
2017
 
2018
    default:
2019
      return orig_expr;
2020
    }
2021
 
2022
  /* Default handling of expressions for that we want to recurse into
2023
     the first operand.  */
2024
  op0 = TREE_OPERAND (expr, 0);
2025
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2026
  *offset += off0;
2027
 
2028
  if (op0 == TREE_OPERAND (expr, 0)
2029
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2030
    return orig_expr;
2031
 
2032
  expr = copy_node (expr);
2033
  TREE_OPERAND (expr, 0) = op0;
2034
  if (op1)
2035
    TREE_OPERAND (expr, 1) = op1;
2036
 
2037
  /* Inside address, we might strip the top level component references,
2038
     thus changing type of the expression.  Handling of ADDR_EXPR
2039
     will fix that.  */
2040
  expr = fold_convert (orig_type, expr);
2041
 
2042
  return expr;
2043
}
2044
 
2045
/* Strips constant offsets from EXPR and stores them to OFFSET.  */
2046
 
2047
static tree
2048
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2049
{
2050
  return strip_offset_1 (expr, false, false, offset);
2051
}
2052
 
2053
/* Returns variant of TYPE that can be used as base for different uses.
2054
   We return unsigned type with the same precision, which avoids problems
2055
   with overflows.  */
2056
 
2057
static tree
2058
generic_type_for (tree type)
2059
{
2060
  if (POINTER_TYPE_P (type))
2061
    return unsigned_type_for (type);
2062
 
2063
  if (TYPE_UNSIGNED (type))
2064
    return type;
2065
 
2066
  return unsigned_type_for (type);
2067
}
2068
 
2069
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2070
   the bitmap to that we should store it.  */
2071
 
2072
static struct ivopts_data *fd_ivopts_data;
2073
static tree
2074
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2075
{
2076
  bitmap *depends_on = (bitmap *) data;
2077
  struct version_info *info;
2078
 
2079
  if (TREE_CODE (*expr_p) != SSA_NAME)
2080
    return NULL_TREE;
2081
  info = name_info (fd_ivopts_data, *expr_p);
2082
 
2083
  if (!info->inv_id || info->has_nonlin_use)
2084
    return NULL_TREE;
2085
 
2086
  if (!*depends_on)
2087
    *depends_on = BITMAP_ALLOC (NULL);
2088
  bitmap_set_bit (*depends_on, info->inv_id);
2089
 
2090
  return NULL_TREE;
2091
}
2092
 
2093
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2094
   position to POS.  If USE is not NULL, the candidate is set as related to
2095
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2096
   replacement of the final value of the iv by a direct computation.  */
2097
 
2098
static struct iv_cand *
2099
add_candidate_1 (struct ivopts_data *data,
2100
                 tree base, tree step, bool important, enum iv_position pos,
2101
                 struct iv_use *use, gimple incremented_at)
2102
{
2103
  unsigned i;
2104
  struct iv_cand *cand = NULL;
2105
  tree type, orig_type;
2106
 
2107
  if (base)
2108
    {
2109
      orig_type = TREE_TYPE (base);
2110
      type = generic_type_for (orig_type);
2111
      if (type != orig_type)
2112
        {
2113
          base = fold_convert (type, base);
2114
          step = fold_convert (type, step);
2115
        }
2116
    }
2117
 
2118
  for (i = 0; i < n_iv_cands (data); i++)
2119
    {
2120
      cand = iv_cand (data, i);
2121
 
2122
      if (cand->pos != pos)
2123
        continue;
2124
 
2125
      if (cand->incremented_at != incremented_at
2126
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2127
              && cand->ainc_use != use))
2128
        continue;
2129
 
2130
      if (!cand->iv)
2131
        {
2132
          if (!base && !step)
2133
            break;
2134
 
2135
          continue;
2136
        }
2137
 
2138
      if (!base && !step)
2139
        continue;
2140
 
2141
      if (operand_equal_p (base, cand->iv->base, 0)
2142
          && operand_equal_p (step, cand->iv->step, 0))
2143
        break;
2144
    }
2145
 
2146
  if (i == n_iv_cands (data))
2147
    {
2148
      cand = XCNEW (struct iv_cand);
2149
      cand->id = i;
2150
 
2151
      if (!base && !step)
2152
        cand->iv = NULL;
2153
      else
2154
        cand->iv = alloc_iv (base, step);
2155
 
2156
      cand->pos = pos;
2157
      if (pos != IP_ORIGINAL && cand->iv)
2158
        {
2159
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2160
          cand->var_after = cand->var_before;
2161
        }
2162
      cand->important = important;
2163
      cand->incremented_at = incremented_at;
2164
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2165
 
2166
      if (step
2167
          && TREE_CODE (step) != INTEGER_CST)
2168
        {
2169
          fd_ivopts_data = data;
2170
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
2171
        }
2172
 
2173
      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2174
        cand->ainc_use = use;
2175
      else
2176
        cand->ainc_use = NULL;
2177
 
2178
      if (dump_file && (dump_flags & TDF_DETAILS))
2179
        dump_cand (dump_file, cand);
2180
    }
2181
 
2182
  if (important && !cand->important)
2183
    {
2184
      cand->important = true;
2185
      if (dump_file && (dump_flags & TDF_DETAILS))
2186
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
2187
    }
2188
 
2189
  if (use)
2190
    {
2191
      bitmap_set_bit (use->related_cands, i);
2192
      if (dump_file && (dump_flags & TDF_DETAILS))
2193
        fprintf (dump_file, "Candidate %d is related to use %d\n",
2194
                 cand->id, use->id);
2195
    }
2196
 
2197
  return cand;
2198
}
2199
 
2200
/* Returns true if incrementing the induction variable at the end of the LOOP
2201
   is allowed.
2202
 
2203
   The purpose is to avoid splitting latch edge with a biv increment, thus
2204
   creating a jump, possibly confusing other optimization passes and leaving
2205
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2206
   is not available (so we do not have a better alternative), or if the latch
2207
   edge is already nonempty.  */
2208
 
2209
static bool
2210
allow_ip_end_pos_p (struct loop *loop)
2211
{
2212
  if (!ip_normal_pos (loop))
2213
    return true;
2214
 
2215
  if (!empty_block_p (ip_end_pos (loop)))
2216
    return true;
2217
 
2218
  return false;
2219
}
2220
 
2221
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2222
   Important field is set to IMPORTANT.  */
2223
 
2224
static void
2225
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2226
                        bool important, struct iv_use *use)
2227
{
2228
  basic_block use_bb = gimple_bb (use->stmt);
2229
  enum machine_mode mem_mode;
2230
  unsigned HOST_WIDE_INT cstepi;
2231
 
2232
  /* If we insert the increment in any position other than the standard
2233
     ones, we must ensure that it is incremented once per iteration.
2234
     It must not be in an inner nested loop, or one side of an if
2235
     statement.  */
2236
  if (use_bb->loop_father != data->current_loop
2237
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2238
      || stmt_could_throw_p (use->stmt)
2239
      || !cst_and_fits_in_hwi (step))
2240
    return;
2241
 
2242
  cstepi = int_cst_value (step);
2243
 
2244
  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2245
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2246
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2247
    {
2248
      enum tree_code code = MINUS_EXPR;
2249
      tree new_base;
2250
      tree new_step = step;
2251
 
2252
      if (POINTER_TYPE_P (TREE_TYPE (base)))
2253
        {
2254
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2255
          code = POINTER_PLUS_EXPR;
2256
        }
2257
      else
2258
        new_step = fold_convert (TREE_TYPE (base), new_step);
2259
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2260
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2261
                       use->stmt);
2262
    }
2263
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2264
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2265
    {
2266
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2267
                       use->stmt);
2268
    }
2269
}
2270
 
2271
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2272
   position to POS.  If USE is not NULL, the candidate is set as related to
2273
   it.  The candidate computation is scheduled on all available positions.  */
2274
 
2275
static void
2276
add_candidate (struct ivopts_data *data,
2277
               tree base, tree step, bool important, struct iv_use *use)
2278
{
2279
  if (ip_normal_pos (data->current_loop))
2280
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2281
  if (ip_end_pos (data->current_loop)
2282
      && allow_ip_end_pos_p (data->current_loop))
2283
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2284
 
2285
  if (use != NULL && use->type == USE_ADDRESS)
2286
    add_autoinc_candidates (data, base, step, important, use);
2287
}
2288
 
2289
/* Add a standard "0 + 1 * iteration" iv candidate for a
2290
   type with SIZE bits.  */
2291
 
2292
static void
2293
add_standard_iv_candidates_for_size (struct ivopts_data *data,
2294
                                     unsigned int size)
2295
{
2296
  tree type = lang_hooks.types.type_for_size (size, true);
2297
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2298
                 true, NULL);
2299
}
2300
 
2301
/* Adds standard iv candidates.  */
2302
 
2303
static void
2304
add_standard_iv_candidates (struct ivopts_data *data)
2305
{
2306
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2307
 
2308
  /* The same for a double-integer type if it is still fast enough.  */
2309
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2310
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2311
}
2312
 
2313
 
2314
/* Adds candidates bases on the old induction variable IV.  */
2315
 
2316
static void
2317
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2318
{
2319
  gimple phi;
2320
  tree def;
2321
  struct iv_cand *cand;
2322
 
2323
  add_candidate (data, iv->base, iv->step, true, NULL);
2324
 
2325
  /* The same, but with initial value zero.  */
2326
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2327
    add_candidate (data, size_int (0), iv->step, true, NULL);
2328
  else
2329
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2330
                   iv->step, true, NULL);
2331
 
2332
  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2333
  if (gimple_code (phi) == GIMPLE_PHI)
2334
    {
2335
      /* Additionally record the possibility of leaving the original iv
2336
         untouched.  */
2337
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2338
      cand = add_candidate_1 (data,
2339
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
2340
                              SSA_NAME_DEF_STMT (def));
2341
      cand->var_before = iv->ssa_name;
2342
      cand->var_after = def;
2343
    }
2344
}
2345
 
2346
/* Adds candidates based on the old induction variables.  */
2347
 
2348
static void
2349
add_old_ivs_candidates (struct ivopts_data *data)
2350
{
2351
  unsigned i;
2352
  struct iv *iv;
2353
  bitmap_iterator bi;
2354
 
2355
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2356
    {
2357
      iv = ver_info (data, i)->iv;
2358
      if (iv && iv->biv_p && !integer_zerop (iv->step))
2359
        add_old_iv_candidates (data, iv);
2360
    }
2361
}
2362
 
2363
/* Adds candidates based on the value of the induction variable IV and USE.  */
2364
 
2365
static void
2366
add_iv_value_candidates (struct ivopts_data *data,
2367
                         struct iv *iv, struct iv_use *use)
2368
{
2369
  unsigned HOST_WIDE_INT offset;
2370
  tree base;
2371
  tree basetype;
2372
 
2373
  add_candidate (data, iv->base, iv->step, false, use);
2374
 
2375
  /* The same, but with initial value zero.  Make such variable important,
2376
     since it is generic enough so that possibly many uses may be based
2377
     on it.  */
2378
  basetype = TREE_TYPE (iv->base);
2379
  if (POINTER_TYPE_P (basetype))
2380
    basetype = sizetype;
2381
  add_candidate (data, build_int_cst (basetype, 0),
2382
                 iv->step, true, use);
2383
 
2384
  /* Third, try removing the constant offset.  Make sure to even
2385
     add a candidate for &a[0] vs. (T *)&a.  */
2386
  base = strip_offset (iv->base, &offset);
2387
  if (offset
2388
      || base != iv->base)
2389
    add_candidate (data, base, iv->step, false, use);
2390
}
2391
 
2392
/* Adds candidates based on the uses.  */
2393
 
2394
static void
2395
add_derived_ivs_candidates (struct ivopts_data *data)
2396
{
2397
  unsigned i;
2398
 
2399
  for (i = 0; i < n_iv_uses (data); i++)
2400
    {
2401
      struct iv_use *use = iv_use (data, i);
2402
 
2403
      if (!use)
2404
        continue;
2405
 
2406
      switch (use->type)
2407
        {
2408
        case USE_NONLINEAR_EXPR:
2409
        case USE_COMPARE:
2410
        case USE_ADDRESS:
2411
          /* Just add the ivs based on the value of the iv used here.  */
2412
          add_iv_value_candidates (data, use->iv, use);
2413
          break;
2414
 
2415
        default:
2416
          gcc_unreachable ();
2417
        }
2418
    }
2419
}
2420
 
2421
/* Record important candidates and add them to related_cands bitmaps
2422
   if needed.  */
2423
 
2424
static void
2425
record_important_candidates (struct ivopts_data *data)
2426
{
2427
  unsigned i;
2428
  struct iv_use *use;
2429
 
2430
  for (i = 0; i < n_iv_cands (data); i++)
2431
    {
2432
      struct iv_cand *cand = iv_cand (data, i);
2433
 
2434
      if (cand->important)
2435
        bitmap_set_bit (data->important_candidates, i);
2436
    }
2437
 
2438
  data->consider_all_candidates = (n_iv_cands (data)
2439
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);
2440
 
2441
  if (data->consider_all_candidates)
2442
    {
2443
      /* We will not need "related_cands" bitmaps in this case,
2444
         so release them to decrease peak memory consumption.  */
2445
      for (i = 0; i < n_iv_uses (data); i++)
2446
        {
2447
          use = iv_use (data, i);
2448
          BITMAP_FREE (use->related_cands);
2449
        }
2450
    }
2451
  else
2452
    {
2453
      /* Add important candidates to the related_cands bitmaps.  */
2454
      for (i = 0; i < n_iv_uses (data); i++)
2455
        bitmap_ior_into (iv_use (data, i)->related_cands,
2456
                         data->important_candidates);
2457
    }
2458
}
2459
 
2460
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2461
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
2462
   we allocate a simple list to every use.  */
2463
 
2464
static void
2465
alloc_use_cost_map (struct ivopts_data *data)
2466
{
2467
  unsigned i, size, s, j;
2468
 
2469
  for (i = 0; i < n_iv_uses (data); i++)
2470
    {
2471
      struct iv_use *use = iv_use (data, i);
2472
      bitmap_iterator bi;
2473
 
2474
      if (data->consider_all_candidates)
2475
        size = n_iv_cands (data);
2476
      else
2477
        {
2478
          s = 0;
2479
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2480
            {
2481
              s++;
2482
            }
2483
 
2484
          /* Round up to the power of two, so that moduling by it is fast.  */
2485
          for (size = 1; size < s; size <<= 1)
2486
            continue;
2487
        }
2488
 
2489
      use->n_map_members = size;
2490
      use->cost_map = XCNEWVEC (struct cost_pair, size);
2491
    }
2492
}
2493
 
2494
/* Returns description of computation cost of expression whose runtime
2495
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2496
 
2497
static comp_cost
2498
new_cost (unsigned runtime, unsigned complexity)
2499
{
2500
  comp_cost cost;
2501
 
2502
  cost.cost = runtime;
2503
  cost.complexity = complexity;
2504
 
2505
  return cost;
2506
}
2507
 
2508
/* Adds costs COST1 and COST2.  */
2509
 
2510
static comp_cost
2511
add_costs (comp_cost cost1, comp_cost cost2)
2512
{
2513
  cost1.cost += cost2.cost;
2514
  cost1.complexity += cost2.complexity;
2515
 
2516
  return cost1;
2517
}
2518
/* Subtracts costs COST1 and COST2.  */
2519
 
2520
static comp_cost
2521
sub_costs (comp_cost cost1, comp_cost cost2)
2522
{
2523
  cost1.cost -= cost2.cost;
2524
  cost1.complexity -= cost2.complexity;
2525
 
2526
  return cost1;
2527
}
2528
 
2529
/* Returns a negative number if COST1 < COST2, a positive number if
2530
   COST1 > COST2, and 0 if COST1 = COST2.  */
2531
 
2532
static int
2533
compare_costs (comp_cost cost1, comp_cost cost2)
2534
{
2535
  if (cost1.cost == cost2.cost)
2536
    return cost1.complexity - cost2.complexity;
2537
 
2538
  return cost1.cost - cost2.cost;
2539
}
2540
 
2541
/* Returns true if COST is infinite.  */
2542
 
2543
static bool
2544
infinite_cost_p (comp_cost cost)
2545
{
2546
  return cost.cost == INFTY;
2547
}
2548
 
2549
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2550
   on invariants DEPENDS_ON and that the value used in expressing it
2551
   is VALUE.  */
2552
 
2553
static void
2554
set_use_iv_cost (struct ivopts_data *data,
2555
                 struct iv_use *use, struct iv_cand *cand,
2556
                 comp_cost cost, bitmap depends_on, tree value)
2557
{
2558
  unsigned i, s;
2559
 
2560
  if (infinite_cost_p (cost))
2561
    {
2562
      BITMAP_FREE (depends_on);
2563
      return;
2564
    }
2565
 
2566
  if (data->consider_all_candidates)
2567
    {
2568
      use->cost_map[cand->id].cand = cand;
2569
      use->cost_map[cand->id].cost = cost;
2570
      use->cost_map[cand->id].depends_on = depends_on;
2571
      use->cost_map[cand->id].value = value;
2572
      return;
2573
    }
2574
 
2575
  /* n_map_members is a power of two, so this computes modulo.  */
2576
  s = cand->id & (use->n_map_members - 1);
2577
  for (i = s; i < use->n_map_members; i++)
2578
    if (!use->cost_map[i].cand)
2579
      goto found;
2580
  for (i = 0; i < s; i++)
2581
    if (!use->cost_map[i].cand)
2582
      goto found;
2583
 
2584
  gcc_unreachable ();
2585
 
2586
found:
2587
  use->cost_map[i].cand = cand;
2588
  use->cost_map[i].cost = cost;
2589
  use->cost_map[i].depends_on = depends_on;
2590
  use->cost_map[i].value = value;
2591
}
2592
 
2593
/* Gets cost of (USE, CANDIDATE) pair.  */
2594
 
2595
static struct cost_pair *
2596
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2597
                 struct iv_cand *cand)
2598
{
2599
  unsigned i, s;
2600
  struct cost_pair *ret;
2601
 
2602
  if (!cand)
2603
    return NULL;
2604
 
2605
  if (data->consider_all_candidates)
2606
    {
2607
      ret = use->cost_map + cand->id;
2608
      if (!ret->cand)
2609
        return NULL;
2610
 
2611
      return ret;
2612
    }
2613
 
2614
  /* n_map_members is a power of two, so this computes modulo.  */
2615
  s = cand->id & (use->n_map_members - 1);
2616
  for (i = s; i < use->n_map_members; i++)
2617
    if (use->cost_map[i].cand == cand)
2618
      return use->cost_map + i;
2619
 
2620
  for (i = 0; i < s; i++)
2621
    if (use->cost_map[i].cand == cand)
2622
      return use->cost_map + i;
2623
 
2624
  return NULL;
2625
}
2626
 
2627
/* Returns estimate on cost of computing SEQ.  */
2628
 
2629
static unsigned
2630
seq_cost (rtx seq, bool speed)
2631
{
2632
  unsigned cost = 0;
2633
  rtx set;
2634
 
2635
  for (; seq; seq = NEXT_INSN (seq))
2636
    {
2637
      set = single_set (seq);
2638
      if (set)
2639
        cost += rtx_cost (set, SET,speed);
2640
      else
2641
        cost++;
2642
    }
2643
 
2644
  return cost;
2645
}
2646
 
2647
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2648
static rtx
2649
produce_memory_decl_rtl (tree obj, int *regno)
2650
{
2651
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2652
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2653
  rtx x;
2654
 
2655
  gcc_assert (obj);
2656
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2657
    {
2658
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2659
      x = gen_rtx_SYMBOL_REF (address_mode, name);
2660
      SET_SYMBOL_REF_DECL (x, obj);
2661
      x = gen_rtx_MEM (DECL_MODE (obj), x);
2662
      set_mem_addr_space (x, as);
2663
      targetm.encode_section_info (obj, x, true);
2664
    }
2665
  else
2666
    {
2667
      x = gen_raw_REG (address_mode, (*regno)++);
2668
      x = gen_rtx_MEM (DECL_MODE (obj), x);
2669
      set_mem_addr_space (x, as);
2670
    }
2671
 
2672
  return x;
2673
}
2674
 
2675
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2676
   walk_tree.  DATA contains the actual fake register number.  */
2677
 
2678
static tree
2679
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2680
{
2681
  tree obj = NULL_TREE;
2682
  rtx x = NULL_RTX;
2683
  int *regno = (int *) data;
2684
 
2685
  switch (TREE_CODE (*expr_p))
2686
    {
2687
    case ADDR_EXPR:
2688
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
2689
           handled_component_p (*expr_p);
2690
           expr_p = &TREE_OPERAND (*expr_p, 0))
2691
        continue;
2692
      obj = *expr_p;
2693
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2694
        x = produce_memory_decl_rtl (obj, regno);
2695
      break;
2696
 
2697
    case SSA_NAME:
2698
      *ws = 0;
2699
      obj = SSA_NAME_VAR (*expr_p);
2700
      if (!DECL_RTL_SET_P (obj))
2701
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2702
      break;
2703
 
2704
    case VAR_DECL:
2705
    case PARM_DECL:
2706
    case RESULT_DECL:
2707
      *ws = 0;
2708
      obj = *expr_p;
2709
 
2710
      if (DECL_RTL_SET_P (obj))
2711
        break;
2712
 
2713
      if (DECL_MODE (obj) == BLKmode)
2714
        x = produce_memory_decl_rtl (obj, regno);
2715
      else
2716
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2717
 
2718
      break;
2719
 
2720
    default:
2721
      break;
2722
    }
2723
 
2724
  if (x)
2725
    {
2726
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2727
      SET_DECL_RTL (obj, x);
2728
    }
2729
 
2730
  return NULL_TREE;
2731
}
2732
 
2733
/* Determines cost of the computation of EXPR.  */
2734
 
2735
static unsigned
2736
computation_cost (tree expr, bool speed)
2737
{
2738
  rtx seq, rslt;
2739
  tree type = TREE_TYPE (expr);
2740
  unsigned cost;
2741
  /* Avoid using hard regs in ways which may be unsupported.  */
2742
  int regno = LAST_VIRTUAL_REGISTER + 1;
2743
  enum function_frequency real_frequency = cfun->function_frequency;
2744
 
2745
  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2746
  crtl->maybe_hot_insn_p = speed;
2747
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2748
  start_sequence ();
2749
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2750
  seq = get_insns ();
2751
  end_sequence ();
2752
  default_rtl_profile ();
2753
  cfun->function_frequency = real_frequency;
2754
 
2755
  cost = seq_cost (seq, speed);
2756
  if (MEM_P (rslt))
2757
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2758
                          TYPE_ADDR_SPACE (type), speed);
2759
 
2760
  return cost;
2761
}
2762
 
2763
/* Returns variable containing the value of candidate CAND at statement AT.  */
2764
 
2765
static tree
2766
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2767
{
2768
  if (stmt_after_increment (loop, cand, stmt))
2769
    return cand->var_after;
2770
  else
2771
    return cand->var_before;
2772
}
2773
 
2774
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2775
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2776
 
2777
int
2778
tree_int_cst_sign_bit (const_tree t)
2779
{
2780
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2781
  unsigned HOST_WIDE_INT w;
2782
 
2783
  if (bitno < HOST_BITS_PER_WIDE_INT)
2784
    w = TREE_INT_CST_LOW (t);
2785
  else
2786
    {
2787
      w = TREE_INT_CST_HIGH (t);
2788
      bitno -= HOST_BITS_PER_WIDE_INT;
2789
    }
2790
 
2791
  return (w >> bitno) & 1;
2792
}
2793
 
2794
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2795
   same precision that is at least as wide as the precision of TYPE, stores
2796
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2797
   type of A and B.  */
2798
 
2799
static tree
2800
determine_common_wider_type (tree *a, tree *b)
2801
{
2802
  tree wider_type = NULL;
2803
  tree suba, subb;
2804
  tree atype = TREE_TYPE (*a);
2805
 
2806
  if (CONVERT_EXPR_P (*a))
2807
    {
2808
      suba = TREE_OPERAND (*a, 0);
2809
      wider_type = TREE_TYPE (suba);
2810
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2811
        return atype;
2812
    }
2813
  else
2814
    return atype;
2815
 
2816
  if (CONVERT_EXPR_P (*b))
2817
    {
2818
      subb = TREE_OPERAND (*b, 0);
2819
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2820
        return atype;
2821
    }
2822
  else
2823
    return atype;
2824
 
2825
  *a = suba;
2826
  *b = subb;
2827
  return wider_type;
2828
}
2829
 
2830
/* Determines the expression by that USE is expressed from induction variable
2831
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
2832
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2833
 
2834
static bool
2835
get_computation_aff (struct loop *loop,
2836
                     struct iv_use *use, struct iv_cand *cand, gimple at,
2837
                     struct affine_tree_combination *aff)
2838
{
2839
  tree ubase = use->iv->base;
2840
  tree ustep = use->iv->step;
2841
  tree cbase = cand->iv->base;
2842
  tree cstep = cand->iv->step, cstep_common;
2843
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2844
  tree common_type, var;
2845
  tree uutype;
2846
  aff_tree cbase_aff, var_aff;
2847
  double_int rat;
2848
 
2849
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2850
    {
2851
      /* We do not have a precision to express the values of use.  */
2852
      return false;
2853
    }
2854
 
2855
  var = var_at_stmt (loop, cand, at);
2856
  uutype = unsigned_type_for (utype);
2857
 
2858
  /* If the conversion is not noop, perform it.  */
2859
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2860
    {
2861
      cstep = fold_convert (uutype, cstep);
2862
      cbase = fold_convert (uutype, cbase);
2863
      var = fold_convert (uutype, var);
2864
    }
2865
 
2866
  if (!constant_multiple_of (ustep, cstep, &rat))
2867
    return false;
2868
 
2869
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2870
     type, we achieve better folding by computing their difference in this
2871
     wider type, and cast the result to UUTYPE.  We do not need to worry about
2872
     overflows, as all the arithmetics will in the end be performed in UUTYPE
2873
     anyway.  */
2874
  common_type = determine_common_wider_type (&ubase, &cbase);
2875
 
2876
  /* use = ubase - ratio * cbase + ratio * var.  */
2877
  tree_to_aff_combination (ubase, common_type, aff);
2878
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
2879
  tree_to_aff_combination (var, uutype, &var_aff);
2880
 
2881
  /* We need to shift the value if we are after the increment.  */
2882
  if (stmt_after_increment (loop, cand, at))
2883
    {
2884
      aff_tree cstep_aff;
2885
 
2886
      if (common_type != uutype)
2887
        cstep_common = fold_convert (common_type, cstep);
2888
      else
2889
        cstep_common = cstep;
2890
 
2891
      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2892
      aff_combination_add (&cbase_aff, &cstep_aff);
2893
    }
2894
 
2895
  aff_combination_scale (&cbase_aff, double_int_neg (rat));
2896
  aff_combination_add (aff, &cbase_aff);
2897
  if (common_type != uutype)
2898
    aff_combination_convert (aff, uutype);
2899
 
2900
  aff_combination_scale (&var_aff, rat);
2901
  aff_combination_add (aff, &var_aff);
2902
 
2903
  return true;
2904
}
2905
 
2906
/* Determines the expression by that USE is expressed from induction variable
2907
   CAND at statement AT in LOOP.  The computation is unshared.  */
2908
 
2909
static tree
2910
get_computation_at (struct loop *loop,
2911
                    struct iv_use *use, struct iv_cand *cand, gimple at)
2912
{
2913
  aff_tree aff;
2914
  tree type = TREE_TYPE (use->iv->base);
2915
 
2916
  if (!get_computation_aff (loop, use, cand, at, &aff))
2917
    return NULL_TREE;
2918
  unshare_aff_combination (&aff);
2919
  return fold_convert (type, aff_combination_to_tree (&aff));
2920
}
2921
 
2922
/* Determines the expression by that USE is expressed from induction variable
2923
   CAND in LOOP.  The computation is unshared.  */
2924
 
2925
static tree
2926
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2927
{
2928
  return get_computation_at (loop, use, cand, use->stmt);
2929
}
2930
 
2931
/* Returns cost of addition in MODE.  */
2932
 
2933
static unsigned
2934
add_cost (enum machine_mode mode, bool speed)
2935
{
2936
  static unsigned costs[NUM_MACHINE_MODES];
2937
  rtx seq;
2938
  unsigned cost;
2939
 
2940
  if (costs[mode])
2941
    return costs[mode];
2942
 
2943
  start_sequence ();
2944
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
2945
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2946
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2947
                 NULL_RTX);
2948
  seq = get_insns ();
2949
  end_sequence ();
2950
 
2951
  cost = seq_cost (seq, speed);
2952
  if (!cost)
2953
    cost = 1;
2954
 
2955
  costs[mode] = cost;
2956
 
2957
  if (dump_file && (dump_flags & TDF_DETAILS))
2958
    fprintf (dump_file, "Addition in %s costs %d\n",
2959
             GET_MODE_NAME (mode), cost);
2960
  return cost;
2961
}
2962
 
2963
/* Entry in a hashtable of already known costs for multiplication.  */
2964
struct mbc_entry
2965
{
2966
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2967
  enum machine_mode mode;       /* In mode.  */
2968
  unsigned cost;                /* The cost.  */
2969
};
2970
 
2971
/* Counts hash value for the ENTRY.  */
2972
 
2973
static hashval_t
2974
mbc_entry_hash (const void *entry)
2975
{
2976
  const struct mbc_entry *e = (const struct mbc_entry *) entry;
2977
 
2978
  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2979
}
2980
 
2981
/* Compares the hash table entries ENTRY1 and ENTRY2.  */
2982
 
2983
static int
2984
mbc_entry_eq (const void *entry1, const void *entry2)
2985
{
2986
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2987
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2988
 
2989
  return (e1->mode == e2->mode
2990
          && e1->cst == e2->cst);
2991
}
2992
 
2993
/* Returns cost of multiplication by constant CST in MODE.  */
2994
 
2995
unsigned
2996
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2997
{
2998
  static htab_t costs;
2999
  struct mbc_entry **cached, act;
3000
  rtx seq;
3001
  unsigned cost;
3002
 
3003
  if (!costs)
3004
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3005
 
3006
  act.mode = mode;
3007
  act.cst = cst;
3008
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3009
  if (*cached)
3010
    return (*cached)->cost;
3011
 
3012
  *cached = XNEW (struct mbc_entry);
3013
  (*cached)->mode = mode;
3014
  (*cached)->cst = cst;
3015
 
3016
  start_sequence ();
3017
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3018
               gen_int_mode (cst, mode), NULL_RTX, 0);
3019
  seq = get_insns ();
3020
  end_sequence ();
3021
 
3022
  cost = seq_cost (seq, speed);
3023
 
3024
  if (dump_file && (dump_flags & TDF_DETAILS))
3025
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3026
             (int) cst, GET_MODE_NAME (mode), cost);
3027
 
3028
  (*cached)->cost = cost;
3029
 
3030
  return cost;
3031
}
3032
 
3033
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
3034
   validity for a memory reference accessing memory of mode MODE in
3035
   address space AS.  */
3036
 
3037
DEF_VEC_P (sbitmap);
3038
DEF_VEC_ALLOC_P (sbitmap, heap);
3039
 
3040
bool
3041
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3042
                                 addr_space_t as)
3043
{
3044
#define MAX_RATIO 128
3045
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3046
  static VEC (sbitmap, heap) *valid_mult_list;
3047
  sbitmap valid_mult;
3048
 
3049
  if (data_index >= VEC_length (sbitmap, valid_mult_list))
3050
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3051
 
3052
  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3053
  if (!valid_mult)
3054
    {
3055
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3056
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3057
      rtx addr;
3058
      HOST_WIDE_INT i;
3059
 
3060
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3061
      sbitmap_zero (valid_mult);
3062
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3063
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3064
        {
3065
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
3066
          if (memory_address_addr_space_p (mode, addr, as))
3067
            SET_BIT (valid_mult, i + MAX_RATIO);
3068
        }
3069
 
3070
      if (dump_file && (dump_flags & TDF_DETAILS))
3071
        {
3072
          fprintf (dump_file, "  allowed multipliers:");
3073
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3074
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
3075
              fprintf (dump_file, " %d", (int) i);
3076
          fprintf (dump_file, "\n");
3077
          fprintf (dump_file, "\n");
3078
        }
3079
 
3080
      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3081
    }
3082
 
3083
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3084
    return false;
3085
 
3086
  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3087
}
3088
 
3089
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3090
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3091
   variable is omitted.  Compute the cost for a memory reference that accesses
3092
   a memory location of mode MEM_MODE in address space AS.
3093
 
3094
   MAY_AUTOINC is set to true if the autoincrement (increasing index by
3095
   size of MEM_MODE / RATIO) is available.  To make this determination, we
3096
   look at the size of the increment to be made, which is given in CSTEP.
3097
   CSTEP may be zero if the step is unknown.
3098
   STMT_AFTER_INC is true iff the statement we're looking at is after the
3099
   increment of the original biv.
3100
 
3101
   TODO -- there must be some better way.  This all is quite crude.  */
3102
 
3103
typedef struct
3104
{
3105
  HOST_WIDE_INT min_offset, max_offset;
3106
  unsigned costs[2][2][2][2];
3107
} *address_cost_data;
3108
 
3109
DEF_VEC_P (address_cost_data);
3110
DEF_VEC_ALLOC_P (address_cost_data, heap);
3111
 
3112
static comp_cost
3113
get_address_cost (bool symbol_present, bool var_present,
3114
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3115
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3116
                  addr_space_t as, bool speed,
3117
                  bool stmt_after_inc, bool *may_autoinc)
3118
{
3119
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3120
  static VEC(address_cost_data, heap) *address_cost_data_list;
3121
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3122
  address_cost_data data;
3123
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3124
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3125
  unsigned cost, acost, complexity;
3126
  bool offset_p, ratio_p, autoinc;
3127
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
3128
  unsigned HOST_WIDE_INT mask;
3129
  unsigned bits;
3130
 
3131
  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3132
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3133
                           data_index + 1);
3134
 
3135
  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3136
  if (!data)
3137
    {
3138
      HOST_WIDE_INT i;
3139
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3140
      HOST_WIDE_INT rat, off;
3141
      int old_cse_not_expected;
3142
      unsigned sym_p, var_p, off_p, rat_p, add_c;
3143
      rtx seq, addr, base;
3144
      rtx reg0, reg1;
3145
 
3146
      data = (address_cost_data) xcalloc (1, sizeof (*data));
3147
 
3148
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3149
 
3150
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3151
      for (i = start; i <= 1 << 20; i <<= 1)
3152
        {
3153
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
3154
          if (!memory_address_addr_space_p (mem_mode, addr, as))
3155
            break;
3156
        }
3157
      data->max_offset = i == start ? 0 : i >> 1;
3158
      off = data->max_offset;
3159
 
3160
      for (i = start; i <= 1 << 20; i <<= 1)
3161
        {
3162
          XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3163
          if (!memory_address_addr_space_p (mem_mode, addr, as))
3164
            break;
3165
        }
3166
      data->min_offset = i == start ? 0 : -(i >> 1);
3167
 
3168
      if (dump_file && (dump_flags & TDF_DETAILS))
3169
        {
3170
          fprintf (dump_file, "get_address_cost:\n");
3171
          fprintf (dump_file, "  min offset %s %d\n",
3172
                   GET_MODE_NAME (mem_mode),
3173
                   (int) data->min_offset);
3174
          fprintf (dump_file, "  max offset %s %d\n",
3175
                   GET_MODE_NAME (mem_mode),
3176
                   (int) data->max_offset);
3177
        }
3178
 
3179
      rat = 1;
3180
      for (i = 2; i <= MAX_RATIO; i++)
3181
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
3182
          {
3183
            rat = i;
3184
            break;
3185
          }
3186
 
3187
      /* Compute the cost of various addressing modes.  */
3188
      acost = 0;
3189
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3190
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3191
 
3192
      if (HAVE_PRE_DECREMENT)
3193
        {
3194
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
3195
          has_predec[mem_mode]
3196
            = memory_address_addr_space_p (mem_mode, addr, as);
3197
        }
3198
      if (HAVE_POST_DECREMENT)
3199
        {
3200
          addr = gen_rtx_POST_DEC (address_mode, reg0);
3201
          has_postdec[mem_mode]
3202
            = memory_address_addr_space_p (mem_mode, addr, as);
3203
        }
3204
      if (HAVE_PRE_INCREMENT)
3205
        {
3206
          addr = gen_rtx_PRE_INC (address_mode, reg0);
3207
          has_preinc[mem_mode]
3208
            = memory_address_addr_space_p (mem_mode, addr, as);
3209
        }
3210
      if (HAVE_POST_INCREMENT)
3211
        {
3212
          addr = gen_rtx_POST_INC (address_mode, reg0);
3213
          has_postinc[mem_mode]
3214
            = memory_address_addr_space_p (mem_mode, addr, as);
3215
        }
3216
      for (i = 0; i < 16; i++)
3217
        {
3218
          sym_p = i & 1;
3219
          var_p = (i >> 1) & 1;
3220
          off_p = (i >> 2) & 1;
3221
          rat_p = (i >> 3) & 1;
3222
 
3223
          addr = reg0;
3224
          if (rat_p)
3225
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3226
                                   gen_int_mode (rat, address_mode));
3227
 
3228
          if (var_p)
3229
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3230
 
3231
          if (sym_p)
3232
            {
3233
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3234
              /* ??? We can run into trouble with some backends by presenting
3235
                 it with symbols which haven't been properly passed through
3236
                 targetm.encode_section_info.  By setting the local bit, we
3237
                 enhance the probability of things working.  */
3238
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3239
 
3240
              if (off_p)
3241
                base = gen_rtx_fmt_e (CONST, address_mode,
3242
                                      gen_rtx_fmt_ee
3243
                                        (PLUS, address_mode, base,
3244
                                         gen_int_mode (off, address_mode)));
3245
            }
3246
          else if (off_p)
3247
            base = gen_int_mode (off, address_mode);
3248
          else
3249
            base = NULL_RTX;
3250
 
3251
          if (base)
3252
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3253
 
3254
          start_sequence ();
3255
          /* To avoid splitting addressing modes, pretend that no cse will
3256
             follow.  */
3257
          old_cse_not_expected = cse_not_expected;
3258
          cse_not_expected = true;
3259
          addr = memory_address_addr_space (mem_mode, addr, as);
3260
          cse_not_expected = old_cse_not_expected;
3261
          seq = get_insns ();
3262
          end_sequence ();
3263
 
3264
          acost = seq_cost (seq, speed);
3265
          acost += address_cost (addr, mem_mode, as, speed);
3266
 
3267
          if (!acost)
3268
            acost = 1;
3269
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
3270
        }
3271
 
3272
      /* On some targets, it is quite expensive to load symbol to a register,
3273
         which makes addresses that contain symbols look much more expensive.
3274
         However, the symbol will have to be loaded in any case before the
3275
         loop (and quite likely we have it in register already), so it does not
3276
         make much sense to penalize them too heavily.  So make some final
3277
         tweaks for the SYMBOL_PRESENT modes:
3278
 
3279
         If VAR_PRESENT is false, and the mode obtained by changing symbol to
3280
         var is cheaper, use this mode with small penalty.
3281
         If VAR_PRESENT is true, try whether the mode with
3282
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3283
         if this is the case, use it.  */
3284
      add_c = add_cost (address_mode, speed);
3285
      for (i = 0; i < 8; i++)
3286
        {
3287
          var_p = i & 1;
3288
          off_p = (i >> 1) & 1;
3289
          rat_p = (i >> 2) & 1;
3290
 
3291
          acost = data->costs[0][1][off_p][rat_p] + 1;
3292
          if (var_p)
3293
            acost += add_c;
3294
 
3295
          if (acost < data->costs[1][var_p][off_p][rat_p])
3296
            data->costs[1][var_p][off_p][rat_p] = acost;
3297
        }
3298
 
3299
      if (dump_file && (dump_flags & TDF_DETAILS))
3300
        {
3301
          fprintf (dump_file, "Address costs:\n");
3302
 
3303
          for (i = 0; i < 16; i++)
3304
            {
3305
              sym_p = i & 1;
3306
              var_p = (i >> 1) & 1;
3307
              off_p = (i >> 2) & 1;
3308
              rat_p = (i >> 3) & 1;
3309
 
3310
              fprintf (dump_file, "  ");
3311
              if (sym_p)
3312
                fprintf (dump_file, "sym + ");
3313
              if (var_p)
3314
                fprintf (dump_file, "var + ");
3315
              if (off_p)
3316
                fprintf (dump_file, "cst + ");
3317
              if (rat_p)
3318
                fprintf (dump_file, "rat * ");
3319
 
3320
              acost = data->costs[sym_p][var_p][off_p][rat_p];
3321
              fprintf (dump_file, "index costs %d\n", acost);
3322
            }
3323
          if (has_predec[mem_mode] || has_postdec[mem_mode]
3324
              || has_preinc[mem_mode] || has_postinc[mem_mode])
3325
            fprintf (dump_file, "  May include autoinc/dec\n");
3326
          fprintf (dump_file, "\n");
3327
        }
3328
 
3329
      VEC_replace (address_cost_data, address_cost_data_list,
3330
                   data_index, data);
3331
    }
3332
 
3333
  bits = GET_MODE_BITSIZE (address_mode);
3334
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3335
  offset &= mask;
3336
  if ((offset >> (bits - 1) & 1))
3337
    offset |= ~mask;
3338
  s_offset = offset;
3339
 
3340
  autoinc = false;
3341
  msize = GET_MODE_SIZE (mem_mode);
3342
  autoinc_offset = offset;
3343
  if (stmt_after_inc)
3344
    autoinc_offset += ratio * cstep;
3345
  if (symbol_present || var_present || ratio != 1)
3346
    autoinc = false;
3347
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
3348
               && msize == cstep)
3349
           || (has_postdec[mem_mode] && autoinc_offset == 0
3350
               && msize == -cstep)
3351
           || (has_preinc[mem_mode] && autoinc_offset == msize
3352
               && msize == cstep)
3353
           || (has_predec[mem_mode] && autoinc_offset == -msize
3354
               && msize == -cstep))
3355
    autoinc = true;
3356
 
3357
  cost = 0;
3358
  offset_p = (s_offset != 0
3359
              && data->min_offset <= s_offset
3360
              && s_offset <= data->max_offset);
3361
  ratio_p = (ratio != 1
3362
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3363
 
3364
  if (ratio != 1 && !ratio_p)
3365
    cost += multiply_by_cost (ratio, address_mode, speed);
3366
 
3367
  if (s_offset && !offset_p && !symbol_present)
3368
    cost += add_cost (address_mode, speed);
3369
 
3370
  if (may_autoinc)
3371
    *may_autoinc = autoinc;
3372
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3373
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3374
  return new_cost (cost + acost, complexity);
3375
}
3376
 
3377
/* Estimates cost of forcing expression EXPR into a variable.  */
3378
 
3379
static comp_cost
3380
force_expr_to_var_cost (tree expr, bool speed)
3381
{
3382
  static bool costs_initialized = false;
3383
  static unsigned integer_cost [2];
3384
  static unsigned symbol_cost [2];
3385
  static unsigned address_cost [2];
3386
  tree op0, op1;
3387
  comp_cost cost0, cost1, cost;
3388
  enum machine_mode mode;
3389
 
3390
  if (!costs_initialized)
3391
    {
3392
      tree type = build_pointer_type (integer_type_node);
3393
      tree var, addr;
3394
      rtx x;
3395
      int i;
3396
 
3397
      var = create_tmp_var_raw (integer_type_node, "test_var");
3398
      TREE_STATIC (var) = 1;
3399
      x = produce_memory_decl_rtl (var, NULL);
3400
      SET_DECL_RTL (var, x);
3401
 
3402
      addr = build1 (ADDR_EXPR, type, var);
3403
 
3404
 
3405
      for (i = 0; i < 2; i++)
3406
        {
3407
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3408
                                                             2000), i);
3409
 
3410
          symbol_cost[i] = computation_cost (addr, i) + 1;
3411
 
3412
          address_cost[i]
3413
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3414
                                        addr,
3415
                                        build_int_cst (sizetype, 2000)), i) + 1;
3416
          if (dump_file && (dump_flags & TDF_DETAILS))
3417
            {
3418
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3419
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3420
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3421
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3422
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3423
              fprintf (dump_file, "\n");
3424
            }
3425
        }
3426
 
3427
      costs_initialized = true;
3428
    }
3429
 
3430
  STRIP_NOPS (expr);
3431
 
3432
  if (SSA_VAR_P (expr))
3433
    return zero_cost;
3434
 
3435
  if (is_gimple_min_invariant (expr))
3436
    {
3437
      if (TREE_CODE (expr) == INTEGER_CST)
3438
        return new_cost (integer_cost [speed], 0);
3439
 
3440
      if (TREE_CODE (expr) == ADDR_EXPR)
3441
        {
3442
          tree obj = TREE_OPERAND (expr, 0);
3443
 
3444
          if (TREE_CODE (obj) == VAR_DECL
3445
              || TREE_CODE (obj) == PARM_DECL
3446
              || TREE_CODE (obj) == RESULT_DECL)
3447
            return new_cost (symbol_cost [speed], 0);
3448
        }
3449
 
3450
      return new_cost (address_cost [speed], 0);
3451
    }
3452
 
3453
  switch (TREE_CODE (expr))
3454
    {
3455
    case POINTER_PLUS_EXPR:
3456
    case PLUS_EXPR:
3457
    case MINUS_EXPR:
3458
    case MULT_EXPR:
3459
      op0 = TREE_OPERAND (expr, 0);
3460
      op1 = TREE_OPERAND (expr, 1);
3461
      STRIP_NOPS (op0);
3462
      STRIP_NOPS (op1);
3463
 
3464
      if (is_gimple_val (op0))
3465
        cost0 = zero_cost;
3466
      else
3467
        cost0 = force_expr_to_var_cost (op0, speed);
3468
 
3469
      if (is_gimple_val (op1))
3470
        cost1 = zero_cost;
3471
      else
3472
        cost1 = force_expr_to_var_cost (op1, speed);
3473
 
3474
      break;
3475
 
3476
    case NEGATE_EXPR:
3477
      op0 = TREE_OPERAND (expr, 0);
3478
      STRIP_NOPS (op0);
3479
      op1 = NULL_TREE;
3480
 
3481
      if (is_gimple_val (op0))
3482
        cost0 = zero_cost;
3483
      else
3484
        cost0 = force_expr_to_var_cost (op0, speed);
3485
 
3486
      cost1 = zero_cost;
3487
      break;
3488
 
3489
    default:
3490
      /* Just an arbitrary value, FIXME.  */
3491
      return new_cost (target_spill_cost[speed], 0);
3492
    }
3493
 
3494
  mode = TYPE_MODE (TREE_TYPE (expr));
3495
  switch (TREE_CODE (expr))
3496
    {
3497
    case POINTER_PLUS_EXPR:
3498
    case PLUS_EXPR:
3499
    case MINUS_EXPR:
3500
    case NEGATE_EXPR:
3501
      cost = new_cost (add_cost (mode, speed), 0);
3502
      break;
3503
 
3504
    case MULT_EXPR:
3505
      if (cst_and_fits_in_hwi (op0))
3506
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3507
      else if (cst_and_fits_in_hwi (op1))
3508
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3509
      else
3510
        return new_cost (target_spill_cost [speed], 0);
3511
      break;
3512
 
3513
    default:
3514
      gcc_unreachable ();
3515
    }
3516
 
3517
  cost = add_costs (cost, cost0);
3518
  cost = add_costs (cost, cost1);
3519
 
3520
  /* Bound the cost by target_spill_cost.  The parts of complicated
3521
     computations often are either loop invariant or at least can
3522
     be shared between several iv uses, so letting this grow without
3523
     limits would not give reasonable results.  */
3524
  if (cost.cost > (int) target_spill_cost [speed])
3525
    cost.cost = target_spill_cost [speed];
3526
 
3527
  return cost;
3528
}
3529
 
3530
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3531
   invariants the computation depends on.  */
3532
 
3533
static comp_cost
3534
force_var_cost (struct ivopts_data *data,
3535
                tree expr, bitmap *depends_on)
3536
{
3537
  if (depends_on)
3538
    {
3539
      fd_ivopts_data = data;
3540
      walk_tree (&expr, find_depends, depends_on, NULL);
3541
    }
3542
 
3543
  return force_expr_to_var_cost (expr, data->speed);
3544
}
3545
 
3546
/* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3547
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3548
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3549
   invariants the computation depends on.  */
3550
 
3551
static comp_cost
3552
split_address_cost (struct ivopts_data *data,
3553
                    tree addr, bool *symbol_present, bool *var_present,
3554
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3555
{
3556
  tree core;
3557
  HOST_WIDE_INT bitsize;
3558
  HOST_WIDE_INT bitpos;
3559
  tree toffset;
3560
  enum machine_mode mode;
3561
  int unsignedp, volatilep;
3562
 
3563
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3564
                              &unsignedp, &volatilep, false);
3565
 
3566
  if (toffset != 0
3567
      || bitpos % BITS_PER_UNIT != 0
3568
      || TREE_CODE (core) != VAR_DECL)
3569
    {
3570
      *symbol_present = false;
3571
      *var_present = true;
3572
      fd_ivopts_data = data;
3573
      walk_tree (&addr, find_depends, depends_on, NULL);
3574
      return new_cost (target_spill_cost[data->speed], 0);
3575
    }
3576
 
3577
  *offset += bitpos / BITS_PER_UNIT;
3578
  if (TREE_STATIC (core)
3579
      || DECL_EXTERNAL (core))
3580
    {
3581
      *symbol_present = true;
3582
      *var_present = false;
3583
      return zero_cost;
3584
    }
3585
 
3586
  *symbol_present = false;
3587
  *var_present = true;
3588
  return zero_cost;
3589
}
3590
 
3591
/* Estimates cost of expressing difference of addresses E1 - E2 as
3592
   var + symbol + offset.  The value of offset is added to OFFSET,
3593
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3594
   part is missing.  DEPENDS_ON is a set of the invariants the computation
3595
   depends on.  */
3596
 
3597
static comp_cost
3598
ptr_difference_cost (struct ivopts_data *data,
3599
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
3600
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3601
{
3602
  HOST_WIDE_INT diff = 0;
3603
  aff_tree aff_e1, aff_e2;
3604
  tree type;
3605
 
3606
  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3607
 
3608
  if (ptr_difference_const (e1, e2, &diff))
3609
    {
3610
      *offset += diff;
3611
      *symbol_present = false;
3612
      *var_present = false;
3613
      return zero_cost;
3614
    }
3615
 
3616
  if (integer_zerop (e2))
3617
    return split_address_cost (data, TREE_OPERAND (e1, 0),
3618
                               symbol_present, var_present, offset, depends_on);
3619
 
3620
  *symbol_present = false;
3621
  *var_present = true;
3622
 
3623
  type = signed_type_for (TREE_TYPE (e1));
3624
  tree_to_aff_combination (e1, type, &aff_e1);
3625
  tree_to_aff_combination (e2, type, &aff_e2);
3626
  aff_combination_scale (&aff_e2, double_int_minus_one);
3627
  aff_combination_add (&aff_e1, &aff_e2);
3628
 
3629
  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3630
}
3631
 
3632
/* Estimates cost of expressing difference E1 - E2 as
3633
   var + symbol + offset.  The value of offset is added to OFFSET,
3634
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635
   part is missing.  DEPENDS_ON is a set of the invariants the computation
3636
   depends on.  */
3637
 
3638
static comp_cost
3639
difference_cost (struct ivopts_data *data,
3640
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
3641
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3642
{
3643
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3644
  unsigned HOST_WIDE_INT off1, off2;
3645
  aff_tree aff_e1, aff_e2;
3646
  tree type;
3647
 
3648
  e1 = strip_offset (e1, &off1);
3649
  e2 = strip_offset (e2, &off2);
3650
  *offset += off1 - off2;
3651
 
3652
  STRIP_NOPS (e1);
3653
  STRIP_NOPS (e2);
3654
 
3655
  if (TREE_CODE (e1) == ADDR_EXPR)
3656
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3657
                                offset, depends_on);
3658
  *symbol_present = false;
3659
 
3660
  if (operand_equal_p (e1, e2, 0))
3661
    {
3662
      *var_present = false;
3663
      return zero_cost;
3664
    }
3665
 
3666
  *var_present = true;
3667
 
3668
  if (integer_zerop (e2))
3669
    return force_var_cost (data, e1, depends_on);
3670
 
3671
  if (integer_zerop (e1))
3672
    {
3673
      comp_cost cost = force_var_cost (data, e2, depends_on);
3674
      cost.cost += multiply_by_cost (-1, mode, data->speed);
3675
      return cost;
3676
    }
3677
 
3678
  type = signed_type_for (TREE_TYPE (e1));
3679
  tree_to_aff_combination (e1, type, &aff_e1);
3680
  tree_to_aff_combination (e2, type, &aff_e2);
3681
  aff_combination_scale (&aff_e2, double_int_minus_one);
3682
  aff_combination_add (&aff_e1, &aff_e2);
3683
 
3684
  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3685
}
3686
 
3687
/* Determines the cost of the computation by that USE is expressed
3688
   from induction variable CAND.  If ADDRESS_P is true, we just need
3689
   to create an address from it, otherwise we want to get it into
3690
   register.  A set of invariants we depend on is stored in
3691
   DEPENDS_ON.  AT is the statement at that the value is computed.
3692
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
3693
   addressing is likely.  */
3694
 
3695
static comp_cost
3696
get_computation_cost_at (struct ivopts_data *data,
3697
                         struct iv_use *use, struct iv_cand *cand,
3698
                         bool address_p, bitmap *depends_on, gimple at,
3699
                         bool *can_autoinc)
3700
{
3701
  tree ubase = use->iv->base, ustep = use->iv->step;
3702
  tree cbase, cstep;
3703
  tree utype = TREE_TYPE (ubase), ctype;
3704
  unsigned HOST_WIDE_INT cstepi, offset = 0;
3705
  HOST_WIDE_INT ratio, aratio;
3706
  bool var_present, symbol_present, stmt_is_after_inc;
3707
  comp_cost cost;
3708
  double_int rat;
3709
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3710
 
3711
  *depends_on = NULL;
3712
 
3713
  /* Only consider real candidates.  */
3714
  if (!cand->iv)
3715
    return infinite_cost;
3716
 
3717
  cbase = cand->iv->base;
3718
  cstep = cand->iv->step;
3719
  ctype = TREE_TYPE (cbase);
3720
 
3721
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3722
    {
3723
      /* We do not have a precision to express the values of use.  */
3724
      return infinite_cost;
3725
    }
3726
 
3727
  if (address_p)
3728
    {
3729
      /* Do not try to express address of an object with computation based
3730
         on address of a different object.  This may cause problems in rtl
3731
         level alias analysis (that does not expect this to be happening,
3732
         as this is illegal in C), and would be unlikely to be useful
3733
         anyway.  */
3734
      if (use->iv->base_object
3735
          && cand->iv->base_object
3736
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3737
        return infinite_cost;
3738
    }
3739
 
3740
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3741
    {
3742
      /* TODO -- add direct handling of this case.  */
3743
      goto fallback;
3744
    }
3745
 
3746
  /* CSTEPI is removed from the offset in case statement is after the
3747
     increment.  If the step is not constant, we use zero instead.
3748
     This is a bit imprecise (there is the extra addition), but
3749
     redundancy elimination is likely to transform the code so that
3750
     it uses value of the variable before increment anyway,
3751
     so it is not that much unrealistic.  */
3752
  if (cst_and_fits_in_hwi (cstep))
3753
    cstepi = int_cst_value (cstep);
3754
  else
3755
    cstepi = 0;
3756
 
3757
  if (!constant_multiple_of (ustep, cstep, &rat))
3758
    return infinite_cost;
3759
 
3760
  if (double_int_fits_in_shwi_p (rat))
3761
    ratio = double_int_to_shwi (rat);
3762
  else
3763
    return infinite_cost;
3764
 
3765
  STRIP_NOPS (cbase);
3766
  ctype = TREE_TYPE (cbase);
3767
 
3768
  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3769
     or ratio == 1, it is better to handle this like
3770
 
3771
     ubase - ratio * cbase + ratio * var
3772
 
3773
     (also holds in the case ratio == -1, TODO.  */
3774
 
3775
  if (cst_and_fits_in_hwi (cbase))
3776
    {
3777
      offset = - ratio * int_cst_value (cbase);
3778
      cost = difference_cost (data,
3779
                              ubase, build_int_cst (utype, 0),
3780
                              &symbol_present, &var_present, &offset,
3781
                              depends_on);
3782
    }
3783
  else if (ratio == 1)
3784
    {
3785
      cost = difference_cost (data,
3786
                              ubase, cbase,
3787
                              &symbol_present, &var_present, &offset,
3788
                              depends_on);
3789
    }
3790
  else if (address_p
3791
           && !POINTER_TYPE_P (ctype)
3792
           && multiplier_allowed_in_address_p
3793
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
3794
                        TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3795
    {
3796
      cbase
3797
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3798
      cost = difference_cost (data,
3799
                              ubase, cbase,
3800
                              &symbol_present, &var_present, &offset,
3801
                              depends_on);
3802
    }
3803
  else
3804
    {
3805
      cost = force_var_cost (data, cbase, depends_on);
3806
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3807
      cost = add_costs (cost,
3808
                        difference_cost (data,
3809
                                         ubase, build_int_cst (utype, 0),
3810
                                         &symbol_present, &var_present,
3811
                                         &offset, depends_on));
3812
    }
3813
 
3814
  /* If we are after the increment, the value of the candidate is higher by
3815
     one iteration.  */
3816
  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3817
  if (stmt_is_after_inc)
3818
    offset -= ratio * cstepi;
3819
 
3820
  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3821
     (symbol/var1/const parts may be omitted).  If we are looking for an
3822
     address, find the cost of addressing this.  */
3823
  if (address_p)
3824
    return add_costs (cost,
3825
                      get_address_cost (symbol_present, var_present,
3826
                                        offset, ratio, cstepi,
3827
                                        TYPE_MODE (TREE_TYPE (utype)),
3828
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3829
                                        speed, stmt_is_after_inc,
3830
                                        can_autoinc));
3831
 
3832
  /* Otherwise estimate the costs for computing the expression.  */
3833
  if (!symbol_present && !var_present && !offset)
3834
    {
3835
      if (ratio != 1)
3836
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3837
      return cost;
3838
    }
3839
 
3840
  /* Symbol + offset should be compile-time computable so consider that they
3841
      are added once to the variable, if present.  */
3842
  if (var_present && (symbol_present || offset))
3843
    cost.cost += add_cost (TYPE_MODE (ctype), speed)
3844
                 / AVG_LOOP_NITER (data->current_loop);
3845
 
3846
  /* Having offset does not affect runtime cost in case it is added to
3847
     symbol, but it increases complexity.  */
3848
  if (offset)
3849
    cost.complexity++;
3850
 
3851
  cost.cost += add_cost (TYPE_MODE (ctype), speed);
3852
 
3853
  aratio = ratio > 0 ? ratio : -ratio;
3854
  if (aratio != 1)
3855
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3856
  return cost;
3857
 
3858
fallback:
3859
  if (can_autoinc)
3860
    *can_autoinc = false;
3861
 
3862
  {
3863
    /* Just get the expression, expand it and measure the cost.  */
3864
    tree comp = get_computation_at (data->current_loop, use, cand, at);
3865
 
3866
    if (!comp)
3867
      return infinite_cost;
3868
 
3869
    if (address_p)
3870
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3871
 
3872
    return new_cost (computation_cost (comp, speed), 0);
3873
  }
3874
}
3875
 
3876
/* Determines the cost of the computation by that USE is expressed
3877
   from induction variable CAND.  If ADDRESS_P is true, we just need
3878
   to create an address from it, otherwise we want to get it into
3879
   register.  A set of invariants we depend on is stored in
3880
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
3881
   autoinc addressing is likely.  */
3882
 
3883
static comp_cost
3884
get_computation_cost (struct ivopts_data *data,
3885
                      struct iv_use *use, struct iv_cand *cand,
3886
                      bool address_p, bitmap *depends_on, bool *can_autoinc)
3887
{
3888
  return get_computation_cost_at (data,
3889
                                  use, cand, address_p, depends_on, use->stmt,
3890
                                  can_autoinc);
3891
}
3892
 
3893
/* Determines cost of basing replacement of USE on CAND in a generic
3894
   expression.  */
3895
 
3896
static bool
3897
determine_use_iv_cost_generic (struct ivopts_data *data,
3898
                               struct iv_use *use, struct iv_cand *cand)
3899
{
3900
  bitmap depends_on;
3901
  comp_cost cost;
3902
 
3903
  /* The simple case first -- if we need to express value of the preserved
3904
     original biv, the cost is 0.  This also prevents us from counting the
3905
     cost of increment twice -- once at this use and once in the cost of
3906
     the candidate.  */
3907
  if (cand->pos == IP_ORIGINAL
3908
      && cand->incremented_at == use->stmt)
3909
    {
3910
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3911
      return true;
3912
    }
3913
 
3914
  cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3915
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3916
 
3917
  return !infinite_cost_p (cost);
3918
}
3919
 
3920
/* Determines cost of basing replacement of USE on CAND in an address.  */
3921
 
3922
static bool
3923
determine_use_iv_cost_address (struct ivopts_data *data,
3924
                               struct iv_use *use, struct iv_cand *cand)
3925
{
3926
  bitmap depends_on;
3927
  bool can_autoinc;
3928
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3929
                                         &can_autoinc);
3930
 
3931
  if (cand->ainc_use == use)
3932
    {
3933
      if (can_autoinc)
3934
        cost.cost -= cand->cost_step;
3935
      /* If we generated the candidate solely for exploiting autoincrement
3936
         opportunities, and it turns out it can't be used, set the cost to
3937
         infinity to make sure we ignore it.  */
3938
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3939
        cost = infinite_cost;
3940
    }
3941
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3942
 
3943
  return !infinite_cost_p (cost);
3944
}
3945
 
3946
/* Computes value of candidate CAND at position AT in iteration NITER, and
3947
   stores it to VAL.  */
3948
 
3949
static void
3950
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3951
               aff_tree *val)
3952
{
3953
  aff_tree step, delta, nit;
3954
  struct iv *iv = cand->iv;
3955
  tree type = TREE_TYPE (iv->base);
3956
  tree steptype = type;
3957
  if (POINTER_TYPE_P (type))
3958
    steptype = sizetype;
3959
 
3960
  tree_to_aff_combination (iv->step, steptype, &step);
3961
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3962
  aff_combination_convert (&nit, steptype);
3963
  aff_combination_mult (&nit, &step, &delta);
3964
  if (stmt_after_increment (loop, cand, at))
3965
    aff_combination_add (&delta, &step);
3966
 
3967
  tree_to_aff_combination (iv->base, type, val);
3968
  aff_combination_add (val, &delta);
3969
}
3970
 
3971
/* Returns period of induction variable iv.  */
3972
 
3973
static tree
3974
iv_period (struct iv *iv)
3975
{
3976
  tree step = iv->step, period, type;
3977
  tree pow2div;
3978
 
3979
  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3980
 
3981
  /* Period of the iv is gcd (step, type range).  Since type range is power
3982
     of two, it suffices to determine the maximum power of two that divides
3983
     step.  */
3984
  pow2div = num_ending_zeros (step);
3985
  type = unsigned_type_for (TREE_TYPE (step));
3986
 
3987
  period = build_low_bits_mask (type,
3988
                                (TYPE_PRECISION (type)
3989
                                 - tree_low_cst (pow2div, 1)));
3990
 
3991
  return period;
3992
}
3993
 
3994
/* Returns the comparison operator used when eliminating the iv USE.  */
3995
 
3996
static enum tree_code
3997
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3998
{
3999
  struct loop *loop = data->current_loop;
4000
  basic_block ex_bb;
4001
  edge exit;
4002
 
4003
  ex_bb = gimple_bb (use->stmt);
4004
  exit = EDGE_SUCC (ex_bb, 0);
4005
  if (flow_bb_inside_loop_p (loop, exit->dest))
4006
    exit = EDGE_SUCC (ex_bb, 1);
4007
 
4008
  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4009
}
4010
 
4011
/* Check whether it is possible to express the condition in USE by comparison
4012
   of candidate CAND.  If so, store the value compared with to BOUND.  */
4013
 
4014
static bool
4015
may_eliminate_iv (struct ivopts_data *data,
4016
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
4017
{
4018
  basic_block ex_bb;
4019
  edge exit;
4020
  tree nit, period;
4021
  struct loop *loop = data->current_loop;
4022
  aff_tree bnd;
4023
 
4024
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4025
    return false;
4026
 
4027
  /* For now works only for exits that dominate the loop latch.
4028
     TODO: extend to other conditions inside loop body.  */
4029
  ex_bb = gimple_bb (use->stmt);
4030
  if (use->stmt != last_stmt (ex_bb)
4031
      || gimple_code (use->stmt) != GIMPLE_COND
4032
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4033
    return false;
4034
 
4035
  exit = EDGE_SUCC (ex_bb, 0);
4036
  if (flow_bb_inside_loop_p (loop, exit->dest))
4037
    exit = EDGE_SUCC (ex_bb, 1);
4038
  if (flow_bb_inside_loop_p (loop, exit->dest))
4039
    return false;
4040
 
4041
  nit = niter_for_exit (data, exit);
4042
  if (!nit)
4043
    return false;
4044
 
4045
  /* Determine whether we can use the variable to test the exit condition.
4046
     This is the case iff the period of the induction variable is greater
4047
     than the number of iterations for which the exit condition is true.  */
4048
  period = iv_period (cand->iv);
4049
 
4050
  /* If the number of iterations is constant, compare against it directly.  */
4051
  if (TREE_CODE (nit) == INTEGER_CST)
4052
    {
4053
      if (!tree_int_cst_lt (nit, period))
4054
        return false;
4055
    }
4056
 
4057
  /* If not, and if this is the only possible exit of the loop, see whether
4058
     we can get a conservative estimate on the number of iterations of the
4059
     entire loop and compare against that instead.  */
4060
  else if (loop_only_exit_p (loop, exit))
4061
    {
4062
      double_int period_value, max_niter;
4063
      if (!estimated_loop_iterations (loop, true, &max_niter))
4064
        return false;
4065
      period_value = tree_to_double_int (period);
4066
      if (double_int_ucmp (max_niter, period_value) >= 0)
4067
        return false;
4068
    }
4069
 
4070
  /* Otherwise, punt.  */
4071
  else
4072
    return false;
4073
 
4074
  cand_value_at (loop, cand, use->stmt, nit, &bnd);
4075
 
4076
  *bound = aff_combination_to_tree (&bnd);
4077
  /* It is unlikely that computing the number of iterations using division
4078
     would be more profitable than keeping the original induction variable.  */
4079
  if (expression_expensive_p (*bound))
4080
    return false;
4081
  return true;
4082
}
4083
 
4084
/* Determines cost of basing replacement of USE on CAND in a condition.  */
4085
 
4086
static bool
4087
determine_use_iv_cost_condition (struct ivopts_data *data,
4088
                                 struct iv_use *use, struct iv_cand *cand)
4089
{
4090
  tree bound = NULL_TREE;
4091
  struct iv *cmp_iv;
4092
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4093
  comp_cost elim_cost, express_cost, cost;
4094
  bool ok;
4095
  tree *control_var, *bound_cst;
4096
 
4097
  /* Only consider real candidates.  */
4098
  if (!cand->iv)
4099
    {
4100
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4101
      return false;
4102
    }
4103
 
4104
  /* Try iv elimination.  */
4105
  if (may_eliminate_iv (data, use, cand, &bound))
4106
    {
4107
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
4108
      /* The bound is a loop invariant, so it will be only computed
4109
         once.  */
4110
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4111
    }
4112
  else
4113
    elim_cost = infinite_cost;
4114
 
4115
  /* Try expressing the original giv.  If it is compared with an invariant,
4116
     note that we cannot get rid of it.  */
4117
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4118
                              NULL, &cmp_iv);
4119
  gcc_assert (ok);
4120
 
4121
  /* When the condition is a comparison of the candidate IV against
4122
     zero, prefer this IV.
4123
 
4124
     TODO: The constant that we're substracting from the cost should
4125
     be target-dependent.  This information should be added to the
4126
     target costs for each backend.  */
4127
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4128
      && integer_zerop (*bound_cst)
4129
      && (operand_equal_p (*control_var, cand->var_after, 0)
4130
          || operand_equal_p (*control_var, cand->var_before, 0)))
4131
    elim_cost.cost -= 1;
4132
 
4133
  express_cost = get_computation_cost (data, use, cand, false,
4134
                                       &depends_on_express, NULL);
4135
  fd_ivopts_data = data;
4136
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4137
 
4138
  /* Choose the better approach, preferring the eliminated IV. */
4139
  if (compare_costs (elim_cost, express_cost) <= 0)
4140
    {
4141
      cost = elim_cost;
4142
      depends_on = depends_on_elim;
4143
      depends_on_elim = NULL;
4144
    }
4145
  else
4146
    {
4147
      cost = express_cost;
4148
      depends_on = depends_on_express;
4149
      depends_on_express = NULL;
4150
      bound = NULL_TREE;
4151
    }
4152
 
4153
  set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4154
 
4155
  if (depends_on_elim)
4156
    BITMAP_FREE (depends_on_elim);
4157
  if (depends_on_express)
4158
    BITMAP_FREE (depends_on_express);
4159
 
4160
  return !infinite_cost_p (cost);
4161
}
4162
 
4163
/* Determines cost of basing replacement of USE on CAND.  Returns false
4164
   if USE cannot be based on CAND.  */
4165
 
4166
static bool
4167
determine_use_iv_cost (struct ivopts_data *data,
4168
                       struct iv_use *use, struct iv_cand *cand)
4169
{
4170
  switch (use->type)
4171
    {
4172
    case USE_NONLINEAR_EXPR:
4173
      return determine_use_iv_cost_generic (data, use, cand);
4174
 
4175
    case USE_ADDRESS:
4176
      return determine_use_iv_cost_address (data, use, cand);
4177
 
4178
    case USE_COMPARE:
4179
      return determine_use_iv_cost_condition (data, use, cand);
4180
 
4181
    default:
4182
      gcc_unreachable ();
4183
    }
4184
}
4185
 
4186
/* Return true if get_computation_cost indicates that autoincrement is
4187
   a possibility for the pair of USE and CAND, false otherwise.  */
4188
 
4189
static bool
4190
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4191
                           struct iv_cand *cand)
4192
{
4193
  bitmap depends_on;
4194
  bool can_autoinc;
4195
  comp_cost cost;
4196
 
4197
  if (use->type != USE_ADDRESS)
4198
    return false;
4199
 
4200
  cost = get_computation_cost (data, use, cand, true, &depends_on,
4201
                               &can_autoinc);
4202
 
4203
  BITMAP_FREE (depends_on);
4204
 
4205
  return !infinite_cost_p (cost) && can_autoinc;
4206
}
4207
 
4208
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4209
   use that allows autoincrement, and set their AINC_USE if possible.  */
4210
 
4211
static void
4212
set_autoinc_for_original_candidates (struct ivopts_data *data)
4213
{
4214
  unsigned i, j;
4215
 
4216
  for (i = 0; i < n_iv_cands (data); i++)
4217
    {
4218
      struct iv_cand *cand = iv_cand (data, i);
4219
      struct iv_use *closest = NULL;
4220
      if (cand->pos != IP_ORIGINAL)
4221
        continue;
4222
      for (j = 0; j < n_iv_uses (data); j++)
4223
        {
4224
          struct iv_use *use = iv_use (data, j);
4225
          unsigned uid = gimple_uid (use->stmt);
4226
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4227
              || uid > gimple_uid (cand->incremented_at))
4228
            continue;
4229
          if (closest == NULL || uid > gimple_uid (closest->stmt))
4230
            closest = use;
4231
        }
4232
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4233
        continue;
4234
      cand->ainc_use = closest;
4235
    }
4236
}
4237
 
4238
/* Finds the candidates for the induction variables.  */
4239
 
4240
static void
4241
find_iv_candidates (struct ivopts_data *data)
4242
{
4243
  /* Add commonly used ivs.  */
4244
  add_standard_iv_candidates (data);
4245
 
4246
  /* Add old induction variables.  */
4247
  add_old_ivs_candidates (data);
4248
 
4249
  /* Add induction variables derived from uses.  */
4250
  add_derived_ivs_candidates (data);
4251
 
4252
  set_autoinc_for_original_candidates (data);
4253
 
4254
  /* Record the important candidates.  */
4255
  record_important_candidates (data);
4256
}
4257
 
4258
/* Determines costs of basing the use of the iv on an iv candidate.  */
4259
 
4260
static void
4261
determine_use_iv_costs (struct ivopts_data *data)
4262
{
4263
  unsigned i, j;
4264
  struct iv_use *use;
4265
  struct iv_cand *cand;
4266
  bitmap to_clear = BITMAP_ALLOC (NULL);
4267
 
4268
  alloc_use_cost_map (data);
4269
 
4270
  for (i = 0; i < n_iv_uses (data); i++)
4271
    {
4272
      use = iv_use (data, i);
4273
 
4274
      if (data->consider_all_candidates)
4275
        {
4276
          for (j = 0; j < n_iv_cands (data); j++)
4277
            {
4278
              cand = iv_cand (data, j);
4279
              determine_use_iv_cost (data, use, cand);
4280
            }
4281
        }
4282
      else
4283
        {
4284
          bitmap_iterator bi;
4285
 
4286
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4287
            {
4288
              cand = iv_cand (data, j);
4289
              if (!determine_use_iv_cost (data, use, cand))
4290
                bitmap_set_bit (to_clear, j);
4291
            }
4292
 
4293
          /* Remove the candidates for that the cost is infinite from
4294
             the list of related candidates.  */
4295
          bitmap_and_compl_into (use->related_cands, to_clear);
4296
          bitmap_clear (to_clear);
4297
        }
4298
    }
4299
 
4300
  BITMAP_FREE (to_clear);
4301
 
4302
  if (dump_file && (dump_flags & TDF_DETAILS))
4303
    {
4304
      fprintf (dump_file, "Use-candidate costs:\n");
4305
 
4306
      for (i = 0; i < n_iv_uses (data); i++)
4307
        {
4308
          use = iv_use (data, i);
4309
 
4310
          fprintf (dump_file, "Use %d:\n", i);
4311
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4312
          for (j = 0; j < use->n_map_members; j++)
4313
            {
4314
              if (!use->cost_map[j].cand
4315
                  || infinite_cost_p (use->cost_map[j].cost))
4316
                continue;
4317
 
4318
              fprintf (dump_file, "  %d\t%d\t%d\t",
4319
                       use->cost_map[j].cand->id,
4320
                       use->cost_map[j].cost.cost,
4321
                       use->cost_map[j].cost.complexity);
4322
              if (use->cost_map[j].depends_on)
4323
                bitmap_print (dump_file,
4324
                              use->cost_map[j].depends_on, "","");
4325
              fprintf (dump_file, "\n");
4326
            }
4327
 
4328
          fprintf (dump_file, "\n");
4329
        }
4330
      fprintf (dump_file, "\n");
4331
    }
4332
}
4333
 
4334
/* Determines cost of the candidate CAND.  */
4335
 
4336
static void
4337
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338
{
4339
  comp_cost cost_base;
4340
  unsigned cost, cost_step;
4341
  tree base;
4342
 
4343
  if (!cand->iv)
4344
    {
4345
      cand->cost = 0;
4346
      return;
4347
    }
4348
 
4349
  /* There are two costs associated with the candidate -- its increment
4350
     and its initialization.  The second is almost negligible for any loop
4351
     that rolls enough, so we take it just very little into account.  */
4352
 
4353
  base = cand->iv->base;
4354
  cost_base = force_var_cost (data, base, NULL);
4355
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4356
 
4357
  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4358
 
4359
  /* Prefer the original ivs unless we may gain something by replacing it.
4360
     The reason is to make debugging simpler; so this is not relevant for
4361
     artificial ivs created by other optimization passes.  */
4362
  if (cand->pos != IP_ORIGINAL
4363
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4364
    cost++;
4365
 
4366
  /* Prefer not to insert statements into latch unless there are some
4367
     already (so that we do not create unnecessary jumps).  */
4368
  if (cand->pos == IP_END
4369
      && empty_block_p (ip_end_pos (data->current_loop)))
4370
    cost++;
4371
 
4372
  cand->cost = cost;
4373
  cand->cost_step = cost_step;
4374
}
4375
 
4376
/* Determines costs of computation of the candidates.  */
4377
 
4378
static void
4379
determine_iv_costs (struct ivopts_data *data)
4380
{
4381
  unsigned i;
4382
 
4383
  if (dump_file && (dump_flags & TDF_DETAILS))
4384
    {
4385
      fprintf (dump_file, "Candidate costs:\n");
4386
      fprintf (dump_file, "  cand\tcost\n");
4387
    }
4388
 
4389
  for (i = 0; i < n_iv_cands (data); i++)
4390
    {
4391
      struct iv_cand *cand = iv_cand (data, i);
4392
 
4393
      determine_iv_cost (data, cand);
4394
 
4395
      if (dump_file && (dump_flags & TDF_DETAILS))
4396
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4397
    }
4398
 
4399
  if (dump_file && (dump_flags & TDF_DETAILS))
4400
    fprintf (dump_file, "\n");
4401
}
4402
 
4403
/* Calculates cost for having SIZE induction variables.  */
4404
 
4405
static unsigned
4406
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4407
{
4408
  /* We add size to the cost, so that we prefer eliminating ivs
4409
     if possible.  */
4410
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4411
}
4412
 
4413
/* For each size of the induction variable set determine the penalty.  */
4414
 
4415
static void
4416
determine_set_costs (struct ivopts_data *data)
4417
{
4418
  unsigned j, n;
4419
  gimple phi;
4420
  gimple_stmt_iterator psi;
4421
  tree op;
4422
  struct loop *loop = data->current_loop;
4423
  bitmap_iterator bi;
4424
 
4425
  /* We use the following model (definitely improvable, especially the
4426
     cost function -- TODO):
4427
 
4428
     We estimate the number of registers available (using MD data), name it A.
4429
 
4430
     We estimate the number of registers used by the loop, name it U.  This
4431
     number is obtained as the number of loop phi nodes (not counting virtual
4432
     registers and bivs) + the number of variables from outside of the loop.
4433
 
4434
     We set a reserve R (free regs that are used for temporary computations,
4435
     etc.).  For now the reserve is a constant 3.
4436
 
4437
     Let I be the number of induction variables.
4438
 
4439
     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4440
        make a lot of ivs without a reason).
4441
     -- if A - R < U + I <= A, the cost is I * PRES_COST
4442
     -- if U + I > A, the cost is I * PRES_COST and
4443
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4444
 
4445
  if (dump_file && (dump_flags & TDF_DETAILS))
4446
    {
4447
      fprintf (dump_file, "Global costs:\n");
4448
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4449
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4450
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4451
    }
4452
 
4453
  n = 0;
4454
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4455
    {
4456
      phi = gsi_stmt (psi);
4457
      op = PHI_RESULT (phi);
4458
 
4459
      if (!is_gimple_reg (op))
4460
        continue;
4461
 
4462
      if (get_iv (data, op))
4463
        continue;
4464
 
4465
      n++;
4466
    }
4467
 
4468
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4469
    {
4470
      struct version_info *info = ver_info (data, j);
4471
 
4472
      if (info->inv_id && info->has_nonlin_use)
4473
        n++;
4474
    }
4475
 
4476
  data->regs_used = n;
4477
  if (dump_file && (dump_flags & TDF_DETAILS))
4478
    fprintf (dump_file, "  regs_used %d\n", n);
4479
 
4480
  if (dump_file && (dump_flags & TDF_DETAILS))
4481
    {
4482
      fprintf (dump_file, "  cost for size:\n");
4483
      fprintf (dump_file, "  ivs\tcost\n");
4484
      for (j = 0; j <= 2 * target_avail_regs; j++)
4485
        fprintf (dump_file, "  %d\t%d\n", j,
4486
                 ivopts_global_cost_for_size (data, j));
4487
      fprintf (dump_file, "\n");
4488
    }
4489
}
4490
 
4491
/* Returns true if A is a cheaper cost pair than B.  */
4492
 
4493
static bool
4494
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4495
{
4496
  int cmp;
4497
 
4498
  if (!a)
4499
    return false;
4500
 
4501
  if (!b)
4502
    return true;
4503
 
4504
  cmp = compare_costs (a->cost, b->cost);
4505
  if (cmp < 0)
4506
    return true;
4507
 
4508
  if (cmp > 0)
4509
    return false;
4510
 
4511
  /* In case the costs are the same, prefer the cheaper candidate.  */
4512
  if (a->cand->cost < b->cand->cost)
4513
    return true;
4514
 
4515
  return false;
4516
}
4517
 
4518
/* Computes the cost field of IVS structure.  */
4519
 
4520
static void
4521
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4522
{
4523
  comp_cost cost = ivs->cand_use_cost;
4524
  cost.cost += ivs->cand_cost;
4525
  cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4526
 
4527
  ivs->cost = cost;
4528
}
4529
 
4530
/* Remove invariants in set INVS to set IVS.  */
4531
 
4532
static void
4533
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4534
{
4535
  bitmap_iterator bi;
4536
  unsigned iid;
4537
 
4538
  if (!invs)
4539
    return;
4540
 
4541
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4542
    {
4543
      ivs->n_invariant_uses[iid]--;
4544
      if (ivs->n_invariant_uses[iid] == 0)
4545
        ivs->n_regs--;
4546
    }
4547
}
4548
 
4549
/* Set USE not to be expressed by any candidate in IVS.  */
4550
 
4551
static void
4552
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4553
                 struct iv_use *use)
4554
{
4555
  unsigned uid = use->id, cid;
4556
  struct cost_pair *cp;
4557
 
4558
  cp = ivs->cand_for_use[uid];
4559
  if (!cp)
4560
    return;
4561
  cid = cp->cand->id;
4562
 
4563
  ivs->bad_uses++;
4564
  ivs->cand_for_use[uid] = NULL;
4565
  ivs->n_cand_uses[cid]--;
4566
 
4567
  if (ivs->n_cand_uses[cid] == 0)
4568
    {
4569
      bitmap_clear_bit (ivs->cands, cid);
4570
      /* Do not count the pseudocandidates.  */
4571
      if (cp->cand->iv)
4572
        ivs->n_regs--;
4573
      ivs->n_cands--;
4574
      ivs->cand_cost -= cp->cand->cost;
4575
 
4576
      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4577
    }
4578
 
4579
  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4580
 
4581
  iv_ca_set_remove_invariants (ivs, cp->depends_on);
4582
  iv_ca_recount_cost (data, ivs);
4583
}
4584
 
4585
/* Add invariants in set INVS to set IVS.  */
4586
 
4587
static void
4588
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4589
{
4590
  bitmap_iterator bi;
4591
  unsigned iid;
4592
 
4593
  if (!invs)
4594
    return;
4595
 
4596
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4597
    {
4598
      ivs->n_invariant_uses[iid]++;
4599
      if (ivs->n_invariant_uses[iid] == 1)
4600
        ivs->n_regs++;
4601
    }
4602
}
4603
 
4604
/* Set cost pair for USE in set IVS to CP.  */
4605
 
4606
static void
4607
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4608
              struct iv_use *use, struct cost_pair *cp)
4609
{
4610
  unsigned uid = use->id, cid;
4611
 
4612
  if (ivs->cand_for_use[uid] == cp)
4613
    return;
4614
 
4615
  if (ivs->cand_for_use[uid])
4616
    iv_ca_set_no_cp (data, ivs, use);
4617
 
4618
  if (cp)
4619
    {
4620
      cid = cp->cand->id;
4621
 
4622
      ivs->bad_uses--;
4623
      ivs->cand_for_use[uid] = cp;
4624
      ivs->n_cand_uses[cid]++;
4625
      if (ivs->n_cand_uses[cid] == 1)
4626
        {
4627
          bitmap_set_bit (ivs->cands, cid);
4628
          /* Do not count the pseudocandidates.  */
4629
          if (cp->cand->iv)
4630
            ivs->n_regs++;
4631
          ivs->n_cands++;
4632
          ivs->cand_cost += cp->cand->cost;
4633
 
4634
          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4635
        }
4636
 
4637
      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4638
      iv_ca_set_add_invariants (ivs, cp->depends_on);
4639
      iv_ca_recount_cost (data, ivs);
4640
    }
4641
}
4642
 
4643
/* Extend set IVS by expressing USE by some of the candidates in it
4644
   if possible.  */
4645
 
4646
static void
4647
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4648
               struct iv_use *use)
4649
{
4650
  struct cost_pair *best_cp = NULL, *cp;
4651
  bitmap_iterator bi;
4652
  unsigned i;
4653
 
4654
  gcc_assert (ivs->upto >= use->id);
4655
 
4656
  if (ivs->upto == use->id)
4657
    {
4658
      ivs->upto++;
4659
      ivs->bad_uses++;
4660
    }
4661
 
4662
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4663
    {
4664
      cp = get_use_iv_cost (data, use, iv_cand (data, i));
4665
 
4666
      if (cheaper_cost_pair (cp, best_cp))
4667
        best_cp = cp;
4668
    }
4669
 
4670
  iv_ca_set_cp (data, ivs, use, best_cp);
4671
}
4672
 
4673
/* Get cost for assignment IVS.  */
4674
 
4675
static comp_cost
4676
iv_ca_cost (struct iv_ca *ivs)
4677
{
4678
  /* This was a conditional expression but it triggered a bug in
4679
     Sun C 5.5.  */
4680
  if (ivs->bad_uses)
4681
    return infinite_cost;
4682
  else
4683
    return ivs->cost;
4684
}
4685
 
4686
/* Returns true if all dependences of CP are among invariants in IVS.  */
4687
 
4688
static bool
4689
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4690
{
4691
  unsigned i;
4692
  bitmap_iterator bi;
4693
 
4694
  if (!cp->depends_on)
4695
    return true;
4696
 
4697
  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4698
    {
4699
      if (ivs->n_invariant_uses[i] == 0)
4700
        return false;
4701
    }
4702
 
4703
  return true;
4704
}
4705
 
4706
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4707
   it before NEXT_CHANGE.  */
4708
 
4709
static struct iv_ca_delta *
4710
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4711
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4712
{
4713
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4714
 
4715
  change->use = use;
4716
  change->old_cp = old_cp;
4717
  change->new_cp = new_cp;
4718
  change->next_change = next_change;
4719
 
4720
  return change;
4721
}
4722
 
4723
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
4724
   are rewritten.  */
4725
 
4726
static struct iv_ca_delta *
4727
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4728
{
4729
  struct iv_ca_delta *last;
4730
 
4731
  if (!l2)
4732
    return l1;
4733
 
4734
  if (!l1)
4735
    return l2;
4736
 
4737
  for (last = l1; last->next_change; last = last->next_change)
4738
    continue;
4739
  last->next_change = l2;
4740
 
4741
  return l1;
4742
}
4743
 
4744
/* Returns candidate by that USE is expressed in IVS.  */
4745
 
4746
static struct cost_pair *
4747
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4748
{
4749
  return ivs->cand_for_use[use->id];
4750
}
4751
 
4752
/* Reverse the list of changes DELTA, forming the inverse to it.  */
4753
 
4754
static struct iv_ca_delta *
4755
iv_ca_delta_reverse (struct iv_ca_delta *delta)
4756
{
4757
  struct iv_ca_delta *act, *next, *prev = NULL;
4758
  struct cost_pair *tmp;
4759
 
4760
  for (act = delta; act; act = next)
4761
    {
4762
      next = act->next_change;
4763
      act->next_change = prev;
4764
      prev = act;
4765
 
4766
      tmp = act->old_cp;
4767
      act->old_cp = act->new_cp;
4768
      act->new_cp = tmp;
4769
    }
4770
 
4771
  return prev;
4772
}
4773
 
4774
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4775
   reverted instead.  */
4776
 
4777
static void
4778
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4779
                    struct iv_ca_delta *delta, bool forward)
4780
{
4781
  struct cost_pair *from, *to;
4782
  struct iv_ca_delta *act;
4783
 
4784
  if (!forward)
4785
    delta = iv_ca_delta_reverse (delta);
4786
 
4787
  for (act = delta; act; act = act->next_change)
4788
    {
4789
      from = act->old_cp;
4790
      to = act->new_cp;
4791
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4792
      iv_ca_set_cp (data, ivs, act->use, to);
4793
    }
4794
 
4795
  if (!forward)
4796
    iv_ca_delta_reverse (delta);
4797
}
4798
 
4799
/* Returns true if CAND is used in IVS.  */
4800
 
4801
static bool
4802
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4803
{
4804
  return ivs->n_cand_uses[cand->id] > 0;
4805
}
4806
 
4807
/* Returns number of induction variable candidates in the set IVS.  */
4808
 
4809
static unsigned
4810
iv_ca_n_cands (struct iv_ca *ivs)
4811
{
4812
  return ivs->n_cands;
4813
}
4814
 
4815
/* Free the list of changes DELTA.  */
4816
 
4817
static void
4818
iv_ca_delta_free (struct iv_ca_delta **delta)
4819
{
4820
  struct iv_ca_delta *act, *next;
4821
 
4822
  for (act = *delta; act; act = next)
4823
    {
4824
      next = act->next_change;
4825
      free (act);
4826
    }
4827
 
4828
  *delta = NULL;
4829
}
4830
 
4831
/* Allocates new iv candidates assignment.  */
4832
 
4833
static struct iv_ca *
4834
iv_ca_new (struct ivopts_data *data)
4835
{
4836
  struct iv_ca *nw = XNEW (struct iv_ca);
4837
 
4838
  nw->upto = 0;
4839
  nw->bad_uses = 0;
4840
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4841
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4842
  nw->cands = BITMAP_ALLOC (NULL);
4843
  nw->n_cands = 0;
4844
  nw->n_regs = 0;
4845
  nw->cand_use_cost = zero_cost;
4846
  nw->cand_cost = 0;
4847
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4848
  nw->cost = zero_cost;
4849
 
4850
  return nw;
4851
}
4852
 
4853
/* Free memory occupied by the set IVS.  */
4854
 
4855
static void
4856
iv_ca_free (struct iv_ca **ivs)
4857
{
4858
  free ((*ivs)->cand_for_use);
4859
  free ((*ivs)->n_cand_uses);
4860
  BITMAP_FREE ((*ivs)->cands);
4861
  free ((*ivs)->n_invariant_uses);
4862
  free (*ivs);
4863
  *ivs = NULL;
4864
}
4865
 
4866
/* Dumps IVS to FILE.  */
4867
 
4868
static void
4869
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4870
{
4871
  const char *pref = "  invariants ";
4872
  unsigned i;
4873
  comp_cost cost = iv_ca_cost (ivs);
4874
 
4875
  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4876
  bitmap_print (file, ivs->cands, "  candidates ","\n");
4877
 
4878
  for (i = 1; i <= data->max_inv_id; i++)
4879
    if (ivs->n_invariant_uses[i])
4880
      {
4881
        fprintf (file, "%s%d", pref, i);
4882
        pref = ", ";
4883
      }
4884
  fprintf (file, "\n");
4885
}
4886
 
4887
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
4888
   new set, and store differences in DELTA.  Number of induction variables
4889
   in the new set is stored to N_IVS.  */
4890
 
4891
static comp_cost
4892
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4893
              struct iv_cand *cand, struct iv_ca_delta **delta,
4894
              unsigned *n_ivs)
4895
{
4896
  unsigned i;
4897
  comp_cost cost;
4898
  struct iv_use *use;
4899
  struct cost_pair *old_cp, *new_cp;
4900
 
4901
  *delta = NULL;
4902
  for (i = 0; i < ivs->upto; i++)
4903
    {
4904
      use = iv_use (data, i);
4905
      old_cp = iv_ca_cand_for_use (ivs, use);
4906
 
4907
      if (old_cp
4908
          && old_cp->cand == cand)
4909
        continue;
4910
 
4911
      new_cp = get_use_iv_cost (data, use, cand);
4912
      if (!new_cp)
4913
        continue;
4914
 
4915
      if (!iv_ca_has_deps (ivs, new_cp))
4916
        continue;
4917
 
4918
      if (!cheaper_cost_pair (new_cp, old_cp))
4919
        continue;
4920
 
4921
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4922
    }
4923
 
4924
  iv_ca_delta_commit (data, ivs, *delta, true);
4925
  cost = iv_ca_cost (ivs);
4926
  if (n_ivs)
4927
    *n_ivs = iv_ca_n_cands (ivs);
4928
  iv_ca_delta_commit (data, ivs, *delta, false);
4929
 
4930
  return cost;
4931
}
4932
 
4933
/* Try narrowing set IVS by removing CAND.  Return the cost of
4934
   the new set and store the differences in DELTA.  */
4935
 
4936
static comp_cost
4937
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4938
              struct iv_cand *cand, struct iv_ca_delta **delta)
4939
{
4940
  unsigned i, ci;
4941
  struct iv_use *use;
4942
  struct cost_pair *old_cp, *new_cp, *cp;
4943
  bitmap_iterator bi;
4944
  struct iv_cand *cnd;
4945
  comp_cost cost;
4946
 
4947
  *delta = NULL;
4948
  for (i = 0; i < n_iv_uses (data); i++)
4949
    {
4950
      use = iv_use (data, i);
4951
 
4952
      old_cp = iv_ca_cand_for_use (ivs, use);
4953
      if (old_cp->cand != cand)
4954
        continue;
4955
 
4956
      new_cp = NULL;
4957
 
4958
      if (data->consider_all_candidates)
4959
        {
4960
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4961
            {
4962
              if (ci == cand->id)
4963
                continue;
4964
 
4965
              cnd = iv_cand (data, ci);
4966
 
4967
              cp = get_use_iv_cost (data, use, cnd);
4968
              if (!cp)
4969
                continue;
4970
              if (!iv_ca_has_deps (ivs, cp))
4971
                continue;
4972
 
4973
              if (!cheaper_cost_pair (cp, new_cp))
4974
                continue;
4975
 
4976
              new_cp = cp;
4977
            }
4978
        }
4979
      else
4980
        {
4981
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4982
            {
4983
              if (ci == cand->id)
4984
                continue;
4985
 
4986
              cnd = iv_cand (data, ci);
4987
 
4988
              cp = get_use_iv_cost (data, use, cnd);
4989
              if (!cp)
4990
                continue;
4991
              if (!iv_ca_has_deps (ivs, cp))
4992
                continue;
4993
 
4994
              if (!cheaper_cost_pair (cp, new_cp))
4995
                continue;
4996
 
4997
              new_cp = cp;
4998
            }
4999
        }
5000
 
5001
      if (!new_cp)
5002
        {
5003
          iv_ca_delta_free (delta);
5004
          return infinite_cost;
5005
        }
5006
 
5007
      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5008
    }
5009
 
5010
  iv_ca_delta_commit (data, ivs, *delta, true);
5011
  cost = iv_ca_cost (ivs);
5012
  iv_ca_delta_commit (data, ivs, *delta, false);
5013
 
5014
  return cost;
5015
}
5016
 
5017
/* Try optimizing the set of candidates IVS by removing candidates different
5018
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
5019
   differences in DELTA.  */
5020
 
5021
static comp_cost
5022
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5023
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
5024
{
5025
  bitmap_iterator bi;
5026
  struct iv_ca_delta *act_delta, *best_delta;
5027
  unsigned i;
5028
  comp_cost best_cost, acost;
5029
  struct iv_cand *cand;
5030
 
5031
  best_delta = NULL;
5032
  best_cost = iv_ca_cost (ivs);
5033
 
5034
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5035
    {
5036
      cand = iv_cand (data, i);
5037
 
5038
      if (cand == except_cand)
5039
        continue;
5040
 
5041
      acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5042
 
5043
      if (compare_costs (acost, best_cost) < 0)
5044
        {
5045
          best_cost = acost;
5046
          iv_ca_delta_free (&best_delta);
5047
          best_delta = act_delta;
5048
        }
5049
      else
5050
        iv_ca_delta_free (&act_delta);
5051
    }
5052
 
5053
  if (!best_delta)
5054
    {
5055
      *delta = NULL;
5056
      return best_cost;
5057
    }
5058
 
5059
  /* Recurse to possibly remove other unnecessary ivs.  */
5060
  iv_ca_delta_commit (data, ivs, best_delta, true);
5061
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5062
  iv_ca_delta_commit (data, ivs, best_delta, false);
5063
  *delta = iv_ca_delta_join (best_delta, *delta);
5064
  return best_cost;
5065
}
5066
 
5067
/* Tries to extend the sets IVS in the best possible way in order
5068
   to express the USE.  */
5069
 
5070
static bool
5071
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5072
                  struct iv_use *use)
5073
{
5074
  comp_cost best_cost, act_cost;
5075
  unsigned i;
5076
  bitmap_iterator bi;
5077
  struct iv_cand *cand;
5078
  struct iv_ca_delta *best_delta = NULL, *act_delta;
5079
  struct cost_pair *cp;
5080
 
5081
  iv_ca_add_use (data, ivs, use);
5082
  best_cost = iv_ca_cost (ivs);
5083
 
5084
  cp = iv_ca_cand_for_use (ivs, use);
5085
  if (cp)
5086
    {
5087
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5088
      iv_ca_set_no_cp (data, ivs, use);
5089
    }
5090
 
5091
  /* First try important candidates not based on any memory object.  Only if
5092
     this fails, try the specific ones.  Rationale -- in loops with many
5093
     variables the best choice often is to use just one generic biv.  If we
5094
     added here many ivs specific to the uses, the optimization algorithm later
5095
     would be likely to get stuck in a local minimum, thus causing us to create
5096
     too many ivs.  The approach from few ivs to more seems more likely to be
5097
     successful -- starting from few ivs, replacing an expensive use by a
5098
     specific iv should always be a win.  */
5099
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5100
    {
5101
      cand = iv_cand (data, i);
5102
 
5103
      if (cand->iv->base_object != NULL_TREE)
5104
        continue;
5105
 
5106
      if (iv_ca_cand_used_p (ivs, cand))
5107
        continue;
5108
 
5109
      cp = get_use_iv_cost (data, use, cand);
5110
      if (!cp)
5111
        continue;
5112
 
5113
      iv_ca_set_cp (data, ivs, use, cp);
5114
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5115
      iv_ca_set_no_cp (data, ivs, use);
5116
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5117
 
5118
      if (compare_costs (act_cost, best_cost) < 0)
5119
        {
5120
          best_cost = act_cost;
5121
 
5122
          iv_ca_delta_free (&best_delta);
5123
          best_delta = act_delta;
5124
        }
5125
      else
5126
        iv_ca_delta_free (&act_delta);
5127
    }
5128
 
5129
  if (infinite_cost_p (best_cost))
5130
    {
5131
      for (i = 0; i < use->n_map_members; i++)
5132
        {
5133
          cp = use->cost_map + i;
5134
          cand = cp->cand;
5135
          if (!cand)
5136
            continue;
5137
 
5138
          /* Already tried this.  */
5139
          if (cand->important && cand->iv->base_object == NULL_TREE)
5140
            continue;
5141
 
5142
          if (iv_ca_cand_used_p (ivs, cand))
5143
            continue;
5144
 
5145
          act_delta = NULL;
5146
          iv_ca_set_cp (data, ivs, use, cp);
5147
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5148
          iv_ca_set_no_cp (data, ivs, use);
5149
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5150
                                       cp, act_delta);
5151
 
5152
          if (compare_costs (act_cost, best_cost) < 0)
5153
            {
5154
              best_cost = act_cost;
5155
 
5156
              if (best_delta)
5157
                iv_ca_delta_free (&best_delta);
5158
              best_delta = act_delta;
5159
            }
5160
          else
5161
            iv_ca_delta_free (&act_delta);
5162
        }
5163
    }
5164
 
5165
  iv_ca_delta_commit (data, ivs, best_delta, true);
5166
  iv_ca_delta_free (&best_delta);
5167
 
5168
  return !infinite_cost_p (best_cost);
5169
}
5170
 
5171
/* Finds an initial assignment of candidates to uses.  */
5172
 
5173
static struct iv_ca *
5174
get_initial_solution (struct ivopts_data *data)
5175
{
5176
  struct iv_ca *ivs = iv_ca_new (data);
5177
  unsigned i;
5178
 
5179
  for (i = 0; i < n_iv_uses (data); i++)
5180
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5181
      {
5182
        iv_ca_free (&ivs);
5183
        return NULL;
5184
      }
5185
 
5186
  return ivs;
5187
}
5188
 
5189
/* Tries to improve set of induction variables IVS.  */
5190
 
5191
static bool
5192
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5193
{
5194
  unsigned i, n_ivs;
5195
  comp_cost acost, best_cost = iv_ca_cost (ivs);
5196
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5197
  struct iv_cand *cand;
5198
 
5199
  /* Try extending the set of induction variables by one.  */
5200
  for (i = 0; i < n_iv_cands (data); i++)
5201
    {
5202
      cand = iv_cand (data, i);
5203
 
5204
      if (iv_ca_cand_used_p (ivs, cand))
5205
        continue;
5206
 
5207
      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5208
      if (!act_delta)
5209
        continue;
5210
 
5211
      /* If we successfully added the candidate and the set is small enough,
5212
         try optimizing it by removing other candidates.  */
5213
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5214
        {
5215
          iv_ca_delta_commit (data, ivs, act_delta, true);
5216
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5217
          iv_ca_delta_commit (data, ivs, act_delta, false);
5218
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5219
        }
5220
 
5221
      if (compare_costs (acost, best_cost) < 0)
5222
        {
5223
          best_cost = acost;
5224
          iv_ca_delta_free (&best_delta);
5225
          best_delta = act_delta;
5226
        }
5227
      else
5228
        iv_ca_delta_free (&act_delta);
5229
    }
5230
 
5231
  if (!best_delta)
5232
    {
5233
      /* Try removing the candidates from the set instead.  */
5234
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5235
 
5236
      /* Nothing more we can do.  */
5237
      if (!best_delta)
5238
        return false;
5239
    }
5240
 
5241
  iv_ca_delta_commit (data, ivs, best_delta, true);
5242
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5243
  iv_ca_delta_free (&best_delta);
5244
  return true;
5245
}
5246
 
5247
/* Attempts to find the optimal set of induction variables.  We do simple
5248
   greedy heuristic -- we try to replace at most one candidate in the selected
5249
   solution and remove the unused ivs while this improves the cost.  */
5250
 
5251
static struct iv_ca *
5252
find_optimal_iv_set (struct ivopts_data *data)
5253
{
5254
  unsigned i;
5255
  struct iv_ca *set;
5256
  struct iv_use *use;
5257
 
5258
  /* Get the initial solution.  */
5259
  set = get_initial_solution (data);
5260
  if (!set)
5261
    {
5262
      if (dump_file && (dump_flags & TDF_DETAILS))
5263
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5264
      return NULL;
5265
    }
5266
 
5267
  if (dump_file && (dump_flags & TDF_DETAILS))
5268
    {
5269
      fprintf (dump_file, "Initial set of candidates:\n");
5270
      iv_ca_dump (data, dump_file, set);
5271
    }
5272
 
5273
  while (try_improve_iv_set (data, set))
5274
    {
5275
      if (dump_file && (dump_flags & TDF_DETAILS))
5276
        {
5277
          fprintf (dump_file, "Improved to:\n");
5278
          iv_ca_dump (data, dump_file, set);
5279
        }
5280
    }
5281
 
5282
  if (dump_file && (dump_flags & TDF_DETAILS))
5283
    {
5284
      comp_cost cost = iv_ca_cost (set);
5285
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5286
    }
5287
 
5288
  for (i = 0; i < n_iv_uses (data); i++)
5289
    {
5290
      use = iv_use (data, i);
5291
      use->selected = iv_ca_cand_for_use (set, use)->cand;
5292
    }
5293
 
5294
  return set;
5295
}
5296
 
5297
/* Creates a new induction variable corresponding to CAND.  */
5298
 
5299
static void
5300
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5301
{
5302
  gimple_stmt_iterator incr_pos;
5303
  tree base;
5304
  bool after = false;
5305
 
5306
  if (!cand->iv)
5307
    return;
5308
 
5309
  switch (cand->pos)
5310
    {
5311
    case IP_NORMAL:
5312
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5313
      break;
5314
 
5315
    case IP_END:
5316
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5317
      after = true;
5318
      break;
5319
 
5320
    case IP_AFTER_USE:
5321
      after = true;
5322
      /* fall through */
5323
    case IP_BEFORE_USE:
5324
      incr_pos = gsi_for_stmt (cand->incremented_at);
5325
      break;
5326
 
5327
    case IP_ORIGINAL:
5328
      /* Mark that the iv is preserved.  */
5329
      name_info (data, cand->var_before)->preserve_biv = true;
5330
      name_info (data, cand->var_after)->preserve_biv = true;
5331
 
5332
      /* Rewrite the increment so that it uses var_before directly.  */
5333
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
5334
 
5335
      return;
5336
    }
5337
 
5338
  gimple_add_tmp_var (cand->var_before);
5339
  add_referenced_var (cand->var_before);
5340
 
5341
  base = unshare_expr (cand->iv->base);
5342
 
5343
  create_iv (base, unshare_expr (cand->iv->step),
5344
             cand->var_before, data->current_loop,
5345
             &incr_pos, after, &cand->var_before, &cand->var_after);
5346
}
5347
 
5348
/* Creates new induction variables described in SET.  */
5349
 
5350
static void
5351
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5352
{
5353
  unsigned i;
5354
  struct iv_cand *cand;
5355
  bitmap_iterator bi;
5356
 
5357
  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5358
    {
5359
      cand = iv_cand (data, i);
5360
      create_new_iv (data, cand);
5361
    }
5362
}
5363
 
5364
 
5365
/* Rewrites USE (definition of iv used in a nonlinear expression)
5366
   using candidate CAND.  */
5367
 
5368
static void
5369
rewrite_use_nonlinear_expr (struct ivopts_data *data,
5370
                            struct iv_use *use, struct iv_cand *cand)
5371
{
5372
  tree comp;
5373
  tree op, tgt;
5374
  gimple ass;
5375
  gimple_stmt_iterator bsi;
5376
 
5377
  /* An important special case -- if we are asked to express value of
5378
     the original iv by itself, just exit; there is no need to
5379
     introduce a new computation (that might also need casting the
5380
     variable to unsigned and back).  */
5381
  if (cand->pos == IP_ORIGINAL
5382
      && cand->incremented_at == use->stmt)
5383
    {
5384
      tree step, ctype, utype;
5385
      enum tree_code incr_code = PLUS_EXPR, old_code;
5386
 
5387
      gcc_assert (is_gimple_assign (use->stmt));
5388
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5389
 
5390
      step = cand->iv->step;
5391
      ctype = TREE_TYPE (step);
5392
      utype = TREE_TYPE (cand->var_after);
5393
      if (TREE_CODE (step) == NEGATE_EXPR)
5394
        {
5395
          incr_code = MINUS_EXPR;
5396
          step = TREE_OPERAND (step, 0);
5397
        }
5398
 
5399
      /* Check whether we may leave the computation unchanged.
5400
         This is the case only if it does not rely on other
5401
         computations in the loop -- otherwise, the computation
5402
         we rely upon may be removed in remove_unused_ivs,
5403
         thus leading to ICE.  */
5404
      old_code = gimple_assign_rhs_code (use->stmt);
5405
      if (old_code == PLUS_EXPR
5406
          || old_code == MINUS_EXPR
5407
          || old_code == POINTER_PLUS_EXPR)
5408
        {
5409
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5410
            op = gimple_assign_rhs2 (use->stmt);
5411
          else if (old_code != MINUS_EXPR
5412
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5413
            op = gimple_assign_rhs1 (use->stmt);
5414
          else
5415
            op = NULL_TREE;
5416
        }
5417
      else
5418
        op = NULL_TREE;
5419
 
5420
      if (op
5421
          && (TREE_CODE (op) == INTEGER_CST
5422
              || operand_equal_p (op, step, 0)))
5423
        return;
5424
 
5425
      /* Otherwise, add the necessary computations to express
5426
         the iv.  */
5427
      op = fold_convert (ctype, cand->var_before);
5428
      comp = fold_convert (utype,
5429
                           build2 (incr_code, ctype, op,
5430
                                   unshare_expr (step)));
5431
    }
5432
  else
5433
    {
5434
      comp = get_computation (data->current_loop, use, cand);
5435
      gcc_assert (comp != NULL_TREE);
5436
    }
5437
 
5438
  switch (gimple_code (use->stmt))
5439
    {
5440
    case GIMPLE_PHI:
5441
      tgt = PHI_RESULT (use->stmt);
5442
 
5443
      /* If we should keep the biv, do not replace it.  */
5444
      if (name_info (data, tgt)->preserve_biv)
5445
        return;
5446
 
5447
      bsi = gsi_after_labels (gimple_bb (use->stmt));
5448
      break;
5449
 
5450
    case GIMPLE_ASSIGN:
5451
      tgt = gimple_assign_lhs (use->stmt);
5452
      bsi = gsi_for_stmt (use->stmt);
5453
      break;
5454
 
5455
    default:
5456
      gcc_unreachable ();
5457
    }
5458
 
5459
  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5460
                                 true, GSI_SAME_STMT);
5461
 
5462
  if (gimple_code (use->stmt) == GIMPLE_PHI)
5463
    {
5464
      ass = gimple_build_assign (tgt, op);
5465
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5466
 
5467
      bsi = gsi_for_stmt (use->stmt);
5468
      remove_phi_node (&bsi, false);
5469
    }
5470
  else
5471
    {
5472
      gimple_assign_set_rhs_from_tree (&bsi, op);
5473
      use->stmt = gsi_stmt (bsi);
5474
    }
5475
}
5476
 
5477
/* Replaces ssa name in index IDX by its basic variable.  Callback for
5478
   for_each_index.  */
5479
 
5480
static bool
5481
idx_remove_ssa_names (tree base, tree *idx,
5482
                      void *data ATTRIBUTE_UNUSED)
5483
{
5484
  tree *op;
5485
 
5486
  if (TREE_CODE (*idx) == SSA_NAME)
5487
    *idx = SSA_NAME_VAR (*idx);
5488
 
5489
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5490
    {
5491
      op = &TREE_OPERAND (base, 2);
5492
      if (*op
5493
          && TREE_CODE (*op) == SSA_NAME)
5494
        *op = SSA_NAME_VAR (*op);
5495
      op = &TREE_OPERAND (base, 3);
5496
      if (*op
5497
          && TREE_CODE (*op) == SSA_NAME)
5498
        *op = SSA_NAME_VAR (*op);
5499
    }
5500
 
5501
  return true;
5502
}
5503
 
5504
/* Unshares REF and replaces ssa names inside it by their basic variables.  */
5505
 
5506
static tree
5507
unshare_and_remove_ssa_names (tree ref)
5508
{
5509
  ref = unshare_expr (ref);
5510
  for_each_index (&ref, idx_remove_ssa_names, NULL);
5511
 
5512
  return ref;
5513
}
5514
 
5515
/* Copies the reference information from OLD_REF to NEW_REF.  */
5516
 
5517
static void
5518
copy_ref_info (tree new_ref, tree old_ref)
5519
{
5520
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5521
    copy_mem_ref_info (new_ref, old_ref);
5522
  else
5523
    {
5524
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5525
      TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5526
      TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5527
    }
5528
}
5529
 
5530
/* Rewrites USE (address that is an iv) using candidate CAND.  */
5531
 
5532
static void
5533
rewrite_use_address (struct ivopts_data *data,
5534
                     struct iv_use *use, struct iv_cand *cand)
5535
{
5536
  aff_tree aff;
5537
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5538
  tree base_hint = NULL_TREE;
5539
  tree ref;
5540
  bool ok;
5541
 
5542
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5543
  gcc_assert (ok);
5544
  unshare_aff_combination (&aff);
5545
 
5546
  /* To avoid undefined overflow problems, all IV candidates use unsigned
5547
     integer types.  The drawback is that this makes it impossible for
5548
     create_mem_ref to distinguish an IV that is based on a memory object
5549
     from one that represents simply an offset.
5550
 
5551
     To work around this problem, we pass a hint to create_mem_ref that
5552
     indicates which variable (if any) in aff is an IV based on a memory
5553
     object.  Note that we only consider the candidate.  If this is not
5554
     based on an object, the base of the reference is in some subexpression
5555
     of the use -- but these will use pointer types, so they are recognized
5556
     by the create_mem_ref heuristics anyway.  */
5557
  if (cand->iv->base_object)
5558
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5559
 
5560
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5561
                        data->speed);
5562
  copy_ref_info (ref, *use->op_p);
5563
  *use->op_p = ref;
5564
}
5565
 
5566
/* Rewrites USE (the condition such that one of the arguments is an iv) using
5567
   candidate CAND.  */
5568
 
5569
static void
5570
rewrite_use_compare (struct ivopts_data *data,
5571
                     struct iv_use *use, struct iv_cand *cand)
5572
{
5573
  tree comp, *var_p, op, bound;
5574
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5575
  enum tree_code compare;
5576
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5577
  bool ok;
5578
 
5579
  bound = cp->value;
5580
  if (bound)
5581
    {
5582
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5583
      tree var_type = TREE_TYPE (var);
5584
      gimple_seq stmts;
5585
 
5586
      compare = iv_elimination_compare (data, use);
5587
      bound = unshare_expr (fold_convert (var_type, bound));
5588
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5589
      if (stmts)
5590
        gsi_insert_seq_on_edge_immediate (
5591
                loop_preheader_edge (data->current_loop),
5592
                stmts);
5593
 
5594
      gimple_cond_set_lhs (use->stmt, var);
5595
      gimple_cond_set_code (use->stmt, compare);
5596
      gimple_cond_set_rhs (use->stmt, op);
5597
      return;
5598
    }
5599
 
5600
  /* The induction variable elimination failed; just express the original
5601
     giv.  */
5602
  comp = get_computation (data->current_loop, use, cand);
5603
  gcc_assert (comp != NULL_TREE);
5604
 
5605
  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5606
  gcc_assert (ok);
5607
 
5608
  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5609
                                     true, GSI_SAME_STMT);
5610
}
5611
 
5612
/* Rewrites USE using candidate CAND.  */
5613
 
5614
static void
5615
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5616
{
5617
  switch (use->type)
5618
    {
5619
      case USE_NONLINEAR_EXPR:
5620
        rewrite_use_nonlinear_expr (data, use, cand);
5621
        break;
5622
 
5623
      case USE_ADDRESS:
5624
        rewrite_use_address (data, use, cand);
5625
        break;
5626
 
5627
      case USE_COMPARE:
5628
        rewrite_use_compare (data, use, cand);
5629
        break;
5630
 
5631
      default:
5632
        gcc_unreachable ();
5633
    }
5634
 
5635
  update_stmt (use->stmt);
5636
}
5637
 
5638
/* Rewrite the uses using the selected induction variables.  */
5639
 
5640
static void
5641
rewrite_uses (struct ivopts_data *data)
5642
{
5643
  unsigned i;
5644
  struct iv_cand *cand;
5645
  struct iv_use *use;
5646
 
5647
  for (i = 0; i < n_iv_uses (data); i++)
5648
    {
5649
      use = iv_use (data, i);
5650
      cand = use->selected;
5651
      gcc_assert (cand);
5652
 
5653
      rewrite_use (data, use, cand);
5654
    }
5655
}
5656
 
5657
/* Removes the ivs that are not used after rewriting.  */
5658
 
5659
static void
5660
remove_unused_ivs (struct ivopts_data *data)
5661
{
5662
  unsigned j;
5663
  bitmap_iterator bi;
5664
  bitmap toremove = BITMAP_ALLOC (NULL);
5665
 
5666
  /* Figure out an order in which to release SSA DEFs so that we don't
5667
     release something that we'd have to propagate into a debug stmt
5668
     afterwards.  */
5669
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5670
    {
5671
      struct version_info *info;
5672
 
5673
      info = ver_info (data, j);
5674
      if (info->iv
5675
          && !integer_zerop (info->iv->step)
5676
          && !info->inv_id
5677
          && !info->iv->have_use_for
5678
          && !info->preserve_biv)
5679
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5680
    }
5681
 
5682
  release_defs_bitset (toremove);
5683
 
5684
  BITMAP_FREE (toremove);
5685
}
5686
 
5687
/* Frees data allocated by the optimization of a single loop.  */
5688
 
5689
static void
5690
free_loop_data (struct ivopts_data *data)
5691
{
5692
  unsigned i, j;
5693
  bitmap_iterator bi;
5694
  tree obj;
5695
 
5696
  if (data->niters)
5697
    {
5698
      pointer_map_destroy (data->niters);
5699
      data->niters = NULL;
5700
    }
5701
 
5702
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5703
    {
5704
      struct version_info *info;
5705
 
5706
      info = ver_info (data, i);
5707
      if (info->iv)
5708
        free (info->iv);
5709
      info->iv = NULL;
5710
      info->has_nonlin_use = false;
5711
      info->preserve_biv = false;
5712
      info->inv_id = 0;
5713
    }
5714
  bitmap_clear (data->relevant);
5715
  bitmap_clear (data->important_candidates);
5716
 
5717
  for (i = 0; i < n_iv_uses (data); i++)
5718
    {
5719
      struct iv_use *use = iv_use (data, i);
5720
 
5721
      free (use->iv);
5722
      BITMAP_FREE (use->related_cands);
5723
      for (j = 0; j < use->n_map_members; j++)
5724
        if (use->cost_map[j].depends_on)
5725
          BITMAP_FREE (use->cost_map[j].depends_on);
5726
      free (use->cost_map);
5727
      free (use);
5728
    }
5729
  VEC_truncate (iv_use_p, data->iv_uses, 0);
5730
 
5731
  for (i = 0; i < n_iv_cands (data); i++)
5732
    {
5733
      struct iv_cand *cand = iv_cand (data, i);
5734
 
5735
      if (cand->iv)
5736
        free (cand->iv);
5737
      if (cand->depends_on)
5738
        BITMAP_FREE (cand->depends_on);
5739
      free (cand);
5740
    }
5741
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5742
 
5743
  if (data->version_info_size < num_ssa_names)
5744
    {
5745
      data->version_info_size = 2 * num_ssa_names;
5746
      free (data->version_info);
5747
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5748
    }
5749
 
5750
  data->max_inv_id = 0;
5751
 
5752
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5753
    SET_DECL_RTL (obj, NULL_RTX);
5754
 
5755
  VEC_truncate (tree, decl_rtl_to_reset, 0);
5756
}
5757
 
5758
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5759
   loop tree.  */
5760
 
5761
static void
5762
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5763
{
5764
  free_loop_data (data);
5765
  free (data->version_info);
5766
  BITMAP_FREE (data->relevant);
5767
  BITMAP_FREE (data->important_candidates);
5768
 
5769
  VEC_free (tree, heap, decl_rtl_to_reset);
5770
  VEC_free (iv_use_p, heap, data->iv_uses);
5771
  VEC_free (iv_cand_p, heap, data->iv_candidates);
5772
}
5773
 
5774
/* Optimizes the LOOP.  Returns true if anything changed.  */
5775
 
5776
static bool
5777
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5778
{
5779
  bool changed = false;
5780
  struct iv_ca *iv_ca;
5781
  edge exit;
5782
  basic_block *body;
5783
 
5784
  gcc_assert (!data->niters);
5785
  data->current_loop = loop;
5786
  data->speed = optimize_loop_for_speed_p (loop);
5787
 
5788
  if (dump_file && (dump_flags & TDF_DETAILS))
5789
    {
5790
      fprintf (dump_file, "Processing loop %d\n", loop->num);
5791
 
5792
      exit = single_dom_exit (loop);
5793
      if (exit)
5794
        {
5795
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5796
                   exit->src->index, exit->dest->index);
5797
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5798
          fprintf (dump_file, "\n");
5799
        }
5800
 
5801
      fprintf (dump_file, "\n");
5802
    }
5803
 
5804
  body = get_loop_body (loop);
5805
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5806
  free (body);
5807
 
5808
  /* For each ssa name determines whether it behaves as an induction variable
5809
     in some loop.  */
5810
  if (!find_induction_variables (data))
5811
    goto finish;
5812
 
5813
  /* Finds interesting uses (item 1).  */
5814
  find_interesting_uses (data);
5815
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5816
    goto finish;
5817
 
5818
  /* Finds candidates for the induction variables (item 2).  */
5819
  find_iv_candidates (data);
5820
 
5821
  /* Calculates the costs (item 3, part 1).  */
5822
  determine_iv_costs (data);
5823
  determine_use_iv_costs (data);
5824
  determine_set_costs (data);
5825
 
5826
  /* Find the optimal set of induction variables (item 3, part 2).  */
5827
  iv_ca = find_optimal_iv_set (data);
5828
  if (!iv_ca)
5829
    goto finish;
5830
  changed = true;
5831
 
5832
  /* Create the new induction variables (item 4, part 1).  */
5833
  create_new_ivs (data, iv_ca);
5834
  iv_ca_free (&iv_ca);
5835
 
5836
  /* Rewrite the uses (item 4, part 2).  */
5837
  rewrite_uses (data);
5838
 
5839
  /* Remove the ivs that are unused after rewriting.  */
5840
  remove_unused_ivs (data);
5841
 
5842
  /* We have changed the structure of induction variables; it might happen
5843
     that definitions in the scev database refer to some of them that were
5844
     eliminated.  */
5845
  scev_reset ();
5846
 
5847
finish:
5848
  free_loop_data (data);
5849
 
5850
  return changed;
5851
}
5852
 
5853
/* Main entry point.  Optimizes induction variables in loops.  */
5854
 
5855
void
5856
tree_ssa_iv_optimize (void)
5857
{
5858
  struct loop *loop;
5859
  struct ivopts_data data;
5860
  loop_iterator li;
5861
 
5862
  tree_ssa_iv_optimize_init (&data);
5863
 
5864
  /* Optimize the loops starting with the innermost ones.  */
5865
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5866
    {
5867
      if (dump_file && (dump_flags & TDF_DETAILS))
5868
        flow_loop_dump (loop, dump_file, NULL, 1);
5869
 
5870
      tree_ssa_iv_optimize_loop (&data, loop);
5871
    }
5872
 
5873
  tree_ssa_iv_optimize_finalize (&data);
5874
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.