/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
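
/* As an illustrative sketch (not taken from the sources or from any
   particular testcase), strength reduction may rewrite a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   whose body recomputes the address a + i * sizeof (*a) on every
   iteration, into

     for (p = a; p < a + n; p++)
       *p = 0;

   where the counter i is eliminated and the exit test is expressed
   directly in terms of the pointer iv p; the actual result depends on
   the target's address and arithmetic costs.  */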

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: add_cost and zero_cost defined in expmed.h conflict with local
   uses.  */
#include "expmed.h"
#undef add_cost
#undef zero_cost

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* The memory object that the induction variable points to.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if so.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, lest we
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
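
/* As an illustrative sketch (not covering all the cases handled below):
   for EXPR = &a[i] the returned base object would be &a, whereas for a
   pointer obtained by casting an integer no base object can be
   determined and NULL_TREE is returned.  */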

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */
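
/* For example (illustrative), for a counter incremented in the loop
   latch:

     i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   the step determined for the biv i_1 would be 1.  */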

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name to avoid showing it in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */
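
/* As an illustrative sketch: for an exit test

     if (i_4 < n_7)

   where i_4 is an iv with nonzero step and n_7 is loop invariant,
   CONTROL_VAR would point to i_4, BOUND to n_7, and true would be
   returned.  */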

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
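
/* As an illustrative sketch: for TOP = n * 4 + n and BOT = n this
   would store 5 to MUL and return true, while for TOP = n + 1 and
   BOT = n it would return false, since 1 is not a constant multiple
   of n.  */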

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
1702
 
1703
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */
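/* A worked example (illustrative): for EXPR == &a[i + 3] with 4-byte array
   elements, the recursion through ADDR_EXPR and ARRAY_REF strips the
   constant index part, returning &a[i] and storing 12 into *OFFSET.  */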

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */
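/* For example (illustrative), on a target with 32-bit pointers both "int *"
   and "int" map to a 32-bit unsigned integer type, in which wrap-around
   arithmetic is well defined.  */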

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */
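/* Note on the IP_BEFORE_USE case below (explanatory): since the increment
   is emitted just before the use, the candidate is given base BASE - STEP,
   so that after the increment its value at the use is again BASE + STEP * i,
   matching what a pre-increment (or pre-decrement) address would produce.  */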

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}


/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to a power of two, so that computing the modulus
             is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */
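/* For instance (illustrative): if A is (int) l1 and B is (int) l2 with
   l1 and l2 of type long, the difference folds better when computed in
   long, so *A and *B are replaced by l1 and l2 and long is returned.  */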

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
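/* The underlying identity (explanatory): at statement AT the values are
   use = UBASE + i * USTEP and cand = CBASE + i * CSTEP.  When
   USTEP == RAT * CSTEP, substituting i * CSTEP = cand - CBASE gives
   use = UBASE - RAT * CBASE + RAT * cand, which is exactly the affine
   combination assembled below.  */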
2940
 
2941
static bool
2942
get_computation_aff (struct loop *loop,
2943
                     struct iv_use *use, struct iv_cand *cand, gimple at,
2944
                     struct affine_tree_combination *aff)
2945
{
2946
  tree ubase = use->iv->base;
2947
  tree ustep = use->iv->step;
2948
  tree cbase = cand->iv->base;
2949
  tree cstep = cand->iv->step, cstep_common;
2950
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2951
  tree common_type, var;
2952
  tree uutype;
2953
  aff_tree cbase_aff, var_aff;
2954
  double_int rat;
2955
 
2956
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2957
    {
2958
      /* We do not have a precision to express the values of use.  */
2959
      return false;
2960
    }
2961
 
2962
  var = var_at_stmt (loop, cand, at);
2963
  uutype = unsigned_type_for (utype);
2964
 
2965
  /* If the conversion is not noop, perform it.  */
2966
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2967
    {
2968
      cstep = fold_convert (uutype, cstep);
2969
      cbase = fold_convert (uutype, cbase);
2970
      var = fold_convert (uutype, var);
2971
    }
2972
 
2973
  if (!constant_multiple_of (ustep, cstep, &rat))
2974
    return false;
2975
 
2976
  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2977
     type, we achieve better folding by computing their difference in this
2978
     wider type, and cast the result to UUTYPE.  We do not need to worry about
2979
     overflows, as all the arithmetics will in the end be performed in UUTYPE
2980
     anyway.  */
2981
  common_type = determine_common_wider_type (&ubase, &cbase);
2982
 
2983
  /* use = ubase - ratio * cbase + ratio * var.  */
2984
  tree_to_aff_combination (ubase, common_type, aff);
2985
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
2986
  tree_to_aff_combination (var, uutype, &var_aff);
2987
 
2988
  /* We need to shift the value if we are after the increment.  */
2989
  if (stmt_after_increment (loop, cand, at))
2990
    {
2991
      aff_tree cstep_aff;
2992
 
2993
      if (common_type != uutype)
2994
        cstep_common = fold_convert (common_type, cstep);
2995
      else
2996
        cstep_common = cstep;
2997
 
2998
      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2999
      aff_combination_add (&cbase_aff, &cstep_aff);
3000
    }
3001
 
3002
  aff_combination_scale (&cbase_aff, double_int_neg (rat));
3003
  aff_combination_add (aff, &cbase_aff);
3004
  if (common_type != uutype)
3005
    aff_combination_convert (aff, uutype);
3006
 
3007
  aff_combination_scale (&var_aff, rat);
3008
  aff_combination_add (aff, &var_aff);
3009
 
3010
  return true;
3011
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */
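/* For example (illustrative): a setup cost of 10 amortized over a loop
   with an estimated average of 5 iterations contributes 2 units to the
   per-iteration cost.  */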
static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}

/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Computes the hash value for ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */
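/* As an illustration (target-dependent): on x86 the scaled index in an
   address of the form base + index * ratio typically admits only the
   ratios 1, 2, 4 and 8; other ratios are rejected by the probing below.  */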

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */
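/* As an illustration (target-dependent), the shape symbol + var + OFFSET
   + RATIO * index corresponds to an x86 address such as
   sym+16(%ebx,%esi,4); the four boolean flags in the cost table below
   select which of the parts are present.  */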
3223
 
3224
typedef struct
3225
{
3226
  HOST_WIDE_INT min_offset, max_offset;
3227
  unsigned costs[2][2][2][2];
3228
} *address_cost_data;
3229
 
3230
DEF_VEC_P (address_cost_data);
3231
DEF_VEC_ALLOC_P (address_cost_data, heap);
3232
 
3233
static comp_cost
3234
get_address_cost (bool symbol_present, bool var_present,
3235
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3236
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3237
                  addr_space_t as, bool speed,
3238
                  bool stmt_after_inc, bool *may_autoinc)
3239
{
3240
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3241
  static VEC(address_cost_data, heap) *address_cost_data_list;
3242
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3243
  address_cost_data data;
3244
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3245
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3246
  unsigned cost, acost, complexity;
3247
  bool offset_p, ratio_p, autoinc;
3248
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
3249
  unsigned HOST_WIDE_INT mask;
3250
  unsigned bits;
3251
 
3252
  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3253
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3254
                           data_index + 1);
3255
 
3256
  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3257
  if (!data)
3258
    {
3259
      HOST_WIDE_INT i;
3260
      HOST_WIDE_INT rat, off = 0;
3261
      int old_cse_not_expected, width;
3262
      unsigned sym_p, var_p, off_p, rat_p, add_c;
3263
      rtx seq, addr, base;
3264
      rtx reg0, reg1;
3265
 
3266
      data = (address_cost_data) xcalloc (1, sizeof (*data));
3267
 
3268
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3269
 
3270
      width = GET_MODE_BITSIZE (address_mode) - 1;
3271
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
3272
        width = HOST_BITS_PER_WIDE_INT - 1;
3273
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3274
 
3275
      for (i = width; i >= 0; i--)
3276
        {
3277
          off = -((HOST_WIDE_INT) 1 << i);
3278
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
3279
          if (memory_address_addr_space_p (mem_mode, addr, as))
3280
            break;
3281
        }
3282
      data->min_offset = (i == -1? 0 : off);
3283
 
3284
      for (i = width; i >= 0; i--)
3285
        {
3286
          off = ((HOST_WIDE_INT) 1 << i) - 1;
3287
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
3288
          if (memory_address_addr_space_p (mem_mode, addr, as))
3289
            break;
3290
        }
3291
      if (i == -1)
3292
        off = 0;
3293
      data->max_offset = off;
3294
 
3295
      if (dump_file && (dump_flags & TDF_DETAILS))
3296
        {
3297
          fprintf (dump_file, "get_address_cost:\n");
3298
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3299
                   GET_MODE_NAME (mem_mode),
3300
                   data->min_offset);
3301
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3302
                   GET_MODE_NAME (mem_mode),
3303
                   data->max_offset);
3304
        }
3305
 
3306
      rat = 1;
3307
      for (i = 2; i <= MAX_RATIO; i++)
3308
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
3309
          {
3310
            rat = i;
3311
            break;
3312
          }
3313
 
3314
      /* Compute the cost of various addressing modes.  */
3315
      acost = 0;
3316
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3317
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3318
 
3319
      if (HAVE_PRE_DECREMENT)
3320
        {
3321
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
3322
          has_predec[mem_mode]
3323
            = memory_address_addr_space_p (mem_mode, addr, as);
3324
        }
3325
      if (HAVE_POST_DECREMENT)
3326
        {
3327
          addr = gen_rtx_POST_DEC (address_mode, reg0);
3328
          has_postdec[mem_mode]
3329
            = memory_address_addr_space_p (mem_mode, addr, as);
3330
        }
3331
      if (HAVE_PRE_INCREMENT)
3332
        {
3333
          addr = gen_rtx_PRE_INC (address_mode, reg0);
3334
          has_preinc[mem_mode]
3335
            = memory_address_addr_space_p (mem_mode, addr, as);
3336
        }
3337
      if (HAVE_POST_INCREMENT)
3338
        {
3339
          addr = gen_rtx_POST_INC (address_mode, reg0);
3340
          has_postinc[mem_mode]
3341
            = memory_address_addr_space_p (mem_mode, addr, as);
3342
        }
3343
      for (i = 0; i < 16; i++)
3344
        {
3345
          sym_p = i & 1;
3346
          var_p = (i >> 1) & 1;
3347
          off_p = (i >> 2) & 1;
3348
          rat_p = (i >> 3) & 1;
3349
 
3350
          addr = reg0;
3351
          if (rat_p)
3352
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3353
                                   gen_int_mode (rat, address_mode));
3354
 
3355
          if (var_p)
3356
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3357
 
3358
          if (sym_p)
3359
            {
3360
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3361
              /* ??? We can run into trouble with some backends by presenting
3362
                 it with symbols which haven't been properly passed through
3363
                 targetm.encode_section_info.  By setting the local bit, we
3364
                 enhance the probability of things working.  */
3365
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3366
 
3367
              if (off_p)
3368
                base = gen_rtx_fmt_e (CONST, address_mode,
3369
                                      gen_rtx_fmt_ee
3370
                                        (PLUS, address_mode, base,
3371
                                         gen_int_mode (off, address_mode)));
3372
            }
3373
          else if (off_p)
3374
            base = gen_int_mode (off, address_mode);
3375
          else
3376
            base = NULL_RTX;
3377
 
3378
          if (base)
3379
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3380
 
3381
          start_sequence ();
3382
          /* To avoid splitting addressing modes, pretend that no cse will
3383
             follow.  */
3384
          old_cse_not_expected = cse_not_expected;
3385
          cse_not_expected = true;
3386
          addr = memory_address_addr_space (mem_mode, addr, as);
3387
          cse_not_expected = old_cse_not_expected;
3388
          seq = get_insns ();
3389
          end_sequence ();
3390
 
3391
          acost = seq_cost (seq, speed);
3392
          acost += address_cost (addr, mem_mode, as, speed);
3393
 
3394
          if (!acost)
3395
            acost = 1;
3396
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
3397
        }
3398
 
3399
      /* On some targets, it is quite expensive to load symbol to a register,
3400
         which makes addresses that contain symbols look much more expensive.
3401
         However, the symbol will have to be loaded in any case before the
3402
         loop (and quite likely we have it in register already), so it does not
3403
         make much sense to penalize them too heavily.  So make some final
3404
         tweaks for the SYMBOL_PRESENT modes:
3405
 
3406
         If VAR_PRESENT is false, and the mode obtained by changing symbol to
3407
         var is cheaper, use this mode with small penalty.
3408
         If VAR_PRESENT is true, try whether the mode with
3409
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3410
         if this is the case, use it.  */
3411
      add_c = add_cost (address_mode, speed);
3412
      for (i = 0; i < 8; i++)
3413
        {
3414
          var_p = i & 1;
3415
          off_p = (i >> 1) & 1;
3416
          rat_p = (i >> 2) & 1;
3417
 
3418
          acost = data->costs[0][1][off_p][rat_p] + 1;
3419
          if (var_p)
3420
            acost += add_c;
3421
 
3422
          if (acost < data->costs[1][var_p][off_p][rat_p])
3423
            data->costs[1][var_p][off_p][rat_p] = acost;
3424
        }
3425
 
3426
      if (dump_file && (dump_flags & TDF_DETAILS))
3427
        {
3428
          fprintf (dump_file, "Address costs:\n");
3429
 
3430
          for (i = 0; i < 16; i++)
3431
            {
3432
              sym_p = i & 1;
3433
              var_p = (i >> 1) & 1;
3434
              off_p = (i >> 2) & 1;
3435
              rat_p = (i >> 3) & 1;
3436
 
3437
              fprintf (dump_file, "  ");
3438
              if (sym_p)
3439
                fprintf (dump_file, "sym + ");
3440
              if (var_p)
3441
                fprintf (dump_file, "var + ");
3442
              if (off_p)
3443
                fprintf (dump_file, "cst + ");
3444
              if (rat_p)
3445
                fprintf (dump_file, "rat * ");
3446
 
3447
              acost = data->costs[sym_p][var_p][off_p][rat_p];
3448
              fprintf (dump_file, "index costs %d\n", acost);
3449
            }
3450
          if (has_predec[mem_mode] || has_postdec[mem_mode]
3451
              || has_preinc[mem_mode] || has_postinc[mem_mode])
3452
            fprintf (dump_file, "  May include autoinc/dec\n");
3453
          fprintf (dump_file, "\n");
3454
        }
3455
 
3456
      VEC_replace (address_cost_data, address_cost_data_list,
3457
                   data_index, data);
3458
    }
3459
 
3460
  bits = GET_MODE_BITSIZE (address_mode);
3461
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3462
  offset &= mask;
3463
  if ((offset >> (bits - 1) & 1))
3464
    offset |= ~mask;
3465
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;
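
  /* An illustration of the matching above (hypothetical target with a
     post-increment addressing mode): for a use like

       ... = *p;  p += 4;        (SImode access, so msize == 4)

     we have cstep == 4 and autoinc_offset == 0, so the has_postinc arm
     fires and the access may become *p++.  A symbol or a second
     variable in the address, or ratio != 1, rules this out, since the
     auto-modification modes only cover the plain *p forms.  */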

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}

/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int sa_cost;

  if (!(m >= 0 && m < maxm))
    return false;

  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost[speed][mode][m]
             : (mult == op1
                ? shiftsub1_cost[speed][mode][m]
                : shiftsub0_cost[speed][mode][m]));
  res = new_cost (sa_cost, 0);
  res = add_costs (res, mult == op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}

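/* A small illustration of the shiftadd matching above, with made-up
   operands: for EXPR = a + b * 8, MULT is b * 8, so
   m = exact_log2 (8) == 3 and the result is
   shiftadd_cost[speed][mode][3] -- the cost of a + (b << 3) --
   plus COST0, the pre-computed cost of the other operand a.
   For EXPR = a - b * 8, where MULT is the subtracted operand, the
   shiftsub1 table is consulted instead.  */
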
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
                                    &sa_cost))
            return sa_cost;
        }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
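
/* For example (a sketch with made-up target costs): forcing
   a + 5 * b into a variable recurses as

     cost (a + 5 * b) = add_cost (mode, speed)
                        + cost (a)      == 0, an SSA name
                        + cost (5 * b)  == multiply_by_cost (5, mode, speed)

   (5 is not a power of two, so the shiftadd path above does not
   apply), and the total is then capped by target_spill_cost, so a
   deeply nested expression is never costed above a spill.  */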
 
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
 
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
 
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
 
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
 
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
 
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
 
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strip the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strip the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do a more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
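
/* For instance (a sketch): for ubase == &a[i] + 16, cbase == &a[i]
   and ratio == 1, stripping the constant part makes the two bases
   operand_equal_p, so no new temporary is needed and -1 is returned.
   For unrelated bases, the affine difference UBASE - RATIO * CBASE
   survives and is assigned an id via get_expr_id.  */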
 

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */
 
static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have the precision to express the values of the use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express the address of an object with a computation
         based on the address of a different object.  This may cause problems
         in rtl level alias analysis (which does not expect this to be
         happening, as this is illegal in C), and would be unlikely to be
         useful anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case the statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses the value of the variable before the increment anyway,
     so this is not too unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (this also holds in the case ratio == -1, TODO).  */
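
  /* For example, a candidate with constant base cbase == 10 and
     ratio == 2 is folded into the offset in the first branch below:
     offset = -2 * 10 == -20, and the use is then costed as
     ubase - 20 + 2 * var, leaving only UBASE for difference_cost.  */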
 
4105
  if (cst_and_fits_in_hwi (cbase))
4106
    {
4107
      offset = - ratio * int_cst_value (cbase);
4108
      cost = difference_cost (data,
4109
                              ubase, build_int_cst (utype, 0),
4110
                              &symbol_present, &var_present, &offset,
4111
                              depends_on);
4112
      cost.cost /= avg_loop_niter (data->current_loop);
4113
    }
4114
  else if (ratio == 1)
4115
    {
4116
      tree real_cbase = cbase;
4117
 
4118
      /* Check to see if any adjustment is needed.  */
4119
      if (cstepi == 0 && stmt_is_after_inc)
4120
        {
4121
          aff_tree real_cbase_aff;
4122
          aff_tree cstep_aff;
4123
 
4124
          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4125
                                   &real_cbase_aff);
4126
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4127
 
4128
          aff_combination_add (&real_cbase_aff, &cstep_aff);
4129
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
4130
        }
4131
 
4132
      cost = difference_cost (data,
4133
                              ubase, real_cbase,
4134
                              &symbol_present, &var_present, &offset,
4135
                              depends_on);
4136
      cost.cost /= avg_loop_niter (data->current_loop);
4137
    }
4138
  else if (address_p
4139
           && !POINTER_TYPE_P (ctype)
4140
           && multiplier_allowed_in_address_p
4141
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
4142
                        TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4143
    {
4144
      cbase
4145
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4146
      cost = difference_cost (data,
4147
                              ubase, cbase,
4148
                              &symbol_present, &var_present, &offset,
4149
                              depends_on);
4150
      cost.cost /= avg_loop_niter (data->current_loop);
4151
    }
4152
  else
4153
    {
4154
      cost = force_var_cost (data, cbase, depends_on);
4155
      cost = add_costs (cost,
4156
                        difference_cost (data,
4157
                                         ubase, build_int_cst (utype, 0),
4158
                                         &symbol_present, &var_present,
4159
                                         &offset, depends_on));
4160
      cost.cost /= avg_loop_niter (data->current_loop);
4161
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4162
    }
4163
 
4164
  if (inv_expr_id)
4165
    {
4166
      *inv_expr_id =
4167
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4168
      /* Clear depends on.  */
4169
      if (*inv_expr_id != -1 && depends_on && *depends_on)
4170
        bitmap_clear (*depends_on);
4171
    }
4172
 
4173
  /* If we are after the increment, the value of the candidate is higher by
4174
     one iteration.  */
4175
  if (stmt_is_after_inc)
4176
    offset -= ratio * cstepi;
4177
 
4178
  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4179
     (symbol/var1/const parts may be omitted).  If we are looking for an
4180
     address, find the cost of addressing this.  */
4181
  if (address_p)
4182
    return add_costs (cost,
4183
                      get_address_cost (symbol_present, var_present,
4184
                                        offset, ratio, cstepi,
4185
                                        TYPE_MODE (TREE_TYPE (utype)),
4186
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4187
                                        speed, stmt_is_after_inc,
4188
                                        can_autoinc));
4189
 
4190
  /* Otherwise estimate the costs for computing the expression.  */
4191
  if (!symbol_present && !var_present && !offset)
4192
    {
4193
      if (ratio != 1)
4194
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4195
      return cost;
4196
    }
4197
 
4198
  /* Symbol + offset should be compile-time computable so consider that they
4199
      are added once to the variable, if present.  */
4200
  if (var_present && (symbol_present || offset))
4201
    cost.cost += adjust_setup_cost (data,
4202
                                    add_cost (TYPE_MODE (ctype), speed));
4203
 
4204
  /* Having offset does not affect runtime cost in case it is added to
4205
     symbol, but it increases complexity.  */
4206
  if (offset)
4207
    cost.complexity++;
4208
 
4209
  cost.cost += add_cost (TYPE_MODE (ctype), speed);
4210
 
4211
  aratio = ratio > 0 ? ratio : -ratio;
4212
  if (aratio != 1)
4213
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4214
  return cost;
4215
 
4216
fallback:
4217
  if (can_autoinc)
4218
    *can_autoinc = false;
4219
 
4220
  {
4221
    /* Just get the expression, expand it and measure the cost.  */
4222
    tree comp = get_computation_at (data->current_loop, use, cand, at);
4223
 
4224
    if (!comp)
4225
      return infinite_cost;
4226
 
4227
    if (address_p)
4228
      comp = build_simple_mem_ref (comp);
4229
 
4230
    return new_cost (computation_cost (comp, speed), 0);
4231
  }
4232
}
4233
 
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
 
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
 
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
 
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
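
/* In other words, VAL = BASE + NITER * STEP, or BASE + (NITER + 1) * STEP
   when AT is after the increment, computed as an affine combination so
   that pointer candidates get sizetype steps.  For example, a candidate
   with base p and step 4, sampled after the increment with NITER == 10,
   yields p + 44.  */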
 
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
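
/* A worked example: for a 32-bit iv with step 4, pow2div == 2, so the
   period is the low-bits mask of 32 - 2 == 30 bits, i.e. 0x3fffffff ==
   2^30 - 1.  This matches lcm (4, 2^32) / 4 - 1 == 2^30 - 1 from the
   formula above: the iv takes 2^30 distinct values before wrapping.  */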
 
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

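/* Strips conversions from EXP that do not change whether the type
   wraps on overflow, i.e. that keep nowrap_type_p stable, so that the
   overflow reasoning in expr_equal_p below remains valid across
   them.  */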
static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);
  return exp;
}
 
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
        return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
 
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.

         Also, we only look for the first addition in the computation of BASE.
         More complex analysis would be better, but introducing it just for
         this optimization seems like overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
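
/* An example of the pattern this catches: if BASE is an SSA name
   defined as base = start + offset (a POINTER_PLUS_EXPR in a
   non-wrapping type), then BASE - OFFSET == start, which was computed
   without overflow, so true is returned.  */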
 
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether the candidate is increasing or
     decreasing in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
 
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound information.  */
          if (data->loop_single_exit_p)
            {
              if (!estimated_loop_iterations (loop, true, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
 
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
 
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
 
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
 
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
 
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}
 
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
 
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
 
5033
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we account for it only slightly.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It would be exceptional for the iv register to happen to be initialized
     with the proper value at no cost.  In general, there will at least be
     a regcopy or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing them;
     the reason is to make debugging simpler.  This is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into the latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

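/* A small worked example of the cost above, assuming (hypothetically) that
   initializing the candidate costs 4 and each increment costs 1: since
   adjust_setup_cost scales the setup component down, roughly by the
   expected number of iterations when one is available, a loop that rolls
   many times sees a candidate cost approaching the per-iteration step
   cost of 1 -- which is why determine_iv_cost describes the
   initialization as almost negligible.  */
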
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}

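/* The returned value always includes SIZE itself, biasing the search
   toward fewer ivs; estimate_reg_pressure_cost adds the target's estimate
   of the register-pressure cost, which typically grows sharply once SIZE
   plus the other registers used in the loop exceeds the available hard
   registers (spilling).  The exact shape of the curve is
   target-specific.  */
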
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

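/* In the function above, regs_used ends up counting the PHI results in the
   loop header that are not induction variables, plus the invariants that
   have a nonlinear use; both kinds are likely to occupy a register across
   the loop regardless of which iv set is chosen, so they form the baseline
   pressure against which the candidate sets are costed.  */
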
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

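/* Note that cheaper_cost_pair treats a NULL cost pair as infinitely
   expensive: a non-NULL pair always beats NULL, and NULL never beats
   anything.  Callers below rely on this by starting their searches with a
   NULL "best so far".  */
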
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

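/* iv_ca_set_no_cp and iv_ca_set_cp above, together with the invariant
   add/remove helpers, maintain the bookkeeping of a struct iv_ca: n_regs
   counts the candidate ivs that really occupy a register plus the
   distinct loop invariants their uses depend on, and cand_cost and
   cand_use_cost track the per-candidate and per-use costs, so that
   iv_ca_recount_cost can recompute the total cheaply after each
   single-use change.  */
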
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

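/* The deltas above make trial evaluation cheap: a caller builds a list of
   single-use changes, commits it with FORWARD = true, reads off the cost
   of the modified set, and then commits the same delta with FORWARD =
   false to restore the original assignment.  iv_ca_extend and
   iv_ca_narrow below both follow this pattern.  */
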
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates a new iv candidate assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  When MIN_NCAND is true, the function
   tries to find a solution with a minimal number of iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

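/* To summarize the three set operations above: iv_ca_extend tries adding
   one candidate and moving uses to it, iv_ca_narrow tries removing one
   candidate by re-expressing its uses with the remaining ones, and
   iv_ca_prune repeatedly narrows the set by any candidate (except
   EXCEPT_CAND) as long as that improves the cost.  */
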
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

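/* Finds the optimal set of induction variables, trying both the strategy
   that prefers the original ivs and the one that prefers candidates not
   based on any memory object, and keeping the cheaper result.  */
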
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with the original
     IVs, and then try again using a strategy that prefers candidates not
     based on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            {
              SSA_NAME_PTR_INFO (comp)->align = 1;
              SSA_NAME_PTR_INFO (comp)->misalign = 0;
            }
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle a conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

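/* A quick check of the offset adjustment in the example before
   adjust_iv_update_pos: the index is scaled by the stride 8, so replacing
   iv1 with iv2 == iv1 + 1 increases the computed address by 8; shrinking
   the constant offset from 16 to 8 compensates, leaving the address
   accessed by the MEM_REF unchanged.  */
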
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
        rewrite_use_nonlinear_expr (data, use, cand);
        break;

      case USE_ADDRESS:
        rewrite_use_address (data, use, cand);
        break;

      case USE_COMPARE:
        rewrite_use_compare (data, use, cand);
        break;

      default:
        gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

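/* The result of loop_body_includes_call feeds data->body_includes_call,
   which is passed down to estimate_reg_pressure_cost: a call in the loop
   body clobbers the call-clobbered registers, leaving fewer registers for
   induction variables and thus making large iv sets costlier.  */
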
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
