OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [gcse.c] - Blame information for rev 774

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Partial redundancy elimination / Hoisting for RTL.
2
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3
   2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it under
8
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
10
version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13
WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
/* TODO
22
   - reordering of memory allocation and freeing to be more space efficient
23
   - do rough calc of how many regs are needed in each block, and a rough
24
     calc of how many regs are available in each class and use that to
25
     throttle back the code in cases where RTX_COST is minimal.
26
*/
27
 
28
/* References searched while implementing this.
29
 
30
   Compilers Principles, Techniques and Tools
31
   Aho, Sethi, Ullman
32
   Addison-Wesley, 1988
33
 
34
   Global Optimization by Suppression of Partial Redundancies
35
   E. Morel, C. Renvoise
36
   communications of the acm, Vol. 22, Num. 2, Feb. 1979
37
 
38
   A Portable Machine-Independent Global Optimizer - Design and Measurements
39
   Frederick Chow
40
   Stanford Ph.D. thesis, Dec. 1983
41
 
42
   A Fast Algorithm for Code Movement Optimization
43
   D.M. Dhamdhere
44
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
45
 
46
   A Solution to a Problem with Morel and Renvoise's
47
   Global Optimization by Suppression of Partial Redundancies
48
   K-H Drechsler, M.P. Stadel
49
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
50
 
51
   Practical Adaptation of the Global Optimization
52
   Algorithm of Morel and Renvoise
53
   D.M. Dhamdhere
54
   ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
55
 
56
   Efficiently Computing Static Single Assignment Form and the Control
57
   Dependence Graph
58
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
60
 
61
   Lazy Code Motion
62
   J. Knoop, O. Ruthing, B. Steffen
63
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
64
 
65
   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
66
   Time for Reducible Flow Control
67
   Thomas Ball
68
   ACM Letters on Programming Languages and Systems,
69
   Vol. 2, Num. 1-4, Mar-Dec 1993
70
 
71
   An Efficient Representation for Sparse Sets
72
   Preston Briggs, Linda Torczon
73
   ACM Letters on Programming Languages and Systems,
74
   Vol. 2, Num. 1-4, Mar-Dec 1993
75
 
76
   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77
   K-H Drechsler, M.P. Stadel
78
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
79
 
80
   Partial Dead Code Elimination
81
   J. Knoop, O. Ruthing, B. Steffen
82
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
83
 
84
   Effective Partial Redundancy Elimination
85
   P. Briggs, K.D. Cooper
86
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
87
 
88
   The Program Structure Tree: Computing Control Regions in Linear Time
89
   R. Johnson, D. Pearson, K. Pingali
90
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
91
 
92
   Optimal Code Motion: Theory and Practice
93
   J. Knoop, O. Ruthing, B. Steffen
94
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
95
 
96
   The power of assignment motion
97
   J. Knoop, O. Ruthing, B. Steffen
98
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
99
 
100
   Global code motion / global value numbering
101
   C. Click
102
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
103
 
104
   Value Driven Redundancy Elimination
105
   L.T. Simpson
106
   Rice University Ph.D. thesis, Apr. 1996
107
 
108
   Value Numbering
109
   L.T. Simpson
110
   Massively Scalar Compiler Project, Rice University, Sep. 1996
111
 
112
   High Performance Compilers for Parallel Computing
113
   Michael Wolfe
114
   Addison-Wesley, 1996
115
 
116
   Advanced Compiler Design and Implementation
117
   Steven Muchnick
118
   Morgan Kaufmann, 1997
119
 
120
   Building an Optimizing Compiler
121
   Robert Morgan
122
   Digital Press, 1998
123
 
124
   People wishing to speed up the code here should read:
125
     Elimination Algorithms for Data Flow Analysis
126
     B.G. Ryder, M.C. Paull
127
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
128
 
129
     How to Analyze Large Programs Efficiently and Informatively
130
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
132
 
133
   People wishing to do something different can find various possibilities
134
   in the above papers and elsewhere.
135
*/
136
 
137
#include "config.h"
138
#include "system.h"
139
#include "coretypes.h"
140
#include "tm.h"
141
#include "diagnostic-core.h"
142
#include "toplev.h"
143
 
144
#include "rtl.h"
145
#include "tree.h"
146
#include "tm_p.h"
147
#include "regs.h"
148
#include "hard-reg-set.h"
149
#include "flags.h"
150
#include "insn-config.h"
151
#include "recog.h"
152
#include "basic-block.h"
153
#include "output.h"
154
#include "function.h"
155
#include "expr.h"
156
#include "except.h"
157
#include "ggc.h"
158
#include "params.h"
159
#include "cselib.h"
160
#include "intl.h"
161
#include "obstack.h"
162
#include "timevar.h"
163
#include "tree-pass.h"
164
#include "hashtab.h"
165
#include "df.h"
166
#include "dbgcnt.h"
167
#include "target.h"
168
#include "gcse.h"
169
 
170
/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
171
   are a superset of those done by classic GCSE.
172
 
173
   Two passes of copy/constant propagation are done around PRE or hoisting
174
   because the first one enables more GCSE and the second one helps to clean
175
   up the copies that PRE and HOIST create.  This is needed more for PRE than
176
   for HOIST because code hoisting will try to use an existing register
177
   containing the common subexpression rather than create a new one.  This is
178
   harder to do for PRE because of the code motion (which HOIST doesn't do).
179
 
180
   Expressions we are interested in GCSE-ing are of the form
181
   (set (pseudo-reg) (expression)).
182
   Function want_to_gcse_p says what these are.
183
 
184
   In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
185
   This allows PRE to hoist expressions that are expressed in multiple insns,
186
   such as complex address calculations (e.g. for PIC code, or loads with a
187
   high part and a low part).
188
 
189
   PRE handles moving invariant expressions out of loops (by treating them as
190
   partially redundant).
191
 
192
   **********************
193
 
194
   We used to support multiple passes but there are diminishing returns in
195
   doing so.  The first pass usually makes 90% of the changes that are doable.
196
   A second pass can make a few more changes made possible by the first pass.
197
   Experiments show any further passes don't make enough changes to justify
198
   the expense.
199
 
200
   A study of spec92 using an unlimited number of passes:
201
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
202
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
203
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
204
 
205
   It was found doing copy propagation between each pass enables further
206
   substitutions.
207
 
208
   This study was done before expressions in REG_EQUAL notes were added as
209
   candidate expressions for optimization, and before the GIMPLE optimizers
210
   were added.  Probably, multiple passes is even less efficient now than
211
   at the time when the study was conducted.
212
 
213
   PRE is quite expensive in complicated functions because the DFA can take
214
   a while to converge.  Hence we only perform one pass.
215
 
216
   **********************
217
 
218
   The steps for PRE are:
219
 
220
   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
221
 
222
   2) Perform the data flow analysis for PRE.
223
 
224
   3) Delete the redundant instructions
225
 
226
   4) Insert the required copies [if any] that make the partially
227
      redundant instructions fully redundant.
228
 
229
   5) For other reaching expressions, insert an instruction to copy the value
230
      to a newly created pseudo that will reach the redundant instruction.
231
 
232
   The deletion is done first so that when we do insertions we
233
   know which pseudo reg to use.
234
 
235
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
236
   argue it is not.  The number of iterations for the algorithm to converge
237
   is typically 2-4 so I don't view it as that expensive (relatively speaking).
238
 
239
   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
240
   we create.  To make an expression reach the place where it's redundant,
241
   the result of the expression is copied to a new register, and the redundant
242
   expression is deleted by replacing it with this new register.  Classic GCSE
243
   doesn't have this problem as much as it computes the reaching defs of
244
   each register in each block and thus can try to use an existing
245
   register.  */
246
 
247
/* GCSE global vars.  */
248
 
249
struct target_gcse default_target_gcse;
250
#if SWITCHABLE_TARGET
251
struct target_gcse *this_target_gcse = &default_target_gcse;
252
#endif
253
 
254
/* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
255
int flag_rerun_cse_after_global_opts;
256
 
257
/* An obstack for our working variables.  */
258
static struct obstack gcse_obstack;
259
 
260
struct reg_use {rtx reg_rtx; };
261
 
262
/* Hash table of expressions.  */
263
 
264
struct expr
265
{
266
  /* The expression.  */
267
  rtx expr;
268
  /* Index in the available expression bitmaps.  */
269
  int bitmap_index;
270
  /* Next entry with the same hash.  */
271
  struct expr *next_same_hash;
272
  /* List of anticipatable occurrences in basic blocks in the function.
273
     An "anticipatable occurrence" is one that is the first occurrence in the
274
     basic block, the operands are not modified in the basic block prior
275
     to the occurrence and the output is not used between the start of
276
     the block and the occurrence.  */
277
  struct occr *antic_occr;
278
  /* List of available occurrence in basic blocks in the function.
279
     An "available occurrence" is one that is the last occurrence in the
280
     basic block and the operands are not modified by following statements in
281
     the basic block [including this insn].  */
282
  struct occr *avail_occr;
283
  /* Non-null if the computation is PRE redundant.
284
     The value is the newly created pseudo-reg to record a copy of the
285
     expression in all the places that reach the redundant copy.  */
286
  rtx reaching_reg;
287
  /* Maximum distance in instructions this expression can travel.
288
     We avoid moving simple expressions for more than a few instructions
289
     to keep register pressure under control.
290
     A value of "0" removes restrictions on how far the expression can
291
     travel.  */
292
  int max_distance;
293
};
294
 
295
/* Occurrence of an expression.
296
   There is one per basic block.  If a pattern appears more than once the
297
   last appearance is used [or first for anticipatable expressions].  */
298
 
299
struct occr
300
{
301
  /* Next occurrence of this expression.  */
302
  struct occr *next;
303
  /* The insn that computes the expression.  */
304
  rtx insn;
305
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
306
  char deleted_p;
307
  /* Nonzero if this [available] occurrence has been copied to
308
     reaching_reg.  */
309
  /* ??? This is mutually exclusive with deleted_p, so they could share
310
     the same byte.  */
311
  char copied_p;
312
};
313
 
314
typedef struct occr *occr_t;
315
DEF_VEC_P (occr_t);
316
DEF_VEC_ALLOC_P (occr_t, heap);
317
 
318
/* Expression hash tables.
319
   Each hash table is an array of buckets.
320
   ??? It is known that if it were an array of entries, structure elements
321
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
322
   not clear whether in the final analysis a sufficient amount of memory would
323
   be saved as the size of the available expression bitmaps would be larger
324
   [one could build a mapping table without holes afterwards though].
325
   Someday I'll perform the computation and figure it out.  */
326
 
327
struct hash_table_d
328
{
329
  /* The table itself.
330
     This is an array of `expr_hash_table_size' elements.  */
331
  struct expr **table;
332
 
333
  /* Size of the hash table, in elements.  */
334
  unsigned int size;
335
 
336
  /* Number of hash table elements.  */
337
  unsigned int n_elems;
338
};
339
 
340
/* Expression hash table.  */
341
static struct hash_table_d expr_hash_table;
342
 
343
/* This is a list of expressions which are MEMs and will be used by load
344
   or store motion.
345
   Load motion tracks MEMs which aren't killed by anything except itself,
346
   i.e. loads and stores to a single location.
347
   We can then allow movement of these MEM refs with a little special
348
   allowance. (all stores copy the same value to the reaching reg used
349
   for the loads).  This means all values used to store into memory must have
350
   no side effects so we can re-issue the setter value.  */
351
 
352
struct ls_expr
353
{
354
  struct expr * expr;           /* Gcse expression reference for LM.  */
355
  rtx pattern;                  /* Pattern of this mem.  */
356
  rtx pattern_regs;             /* List of registers mentioned by the mem.  */
357
  rtx loads;                    /* INSN list of loads seen.  */
358
  rtx stores;                   /* INSN list of stores seen.  */
359
  struct ls_expr * next;        /* Next in the list.  */
360
  int invalid;                  /* Invalid for some reason.  */
361
  int index;                    /* If it maps to a bitmap index.  */
362
  unsigned int hash_index;      /* Index when in a hash table.  */
363
  rtx reaching_reg;             /* Register to use when re-writing.  */
364
};
365
 
366
/* Head of the list of load/store memory refs.  */
367
static struct ls_expr * pre_ldst_mems = NULL;
368
 
369
/* Hashtable for the load/store memory refs.  */
370
static htab_t pre_ldst_table = NULL;
371
 
372
/* Bitmap containing one bit for each register in the program.
373
   Used when performing GCSE to track which registers have been set since
374
   the start of the basic block.  */
375
static regset reg_set_bitmap;
376
 
377
/* Array, indexed by basic block number for a list of insns which modify
378
   memory within that block.  */
379
static VEC (rtx,heap) **modify_mem_list;
380
static bitmap modify_mem_list_set;
381
 
382
typedef struct modify_pair_s
383
{
384
  rtx dest;                     /* A MEM.  */
385
  rtx dest_addr;                /* The canonical address of `dest'.  */
386
} modify_pair;
387
 
388
DEF_VEC_O(modify_pair);
389
DEF_VEC_ALLOC_O(modify_pair,heap);
390
 
391
/* This array parallels modify_mem_list, except that it stores MEMs
392
   being set and their canonicalized memory addresses.  */
393
static VEC (modify_pair,heap) **canon_modify_mem_list;
394
 
395
/* Bitmap indexed by block numbers to record which blocks contain
396
   function calls.  */
397
static bitmap blocks_with_calls;
398
 
399
/* Various variables for statistics gathering.  */
400
 
401
/* Memory used in a pass.
402
   This isn't intended to be absolutely precise.  Its intent is only
403
   to keep an eye on memory usage.  */
404
static int bytes_used;
405
 
406
/* GCSE substitutions made.  */
407
static int gcse_subst_count;
408
/* Number of copy instructions created.  */
409
static int gcse_create_count;
410
 
411
/* Doing code hoisting.  */
412
static bool doing_code_hoisting_p = false;
413
 
414
/* For available exprs */
415
static sbitmap *ae_kill;
416
 
417
static void compute_can_copy (void);
418
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
419
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
420
static void *gcse_alloc (unsigned long);
421
static void alloc_gcse_mem (void);
422
static void free_gcse_mem (void);
423
static void hash_scan_insn (rtx, struct hash_table_d *);
424
static void hash_scan_set (rtx, rtx, struct hash_table_d *);
425
static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
426
static void hash_scan_call (rtx, rtx, struct hash_table_d *);
427
static int want_to_gcse_p (rtx, int *);
428
static int oprs_unchanged_p (const_rtx, const_rtx, int);
429
static int oprs_anticipatable_p (const_rtx, const_rtx);
430
static int oprs_available_p (const_rtx, const_rtx);
431
static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
432
                                  struct hash_table_d *);
433
static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
434
static int expr_equiv_p (const_rtx, const_rtx);
435
static void record_last_reg_set_info (rtx, int);
436
static void record_last_mem_set_info (rtx);
437
static void record_last_set_info (rtx, const_rtx, void *);
438
static void compute_hash_table (struct hash_table_d *);
439
static void alloc_hash_table (struct hash_table_d *);
440
static void free_hash_table (struct hash_table_d *);
441
static void compute_hash_table_work (struct hash_table_d *);
442
static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
443
static void compute_transp (const_rtx, int, sbitmap *);
444
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
445
                                      struct hash_table_d *);
446
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
447
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
448
static void canon_list_insert (rtx, const_rtx, void *);
449
static void alloc_pre_mem (int, int);
450
static void free_pre_mem (void);
451
static struct edge_list *compute_pre_data (void);
452
static int pre_expr_reaches_here_p (basic_block, struct expr *,
453
                                    basic_block);
454
static void insert_insn_end_basic_block (struct expr *, basic_block);
455
static void pre_insert_copy_insn (struct expr *, rtx);
456
static void pre_insert_copies (void);
457
static int pre_delete (void);
458
static int pre_gcse (struct edge_list *);
459
static int one_pre_gcse_pass (void);
460
static void add_label_notes (rtx, rtx);
461
static void alloc_code_hoist_mem (int, int);
462
static void free_code_hoist_mem (void);
463
static void compute_code_hoist_vbeinout (void);
464
static void compute_code_hoist_data (void);
465
static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
466
                                      int, int *);
467
static int hoist_code (void);
468
static int one_code_hoisting_pass (void);
469
static rtx process_insert_insn (struct expr *);
470
static int pre_edge_insert (struct edge_list *, struct expr **);
471
static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
472
                                         basic_block, char *);
473
static struct ls_expr * ldst_entry (rtx);
474
static void free_ldst_entry (struct ls_expr *);
475
static void free_ld_motion_mems (void);
476
static void print_ldst_list (FILE *);
477
static struct ls_expr * find_rtx_in_ldst (rtx);
478
static int simple_mem (const_rtx);
479
static void invalidate_any_buried_refs (rtx);
480
static void compute_ld_motion_mems (void);
481
static void trim_ld_motion_mems (void);
482
static void update_ld_motion_stores (struct expr *);
483
static void clear_modify_mem_tables (void);
484
static void free_modify_mem_tables (void);
485
static rtx gcse_emit_move_after (rtx, rtx, rtx);
486
static bool is_too_expensive (const char *);
487
 
488
#define GNEW(T)                 ((T *) gmalloc (sizeof (T)))
489
#define GCNEW(T)                ((T *) gcalloc (1, sizeof (T)))
490
 
491
#define GNEWVEC(T, N)           ((T *) gmalloc (sizeof (T) * (N)))
492
#define GCNEWVEC(T, N)          ((T *) gcalloc ((N), sizeof (T)))
493
 
494
#define GNEWVAR(T, S)           ((T *) gmalloc ((S)))
495
#define GCNEWVAR(T, S)          ((T *) gcalloc (1, (S)))
496
 
497
#define GOBNEW(T)               ((T *) gcse_alloc (sizeof (T)))
498
#define GOBNEWVAR(T, S)         ((T *) gcse_alloc ((S)))
499
 
500
/* Misc. utilities.  */
501
 
502
#define can_copy \
503
  (this_target_gcse->x_can_copy)
504
#define can_copy_init_p \
505
  (this_target_gcse->x_can_copy_init_p)
506
 
507
/* Compute which modes support reg/reg copy operations.  */
508
 
509
static void
510
compute_can_copy (void)
511
{
512
  int i;
513
#ifndef AVOID_CCMODE_COPIES
514
  rtx reg, insn;
515
#endif
516
  memset (can_copy, 0, NUM_MACHINE_MODES);
517
 
518
  start_sequence ();
519
  for (i = 0; i < NUM_MACHINE_MODES; i++)
520
    if (GET_MODE_CLASS (i) == MODE_CC)
521
      {
522
#ifdef AVOID_CCMODE_COPIES
523
        can_copy[i] = 0;
524
#else
525
        reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
526
        insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
527
        if (recog (PATTERN (insn), insn, NULL) >= 0)
528
          can_copy[i] = 1;
529
#endif
530
      }
531
    else
532
      can_copy[i] = 1;
533
 
534
  end_sequence ();
535
}
536
 
537
/* Returns whether the mode supports reg/reg copy operations.  */
538
 
539
bool
540
can_copy_p (enum machine_mode mode)
541
{
542
  if (! can_copy_init_p)
543
    {
544
      compute_can_copy ();
545
      can_copy_init_p = true;
546
    }
547
 
548
  return can_copy[mode] != 0;
549
}
550
 
551
/* Cover function to xmalloc to record bytes allocated.  */
552
 
553
static void *
554
gmalloc (size_t size)
555
{
556
  bytes_used += size;
557
  return xmalloc (size);
558
}
559
 
560
/* Cover function to xcalloc to record bytes allocated.  */
561
 
562
static void *
563
gcalloc (size_t nelem, size_t elsize)
564
{
565
  bytes_used += nelem * elsize;
566
  return xcalloc (nelem, elsize);
567
}
568
 
569
/* Cover function to obstack_alloc.  */
570
 
571
static void *
572
gcse_alloc (unsigned long size)
573
{
574
  bytes_used += size;
575
  return obstack_alloc (&gcse_obstack, size);
576
}
577
 
578
/* Allocate memory for the reg/memory set tracking tables.
579
   This is called at the start of each pass.  */
580
 
581
static void
582
alloc_gcse_mem (void)
583
{
584
  /* Allocate vars to track sets of regs.  */
585
  reg_set_bitmap = ALLOC_REG_SET (NULL);
586
 
587
  /* Allocate array to keep a list of insns which modify memory in each
588
     basic block.  */
589
  modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
590
  canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
591
                                    last_basic_block);
592
  modify_mem_list_set = BITMAP_ALLOC (NULL);
593
  blocks_with_calls = BITMAP_ALLOC (NULL);
594
}
595
 
596
/* Free memory allocated by alloc_gcse_mem.  */
597
 
598
static void
599
free_gcse_mem (void)
600
{
601
  FREE_REG_SET (reg_set_bitmap);
602
 
603
  free_modify_mem_tables ();
604
  BITMAP_FREE (modify_mem_list_set);
605
  BITMAP_FREE (blocks_with_calls);
606
}
607
 
608
/* Compute the local properties of each recorded expression.
609
 
610
   Local properties are those that are defined by the block, irrespective of
611
   other blocks.
612
 
613
   An expression is transparent in a block if its operands are not modified
614
   in the block.
615
 
616
   An expression is computed (locally available) in a block if it is computed
617
   at least once and expression would contain the same value if the
618
   computation was moved to the end of the block.
619
 
620
   An expression is locally anticipatable in a block if it is computed at
621
   least once and expression would contain the same value if the computation
622
   was moved to the beginning of the block.
623
 
624
   We call this routine for pre and code hoisting.  They all compute
625
   basically the same information and thus can easily share this code.
626
 
627
   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
628
   properties.  If NULL, then it is not necessary to compute or record that
629
   particular property.
630
 
631
   TABLE controls which hash table to look at.  */
632
 
633
static void
634
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
635
                          struct hash_table_d *table)
636
{
637
  unsigned int i;
638
 
639
  /* Initialize any bitmaps that were passed in.  */
640
  if (transp)
641
    {
642
      sbitmap_vector_ones (transp, last_basic_block);
643
    }
644
 
645
  if (comp)
646
    sbitmap_vector_zero (comp, last_basic_block);
647
  if (antloc)
648
    sbitmap_vector_zero (antloc, last_basic_block);
649
 
650
  for (i = 0; i < table->size; i++)
651
    {
652
      struct expr *expr;
653
 
654
      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
655
        {
656
          int indx = expr->bitmap_index;
657
          struct occr *occr;
658
 
659
          /* The expression is transparent in this block if it is not killed.
660
             We start by assuming all are transparent [none are killed], and
661
             then reset the bits for those that are.  */
662
          if (transp)
663
            compute_transp (expr->expr, indx, transp);
664
 
665
          /* The occurrences recorded in antic_occr are exactly those that
666
             we want to set to nonzero in ANTLOC.  */
667
          if (antloc)
668
            for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
669
              {
670
                SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
671
 
672
                /* While we're scanning the table, this is a good place to
673
                   initialize this.  */
674
                occr->deleted_p = 0;
675
              }
676
 
677
          /* The occurrences recorded in avail_occr are exactly those that
678
             we want to set to nonzero in COMP.  */
679
          if (comp)
680
            for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
681
              {
682
                SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
683
 
684
                /* While we're scanning the table, this is a good place to
685
                   initialize this.  */
686
                occr->copied_p = 0;
687
              }
688
 
689
          /* While we're scanning the table, this is a good place to
690
             initialize this.  */
691
          expr->reaching_reg = 0;
692
        }
693
    }
694
}
695
 
696
/* Hash table support.  */
697
 
698
struct reg_avail_info
699
{
700
  basic_block last_bb;
701
  int first_set;
702
  int last_set;
703
};
704
 
705
static struct reg_avail_info *reg_avail_info;
706
static basic_block current_bb;
707
 
708
/* See whether X, the source of a set, is something we want to consider for
709
   GCSE.  */
710
 
711
static int
712
want_to_gcse_p (rtx x, int *max_distance_ptr)
713
{
714
#ifdef STACK_REGS
715
  /* On register stack architectures, don't GCSE constants from the
716
     constant pool, as the benefits are often swamped by the overhead
717
     of shuffling the register stack between basic blocks.  */
718
  if (IS_STACK_MODE (GET_MODE (x)))
719
    x = avoid_constant_pool_reference (x);
720
#endif
721
 
722
  /* GCSE'ing constants:
723
 
724
     We do not specifically distinguish between constant and non-constant
725
     expressions in PRE and Hoist.  We use set_src_cost below to limit
726
     the maximum distance simple expressions can travel.
727
 
728
     Nevertheless, constants are much easier to GCSE, and, hence,
729
     it is easy to overdo the optimizations.  Usually, excessive PRE and
730
     Hoisting of constant leads to increased register pressure.
731
 
732
     RA can deal with this by rematerialing some of the constants.
733
     Therefore, it is important that the back-end generates sets of constants
734
     in a way that allows reload rematerialize them under high register
735
     pressure, i.e., a pseudo register with REG_EQUAL to constant
736
     is set only once.  Failing to do so will result in IRA/reload
737
     spilling such constants under high register pressure instead of
738
     rematerializing them.  */
739
 
740
  switch (GET_CODE (x))
741
    {
742
    case REG:
743
    case SUBREG:
744
    case CALL:
745
      return 0;
746
 
747
    case CONST_INT:
748
    case CONST_DOUBLE:
749
    case CONST_FIXED:
750
    case CONST_VECTOR:
751
      if (!doing_code_hoisting_p)
752
        /* Do not PRE constants.  */
753
        return 0;
754
 
755
      /* FALLTHRU */
756
 
757
    default:
758
      if (doing_code_hoisting_p)
759
        /* PRE doesn't implement max_distance restriction.  */
760
        {
761
          int cost;
762
          int max_distance;
763
 
764
          gcc_assert (!optimize_function_for_speed_p (cfun)
765
                      && optimize_function_for_size_p (cfun));
766
          cost = set_src_cost (x, 0);
767
 
768
          if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
769
            {
770
              max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
771
              if (max_distance == 0)
772
                return 0;
773
 
774
              gcc_assert (max_distance > 0);
775
            }
776
          else
777
            max_distance = 0;
778
 
779
          if (max_distance_ptr)
780
            *max_distance_ptr = max_distance;
781
        }
782
 
783
      return can_assign_to_reg_without_clobbers_p (x);
784
    }
785
}
786
 
787
/* Used internally by can_assign_to_reg_without_clobbers_p.  */
788
 
789
static GTY(()) rtx test_insn;
790
 
791
/* Return true if we can assign X to a pseudo register such that the
792
   resulting insn does not result in clobbering a hard register as a
793
   side-effect.
794
 
795
   Additionally, if the target requires it, check that the resulting insn
796
   can be copied.  If it cannot, this means that X is special and probably
797
   has hidden side-effects we don't want to mess with.
798
 
799
   This function is typically used by code motion passes, to verify
800
   that it is safe to insert an insn without worrying about clobbering
801
   maybe live hard regs.  */
802
 
803
bool
804
can_assign_to_reg_without_clobbers_p (rtx x)
805
{
806
  int num_clobbers = 0;
807
  int icode;
808
 
809
  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
810
  if (general_operand (x, GET_MODE (x)))
811
    return 1;
812
  else if (GET_MODE (x) == VOIDmode)
813
    return 0;
814
 
815
  /* Otherwise, check if we can make a valid insn from it.  First initialize
816
     our test insn if we haven't already.  */
817
  if (test_insn == 0)
818
    {
819
      test_insn
820
        = make_insn_raw (gen_rtx_SET (VOIDmode,
821
                                      gen_rtx_REG (word_mode,
822
                                                   FIRST_PSEUDO_REGISTER * 2),
823
                                      const0_rtx));
824
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
825
    }
826
 
827
  /* Now make an insn like the one we would make when GCSE'ing and see if
828
     valid.  */
829
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
830
  SET_SRC (PATTERN (test_insn)) = x;
831
 
832
  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
833
  if (icode < 0)
834
    return false;
835
 
836
  if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
837
    return false;
838
 
839
  if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
840
    return false;
841
 
842
  return true;
843
}
844
 
845
/* Return nonzero if the operands of expression X are unchanged from the
846
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
847
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
848
 
849
static int
850
oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
851
{
852
  int i, j;
853
  enum rtx_code code;
854
  const char *fmt;
855
 
856
  if (x == 0)
857
    return 1;
858
 
859
  code = GET_CODE (x);
860
  switch (code)
861
    {
862
    case REG:
863
      {
864
        struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
865
 
866
        if (info->last_bb != current_bb)
867
          return 1;
868
        if (avail_p)
869
          return info->last_set < DF_INSN_LUID (insn);
870
        else
871
          return info->first_set >= DF_INSN_LUID (insn);
872
      }
873
 
874
    case MEM:
875
      if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
876
                                  x, avail_p))
877
        return 0;
878
      else
879
        return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
880
 
881
    case PRE_DEC:
882
    case PRE_INC:
883
    case POST_DEC:
884
    case POST_INC:
885
    case PRE_MODIFY:
886
    case POST_MODIFY:
887
      return 0;
888
 
889
    case PC:
890
    case CC0: /*FIXME*/
891
    case CONST:
892
    case CONST_INT:
893
    case CONST_DOUBLE:
894
    case CONST_FIXED:
895
    case CONST_VECTOR:
896
    case SYMBOL_REF:
897
    case LABEL_REF:
898
    case ADDR_VEC:
899
    case ADDR_DIFF_VEC:
900
      return 1;
901
 
902
    default:
903
      break;
904
    }
905
 
906
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
907
    {
908
      if (fmt[i] == 'e')
909
        {
910
          /* If we are about to do the last recursive call needed at this
911
             level, change it into iteration.  This function is called enough
912
             to be worth it.  */
913
          if (i == 0)
914
            return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
915
 
916
          else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
917
            return 0;
918
        }
919
      else if (fmt[i] == 'E')
920
        for (j = 0; j < XVECLEN (x, i); j++)
921
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
922
            return 0;
923
    }
924
 
925
  return 1;
926
}
927
 
928
/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */
929
 
930
struct mem_conflict_info
931
{
932
  /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
933
     see if a memory store conflicts with this memory load.  */
934
  const_rtx mem;
935
 
936
  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
937
     references.  */
938
  bool conflict;
939
};
940
 
941
/* DEST is the output of an instruction.  If it is a memory reference and
942
   possibly conflicts with the load found in DATA, then communicate this
943
   information back through DATA.  */
944
 
945
static void
946
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
947
                          void *data)
948
{
949
  struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
950
 
951
  while (GET_CODE (dest) == SUBREG
952
         || GET_CODE (dest) == ZERO_EXTRACT
953
         || GET_CODE (dest) == STRICT_LOW_PART)
954
    dest = XEXP (dest, 0);
955
 
956
  /* If DEST is not a MEM, then it will not conflict with the load.  Note
957
     that function calls are assumed to clobber memory, but are handled
958
     elsewhere.  */
959
  if (! MEM_P (dest))
960
    return;
961
 
962
  /* If we are setting a MEM in our list of specially recognized MEMs,
963
     don't mark as killed this time.  */
964
  if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
965
    {
966
      if (!find_rtx_in_ldst (dest))
967
        mci->conflict = true;
968
      return;
969
    }
970
 
971
  if (true_dependence (dest, GET_MODE (dest), mci->mem))
972
    mci->conflict = true;
973
}
974
 
975
/* Return nonzero if the expression in X (a memory reference) is killed
976
   in block BB before or after the insn with the LUID in UID_LIMIT.
977
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
978
   before UID_LIMIT.
979
 
980
   To check the entire block, set UID_LIMIT to max_uid + 1 and
981
   AVAIL_P to 0.  */
982
 
983
static int
984
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
985
                        int avail_p)
986
{
987
  VEC (rtx,heap) *list = modify_mem_list[bb->index];
988
  rtx setter;
989
  unsigned ix;
990
 
991
  /* If this is a readonly then we aren't going to be changing it.  */
992
  if (MEM_READONLY_P (x))
993
    return 0;
994
 
995
  FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
996
    {
997
      struct mem_conflict_info mci;
998
 
999
      /* Ignore entries in the list that do not apply.  */
1000
      if ((avail_p
1001
           && DF_INSN_LUID (setter) < uid_limit)
1002
          || (! avail_p
1003
              && DF_INSN_LUID (setter) > uid_limit))
1004
        continue;
1005
 
1006
      /* If SETTER is a call everything is clobbered.  Note that calls
1007
         to pure functions are never put on the list, so we need not
1008
         worry about them.  */
1009
      if (CALL_P (setter))
1010
        return 1;
1011
 
1012
      /* SETTER must be an INSN of some kind that sets memory.  Call
1013
         note_stores to examine each hunk of memory that is modified.  */
1014
      mci.mem = x;
1015
      mci.conflict = false;
1016
      note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1017
      if (mci.conflict)
1018
        return 1;
1019
    }
1020
  return 0;
1021
}
1022
 
1023
/* Return nonzero if the operands of expression X are unchanged from
1024
   the start of INSN's basic block up to but not including INSN.  */
1025
 
1026
static int
1027
oprs_anticipatable_p (const_rtx x, const_rtx insn)
1028
{
1029
  return oprs_unchanged_p (x, insn, 0);
1030
}
1031
 
1032
/* Return nonzero if the operands of expression X are unchanged from
1033
   INSN to the end of INSN's basic block.  */
1034
 
1035
static int
1036
oprs_available_p (const_rtx x, const_rtx insn)
1037
{
1038
  return oprs_unchanged_p (x, insn, 1);
1039
}
1040
 
1041
/* Hash expression X.
1042
 
1043
   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1044
   indicating if a volatile operand is found or if the expression contains
1045
   something we don't want to insert in the table.  HASH_TABLE_SIZE is
1046
   the current size of the hash table to be probed.  */
1047
 
1048
static unsigned int
1049
hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1050
           int hash_table_size)
1051
{
1052
  unsigned int hash;
1053
 
1054
  *do_not_record_p = 0;
1055
 
1056
  hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1057
  return hash % hash_table_size;
1058
}
1059
 
1060
/* Return nonzero if exp1 is equivalent to exp2.  */
1061
 
1062
static int
1063
expr_equiv_p (const_rtx x, const_rtx y)
1064
{
1065
  return exp_equiv_p (x, y, 0, true);
1066
}
1067
 
1068
/* Insert expression X in INSN in the hash TABLE.
1069
   If it is already present, record it as the last occurrence in INSN's
1070
   basic block.
1071
 
1072
   MODE is the mode of the value X is being stored into.
1073
   It is only used if X is a CONST_INT.
1074
 
1075
   ANTIC_P is nonzero if X is an anticipatable expression.
1076
   AVAIL_P is nonzero if X is an available expression.
1077
 
1078
   MAX_DISTANCE is the maximum distance in instructions this expression can
1079
   be moved.  */
1080
 
1081
static void
1082
insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1083
                      int avail_p, int max_distance, struct hash_table_d *table)
1084
{
1085
  int found, do_not_record_p;
1086
  unsigned int hash;
1087
  struct expr *cur_expr, *last_expr = NULL;
1088
  struct occr *antic_occr, *avail_occr;
1089
 
1090
  hash = hash_expr (x, mode, &do_not_record_p, table->size);
1091
 
1092
  /* Do not insert expression in table if it contains volatile operands,
1093
     or if hash_expr determines the expression is something we don't want
1094
     to or can't handle.  */
1095
  if (do_not_record_p)
1096
    return;
1097
 
1098
  cur_expr = table->table[hash];
1099
  found = 0;
1100
 
1101
  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1102
    {
1103
      /* If the expression isn't found, save a pointer to the end of
1104
         the list.  */
1105
      last_expr = cur_expr;
1106
      cur_expr = cur_expr->next_same_hash;
1107
    }
1108
 
1109
  if (! found)
1110
    {
1111
      cur_expr = GOBNEW (struct expr);
1112
      bytes_used += sizeof (struct expr);
1113
      if (table->table[hash] == NULL)
1114
        /* This is the first pattern that hashed to this index.  */
1115
        table->table[hash] = cur_expr;
1116
      else
1117
        /* Add EXPR to end of this hash chain.  */
1118
        last_expr->next_same_hash = cur_expr;
1119
 
1120
      /* Set the fields of the expr element.  */
1121
      cur_expr->expr = x;
1122
      cur_expr->bitmap_index = table->n_elems++;
1123
      cur_expr->next_same_hash = NULL;
1124
      cur_expr->antic_occr = NULL;
1125
      cur_expr->avail_occr = NULL;
1126
      gcc_assert (max_distance >= 0);
1127
      cur_expr->max_distance = max_distance;
1128
    }
1129
  else
1130
    gcc_assert (cur_expr->max_distance == max_distance);
1131
 
1132
  /* Now record the occurrence(s).  */
1133
  if (antic_p)
1134
    {
1135
      antic_occr = cur_expr->antic_occr;
1136
 
1137
      if (antic_occr
1138
          && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1139
        antic_occr = NULL;
1140
 
1141
      if (antic_occr)
1142
        /* Found another instance of the expression in the same basic block.
1143
           Prefer the currently recorded one.  We want the first one in the
1144
           block and the block is scanned from start to end.  */
1145
        ; /* nothing to do */
1146
      else
1147
        {
1148
          /* First occurrence of this expression in this basic block.  */
1149
          antic_occr = GOBNEW (struct occr);
1150
          bytes_used += sizeof (struct occr);
1151
          antic_occr->insn = insn;
1152
          antic_occr->next = cur_expr->antic_occr;
1153
          antic_occr->deleted_p = 0;
1154
          cur_expr->antic_occr = antic_occr;
1155
        }
1156
    }
1157
 
1158
  if (avail_p)
1159
    {
1160
      avail_occr = cur_expr->avail_occr;
1161
 
1162
      if (avail_occr
1163
          && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1164
        {
1165
          /* Found another instance of the expression in the same basic block.
1166
             Prefer this occurrence to the currently recorded one.  We want
1167
             the last one in the block and the block is scanned from start
1168
             to end.  */
1169
          avail_occr->insn = insn;
1170
        }
1171
      else
1172
        {
1173
          /* First occurrence of this expression in this basic block.  */
1174
          avail_occr = GOBNEW (struct occr);
1175
          bytes_used += sizeof (struct occr);
1176
          avail_occr->insn = insn;
1177
          avail_occr->next = cur_expr->avail_occr;
1178
          avail_occr->deleted_p = 0;
1179
          cur_expr->avail_occr = avail_occr;
1180
        }
1181
    }
1182
}
1183
 
1184
/* Scan SET present in INSN and add an entry to the hash TABLE.  */
1185
 
1186
static void
1187
hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1188
{
1189
  rtx src = SET_SRC (set);
1190
  rtx dest = SET_DEST (set);
1191
  rtx note;
1192
 
1193
  if (GET_CODE (src) == CALL)
1194
    hash_scan_call (src, insn, table);
1195
 
1196
  else if (REG_P (dest))
1197
    {
1198
      unsigned int regno = REGNO (dest);
1199
      int max_distance = 0;
1200
 
1201
      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1202
 
1203
         This allows us to do a single GCSE pass and still eliminate
1204
         redundant constants, addresses or other expressions that are
1205
         constructed with multiple instructions.
1206
 
1207
         However, keep the original SRC if INSN is a simple reg-reg move.
1208
         In this case, there will almost always be a REG_EQUAL note on the
1209
         insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1210
         for INSN, we miss copy propagation opportunities and we perform the
1211
         same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1212
         do more than one PRE GCSE pass.
1213
 
1214
         Note that this does not impede profitable constant propagations.  We
1215
         "look through" reg-reg sets in lookup_avail_set.  */
1216
      note = find_reg_equal_equiv_note (insn);
1217
      if (note != 0
1218
          && REG_NOTE_KIND (note) == REG_EQUAL
1219
          && !REG_P (src)
1220
          && want_to_gcse_p (XEXP (note, 0), NULL))
1221
        src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1222
 
1223
      /* Only record sets of pseudo-regs in the hash table.  */
1224
      if (regno >= FIRST_PSEUDO_REGISTER
1225
          /* Don't GCSE something if we can't do a reg/reg copy.  */
1226
          && can_copy_p (GET_MODE (dest))
1227
          /* GCSE commonly inserts instruction after the insn.  We can't
1228
             do that easily for EH edges so disable GCSE on these for now.  */
1229
          /* ??? We can now easily create new EH landing pads at the
1230
             gimple level, for splitting edges; there's no reason we
1231
             can't do the same thing at the rtl level.  */
1232
          && !can_throw_internal (insn)
1233
          /* Is SET_SRC something we want to gcse?  */
1234
          && want_to_gcse_p (src, &max_distance)
1235
          /* Don't CSE a nop.  */
1236
          && ! set_noop_p (set)
1237
          /* Don't GCSE if it has attached REG_EQUIV note.
1238
             At this point this only function parameters should have
1239
             REG_EQUIV notes and if the argument slot is used somewhere
1240
             explicitly, it means address of parameter has been taken,
1241
             so we should not extend the lifetime of the pseudo.  */
1242
          && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1243
        {
1244
          /* An expression is not anticipatable if its operands are
1245
             modified before this insn or if this is not the only SET in
1246
             this insn.  The latter condition does not have to mean that
1247
             SRC itself is not anticipatable, but we just will not be
1248
             able to handle code motion of insns with multiple sets.  */
1249
          int antic_p = oprs_anticipatable_p (src, insn)
1250
                        && !multiple_sets (insn);
1251
          /* An expression is not available if its operands are
1252
             subsequently modified, including this insn.  It's also not
1253
             available if this is a branch, because we can't insert
1254
             a set after the branch.  */
1255
          int avail_p = (oprs_available_p (src, insn)
1256
                         && ! JUMP_P (insn));
1257
 
1258
          insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1259
                                max_distance, table);
1260
        }
1261
    }
1262
  /* In case of store we want to consider the memory value as available in
1263
     the REG stored in that memory. This makes it possible to remove
1264
     redundant loads from due to stores to the same location.  */
1265
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1266
      {
1267
        unsigned int regno = REGNO (src);
1268
        int max_distance = 0;
1269
 
1270
        /* Only record sets of pseudo-regs in the hash table.  */
1271
        if (regno >= FIRST_PSEUDO_REGISTER
1272
           /* Don't GCSE something if we can't do a reg/reg copy.  */
1273
           && can_copy_p (GET_MODE (src))
1274
           /* GCSE commonly inserts instruction after the insn.  We can't
1275
              do that easily for EH edges so disable GCSE on these for now.  */
1276
           && !can_throw_internal (insn)
1277
           /* Is SET_DEST something we want to gcse?  */
1278
           && want_to_gcse_p (dest, &max_distance)
1279
           /* Don't CSE a nop.  */
1280
           && ! set_noop_p (set)
1281
           /* Don't GCSE if it has attached REG_EQUIV note.
1282
              At this point this only function parameters should have
1283
              REG_EQUIV notes and if the argument slot is used somewhere
1284
              explicitly, it means address of parameter has been taken,
1285
              so we should not extend the lifetime of the pseudo.  */
1286
           && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1287
               || ! MEM_P (XEXP (note, 0))))
1288
             {
1289
               /* Stores are never anticipatable.  */
1290
               int antic_p = 0;
1291
               /* An expression is not available if its operands are
1292
                  subsequently modified, including this insn.  It's also not
1293
                  available if this is a branch, because we can't insert
1294
                  a set after the branch.  */
1295
               int avail_p = oprs_available_p (dest, insn)
1296
                             && ! JUMP_P (insn);
1297
 
1298
               /* Record the memory expression (DEST) in the hash table.  */
1299
               insert_expr_in_table (dest, GET_MODE (dest), insn,
1300
                                     antic_p, avail_p, max_distance, table);
1301
             }
1302
      }
1303
}
1304
 
1305
static void
1306
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1307
                   struct hash_table_d *table ATTRIBUTE_UNUSED)
1308
{
1309
  /* Currently nothing to do.  */
1310
}
1311
 
1312
static void
1313
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1314
                struct hash_table_d *table ATTRIBUTE_UNUSED)
1315
{
1316
  /* Currently nothing to do.  */
1317
}
1318
 
1319
/* Process INSN and add hash table entries as appropriate.  */
1320
 
1321
static void
1322
hash_scan_insn (rtx insn, struct hash_table_d *table)
1323
{
1324
  rtx pat = PATTERN (insn);
1325
  int i;
1326
 
1327
  /* Pick out the sets of INSN and for other forms of instructions record
1328
     what's been modified.  */
1329
 
1330
  if (GET_CODE (pat) == SET)
1331
    hash_scan_set (pat, insn, table);
1332
 
1333
  else if (GET_CODE (pat) == CLOBBER)
1334
    hash_scan_clobber (pat, insn, table);
1335
 
1336
  else if (GET_CODE (pat) == CALL)
1337
    hash_scan_call (pat, insn, table);
1338
 
1339
  else if (GET_CODE (pat) == PARALLEL)
1340
    for (i = 0; i < XVECLEN (pat, 0); i++)
1341
      {
1342
        rtx x = XVECEXP (pat, 0, i);
1343
 
1344
        if (GET_CODE (x) == SET)
1345
          hash_scan_set (x, insn, table);
1346
        else if (GET_CODE (x) == CLOBBER)
1347
          hash_scan_clobber (x, insn, table);
1348
        else if (GET_CODE (x) == CALL)
1349
          hash_scan_call (x, insn, table);
1350
      }
1351
}
1352
 
1353
/* Dump the hash table TABLE to file FILE under the name NAME.  */
1354
 
1355
static void
1356
dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1357
{
1358
  int i;
1359
  /* Flattened out table, so it's printed in proper order.  */
1360
  struct expr **flat_table;
1361
  unsigned int *hash_val;
1362
  struct expr *expr;
1363
 
1364
  flat_table = XCNEWVEC (struct expr *, table->n_elems);
1365
  hash_val = XNEWVEC (unsigned int, table->n_elems);
1366
 
1367
  for (i = 0; i < (int) table->size; i++)
1368
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1369
      {
1370
        flat_table[expr->bitmap_index] = expr;
1371
        hash_val[expr->bitmap_index] = i;
1372
      }
1373
 
1374
  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1375
           name, table->size, table->n_elems);
1376
 
1377
  for (i = 0; i < (int) table->n_elems; i++)
1378
    if (flat_table[i] != 0)
1379
      {
1380
        expr = flat_table[i];
1381
        fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1382
                 expr->bitmap_index, hash_val[i], expr->max_distance);
1383
        print_rtl (file, expr->expr);
1384
        fprintf (file, "\n");
1385
      }
1386
 
1387
  fprintf (file, "\n");
1388
 
1389
  free (flat_table);
1390
  free (hash_val);
1391
}
1392
 
1393
/* Record register first/last/block set information for REGNO in INSN.
1394
 
1395
   first_set records the first place in the block where the register
1396
   is set and is used to compute "anticipatability".
1397
 
1398
   last_set records the last place in the block where the register
1399
   is set and is used to compute "availability".
1400
 
1401
   last_bb records the block for which first_set and last_set are
1402
   valid, as a quick test to invalidate them.  */
1403
 
1404
static void
1405
record_last_reg_set_info (rtx insn, int regno)
1406
{
1407
  struct reg_avail_info *info = &reg_avail_info[regno];
1408
  int luid = DF_INSN_LUID (insn);
1409
 
1410
  info->last_set = luid;
1411
  if (info->last_bb != current_bb)
1412
    {
1413
      info->last_bb = current_bb;
1414
      info->first_set = luid;
1415
    }
1416
}
1417
 
1418
/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1419
   Note we store a pair of elements in the list, so they have to be
1420
   taken off pairwise.  */
1421
 
1422
static void
1423
canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1424
                   void * v_insn)
1425
{
1426
  rtx dest_addr, insn;
1427
  int bb;
1428
  modify_pair *pair;
1429
 
1430
  while (GET_CODE (dest) == SUBREG
1431
      || GET_CODE (dest) == ZERO_EXTRACT
1432
      || GET_CODE (dest) == STRICT_LOW_PART)
1433
    dest = XEXP (dest, 0);
1434
 
1435
  /* If DEST is not a MEM, then it will not conflict with a load.  Note
1436
     that function calls are assumed to clobber memory, but are handled
1437
     elsewhere.  */
1438
 
1439
  if (! MEM_P (dest))
1440
    return;
1441
 
1442
  dest_addr = get_addr (XEXP (dest, 0));
1443
  dest_addr = canon_rtx (dest_addr);
1444
  insn = (rtx) v_insn;
1445
  bb = BLOCK_FOR_INSN (insn)->index;
1446
 
1447
  pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1448
  pair->dest = dest;
1449
  pair->dest_addr = dest_addr;
1450
}
1451
 
1452
/* Record memory modification information for INSN.  We do not actually care
1453
   about the memory location(s) that are set, or even how they are set (consider
1454
   a CALL_INSN).  We merely need to record which insns modify memory.  */
1455
 
1456
static void
1457
record_last_mem_set_info (rtx insn)
1458
{
1459
  int bb = BLOCK_FOR_INSN (insn)->index;
1460
 
1461
  /* load_killed_in_block_p will handle the case of calls clobbering
1462
     everything.  */
1463
  VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1464
  bitmap_set_bit (modify_mem_list_set, bb);
1465
 
1466
  if (CALL_P (insn))
1467
    bitmap_set_bit (blocks_with_calls, bb);
1468
  else
1469
    note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1470
}
1471
 
1472
/* Called from compute_hash_table via note_stores to handle one
1473
   SET or CLOBBER in an insn.  DATA is really the instruction in which
1474
   the SET is taking place.  */
1475
 
1476
static void
1477
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1478
{
1479
  rtx last_set_insn = (rtx) data;
1480
 
1481
  if (GET_CODE (dest) == SUBREG)
1482
    dest = SUBREG_REG (dest);
1483
 
1484
  if (REG_P (dest))
1485
    record_last_reg_set_info (last_set_insn, REGNO (dest));
1486
  else if (MEM_P (dest)
1487
           /* Ignore pushes, they clobber nothing.  */
1488
           && ! push_operand (dest, GET_MODE (dest)))
1489
    record_last_mem_set_info (last_set_insn);
1490
}
1491
 
1492
/* Top level function to create an expression hash table.
1493
 
1494
   Expression entries are placed in the hash table if
1495
   - they are of the form (set (pseudo-reg) src),
1496
   - src is something we want to perform GCSE on,
1497
   - none of the operands are subsequently modified in the block
1498
 
1499
   Currently src must be a pseudo-reg or a const_int.
1500
 
1501
   TABLE is the table computed.  */
1502
 
1503
static void
1504
compute_hash_table_work (struct hash_table_d *table)
1505
{
1506
  int i;
1507
 
1508
  /* re-Cache any INSN_LIST nodes we have allocated.  */
1509
  clear_modify_mem_tables ();
1510
  /* Some working arrays used to track first and last set in each block.  */
1511
  reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1512
 
1513
  for (i = 0; i < max_reg_num (); ++i)
1514
    reg_avail_info[i].last_bb = NULL;
1515
 
1516
  FOR_EACH_BB (current_bb)
1517
    {
1518
      rtx insn;
1519
      unsigned int regno;
1520
 
1521
      /* First pass over the instructions records information used to
1522
         determine when registers and memory are first and last set.  */
1523
      FOR_BB_INSNS (current_bb, insn)
1524
        {
1525
          if (!NONDEBUG_INSN_P (insn))
1526
            continue;
1527
 
1528
          if (CALL_P (insn))
1529
            {
1530
              for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1531
                if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1532
                  record_last_reg_set_info (insn, regno);
1533
 
1534
              if (! RTL_CONST_OR_PURE_CALL_P (insn))
1535
                record_last_mem_set_info (insn);
1536
            }
1537
 
1538
          note_stores (PATTERN (insn), record_last_set_info, insn);
1539
        }
1540
 
1541
      /* The next pass builds the hash table.  */
1542
      FOR_BB_INSNS (current_bb, insn)
1543
        if (NONDEBUG_INSN_P (insn))
1544
          hash_scan_insn (insn, table);
1545
    }
1546
 
1547
  free (reg_avail_info);
1548
  reg_avail_info = NULL;
1549
}
1550
 
1551
/* Allocate space for the set/expr hash TABLE.
1552
   It is used to determine the number of buckets to use.  */
1553
 
1554
static void
1555
alloc_hash_table (struct hash_table_d *table)
1556
{
1557
  int n;
1558
 
1559
  n = get_max_insn_count ();
1560
 
1561
  table->size = n / 4;
1562
  if (table->size < 11)
1563
    table->size = 11;
1564
 
1565
  /* Attempt to maintain efficient use of hash table.
1566
     Making it an odd number is simplest for now.
1567
     ??? Later take some measurements.  */
1568
  table->size |= 1;
1569
  n = table->size * sizeof (struct expr *);
1570
  table->table = GNEWVAR (struct expr *, n);
1571
}
1572
 
1573
/* Free things allocated by alloc_hash_table.  */
1574
 
1575
static void
1576
free_hash_table (struct hash_table_d *table)
1577
{
1578
  free (table->table);
1579
}
1580
 
1581
/* Compute the expression hash table TABLE.  */
1582
 
1583
static void
1584
compute_hash_table (struct hash_table_d *table)
1585
{
1586
  /* Initialize count of number of entries in hash table.  */
1587
  table->n_elems = 0;
1588
  memset (table->table, 0, table->size * sizeof (struct expr *));
1589
 
1590
  compute_hash_table_work (table);
1591
}
1592
 
1593
/* Expression tracking support.  */
1594
 
1595
/* Clear canon_modify_mem_list and modify_mem_list tables.  */
1596
static void
1597
clear_modify_mem_tables (void)
1598
{
1599
  unsigned i;
1600
  bitmap_iterator bi;
1601
 
1602
  EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1603
    {
1604
      VEC_free (rtx, heap, modify_mem_list[i]);
1605
      VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1606
    }
1607
  bitmap_clear (modify_mem_list_set);
1608
  bitmap_clear (blocks_with_calls);
1609
}
1610
 
1611
/* Release memory used by modify_mem_list_set.  */
1612
 
1613
static void
1614
free_modify_mem_tables (void)
1615
{
1616
  clear_modify_mem_tables ();
1617
  free (modify_mem_list);
1618
  free (canon_modify_mem_list);
1619
  modify_mem_list = 0;
1620
  canon_modify_mem_list = 0;
1621
}
1622
 
1623
/* For each block, compute whether X is transparent.  X is either an
1624
   expression or an assignment [though we don't care which, for this context
1625
   an assignment is treated as an expression].  For each block where an
1626
   element of X is modified, reset the INDX bit in BMAP.  */
1627
 
1628
static void
1629
compute_transp (const_rtx x, int indx, sbitmap *bmap)
1630
{
1631
  int i, j;
1632
  enum rtx_code code;
1633
  const char *fmt;
1634
 
1635
  /* repeat is used to turn tail-recursion into iteration since GCC
1636
     can't do it when there's no return value.  */
1637
 repeat:
1638
 
1639
  if (x == 0)
1640
    return;
1641
 
1642
  code = GET_CODE (x);
1643
  switch (code)
1644
    {
1645
    case REG:
1646
        {
1647
          df_ref def;
1648
          for (def = DF_REG_DEF_CHAIN (REGNO (x));
1649
               def;
1650
               def = DF_REF_NEXT_REG (def))
1651
            RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1652
        }
1653
 
1654
      return;
1655
 
1656
    case MEM:
1657
      if (! MEM_READONLY_P (x))
1658
        {
1659
          bitmap_iterator bi;
1660
          unsigned bb_index;
1661
 
1662
          /* First handle all the blocks with calls.  We don't need to
1663
             do any list walking for them.  */
1664
          EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1665
            {
1666
              RESET_BIT (bmap[bb_index], indx);
1667
            }
1668
 
1669
            /* Now iterate over the blocks which have memory modifications
1670
               but which do not have any calls.  */
1671
            EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1672
                                            blocks_with_calls,
1673
                                            0, bb_index, bi)
1674
              {
1675
                VEC (modify_pair,heap) *list
1676
                  = canon_modify_mem_list[bb_index];
1677
                modify_pair *pair;
1678
                unsigned ix;
1679
 
1680
                FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1681
                  {
1682
                    rtx dest = pair->dest;
1683
                    rtx dest_addr = pair->dest_addr;
1684
 
1685
                    if (canon_true_dependence (dest, GET_MODE (dest),
1686
                                               dest_addr, x, NULL_RTX))
1687
                      RESET_BIT (bmap[bb_index], indx);
1688
                  }
1689
              }
1690
        }
1691
 
1692
      x = XEXP (x, 0);
1693
      goto repeat;
1694
 
1695
    case PC:
1696
    case CC0: /*FIXME*/
1697
    case CONST:
1698
    case CONST_INT:
1699
    case CONST_DOUBLE:
1700
    case CONST_FIXED:
1701
    case CONST_VECTOR:
1702
    case SYMBOL_REF:
1703
    case LABEL_REF:
1704
    case ADDR_VEC:
1705
    case ADDR_DIFF_VEC:
1706
      return;
1707
 
1708
    default:
1709
      break;
1710
    }
1711
 
1712
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1713
    {
1714
      if (fmt[i] == 'e')
1715
        {
1716
          /* If we are about to do the last recursive call
1717
             needed at this level, change it into iteration.
1718
             This function is called enough to be worth it.  */
1719
          if (i == 0)
1720
            {
1721
              x = XEXP (x, i);
1722
              goto repeat;
1723
            }
1724
 
1725
          compute_transp (XEXP (x, i), indx, bmap);
1726
        }
1727
      else if (fmt[i] == 'E')
1728
        for (j = 0; j < XVECLEN (x, i); j++)
1729
          compute_transp (XVECEXP (x, i, j), indx, bmap);
1730
    }
1731
}
1732
 
1733
/* Compute PRE+LCM working variables.  */
1734
 
1735
/* Local properties of expressions.  */
1736
 
1737
/* Nonzero for expressions that are transparent in the block.  */
1738
static sbitmap *transp;
1739
 
1740
/* Nonzero for expressions that are computed (available) in the block.  */
1741
static sbitmap *comp;
1742
 
1743
/* Nonzero for expressions that are locally anticipatable in the block.  */
1744
static sbitmap *antloc;
1745
 
1746
/* Nonzero for expressions where this block is an optimal computation
1747
   point.  */
1748
static sbitmap *pre_optimal;
1749
 
1750
/* Nonzero for expressions which are redundant in a particular block.  */
1751
static sbitmap *pre_redundant;
1752
 
1753
/* Nonzero for expressions which should be inserted on a specific edge.  */
1754
static sbitmap *pre_insert_map;
1755
 
1756
/* Nonzero for expressions which should be deleted in a specific block.  */
1757
static sbitmap *pre_delete_map;
1758
 
1759
/* Allocate vars used for PRE analysis.  */
1760
 
1761
static void
1762
alloc_pre_mem (int n_blocks, int n_exprs)
1763
{
1764
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1765
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1766
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1767
 
1768
  pre_optimal = NULL;
1769
  pre_redundant = NULL;
1770
  pre_insert_map = NULL;
1771
  pre_delete_map = NULL;
1772
  ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1773
 
1774
  /* pre_insert and pre_delete are allocated later.  */
1775
}
1776
 
1777
/* Free vars used for PRE analysis.  */
1778
 
1779
static void
1780
free_pre_mem (void)
1781
{
1782
  sbitmap_vector_free (transp);
1783
  sbitmap_vector_free (comp);
1784
 
1785
  /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1786
 
1787
  if (pre_optimal)
1788
    sbitmap_vector_free (pre_optimal);
1789
  if (pre_redundant)
1790
    sbitmap_vector_free (pre_redundant);
1791
  if (pre_insert_map)
1792
    sbitmap_vector_free (pre_insert_map);
1793
  if (pre_delete_map)
1794
    sbitmap_vector_free (pre_delete_map);
1795
 
1796
  transp = comp = NULL;
1797
  pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1798
}
1799
 
1800
/* Remove certain expressions from anticipatable and transparent
1801
   sets of basic blocks that have incoming abnormal edge.
1802
   For PRE remove potentially trapping expressions to avoid placing
1803
   them on abnormal edges.  For hoisting remove memory references that
1804
   can be clobbered by calls.  */
1805
 
1806
static void
1807
prune_expressions (bool pre_p)
1808
{
1809
  sbitmap prune_exprs;
1810
  struct expr *expr;
1811
  unsigned int ui;
1812
  basic_block bb;
1813
 
1814
  prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1815
  sbitmap_zero (prune_exprs);
1816
  for (ui = 0; ui < expr_hash_table.size; ui++)
1817
    {
1818
      for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1819
        {
1820
          /* Note potentially trapping expressions.  */
1821
          if (may_trap_p (expr->expr))
1822
            {
1823
              SET_BIT (prune_exprs, expr->bitmap_index);
1824
              continue;
1825
            }
1826
 
1827
          if (!pre_p && MEM_P (expr->expr))
1828
            /* Note memory references that can be clobbered by a call.
1829
               We do not split abnormal edges in hoisting, so would
1830
               a memory reference get hoisted along an abnormal edge,
1831
               it would be placed /before/ the call.  Therefore, only
1832
               constant memory references can be hoisted along abnormal
1833
               edges.  */
1834
            {
1835
              if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1836
                  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1837
                continue;
1838
 
1839
              if (MEM_READONLY_P (expr->expr)
1840
                  && !MEM_VOLATILE_P (expr->expr)
1841
                  && MEM_NOTRAP_P (expr->expr))
1842
                /* Constant memory reference, e.g., a PIC address.  */
1843
                continue;
1844
 
1845
              /* ??? Optimally, we would use interprocedural alias
1846
                 analysis to determine if this mem is actually killed
1847
                 by this call.  */
1848
 
1849
              SET_BIT (prune_exprs, expr->bitmap_index);
1850
            }
1851
        }
1852
    }
1853
 
1854
  FOR_EACH_BB (bb)
1855
    {
1856
      edge e;
1857
      edge_iterator ei;
1858
 
1859
      /* If the current block is the destination of an abnormal edge, we
1860
         kill all trapping (for PRE) and memory (for hoist) expressions
1861
         because we won't be able to properly place the instruction on
1862
         the edge.  So make them neither anticipatable nor transparent.
1863
         This is fairly conservative.
1864
 
1865
         ??? For hoisting it may be necessary to check for set-and-jump
1866
         instructions here, not just for abnormal edges.  The general problem
1867
         is that when an expression cannot not be placed right at the end of
1868
         a basic block we should account for any side-effects of a subsequent
1869
         jump instructions that could clobber the expression.  It would
1870
         be best to implement this check along the lines of
1871
         hoist_expr_reaches_here_p where the target block is already known
1872
         and, hence, there's no need to conservatively prune expressions on
1873
         "intermediate" set-and-jump instructions.  */
1874
      FOR_EACH_EDGE (e, ei, bb->preds)
1875
        if ((e->flags & EDGE_ABNORMAL)
1876
            && (pre_p || CALL_P (BB_END (e->src))))
1877
          {
1878
            sbitmap_difference (antloc[bb->index],
1879
                                antloc[bb->index], prune_exprs);
1880
            sbitmap_difference (transp[bb->index],
1881
                                transp[bb->index], prune_exprs);
1882
            break;
1883
          }
1884
    }
1885
 
1886
  sbitmap_free (prune_exprs);
1887
}
1888
 
1889
/* It may be necessary to insert a large number of insns on edges to
1890
   make the existing occurrences of expressions fully redundant.  This
1891
   routine examines the set of insertions and deletions and if the ratio
1892
   of insertions to deletions is too high for a particular expression, then
1893
   the expression is removed from the insertion/deletion sets.
1894
 
1895
   N_ELEMS is the number of elements in the hash table.  */
1896
 
1897
static void
1898
prune_insertions_deletions (int n_elems)
1899
{
1900
  sbitmap_iterator sbi;
1901
  sbitmap prune_exprs;
1902
 
1903
  /* We always use I to iterate over blocks/edges and J to iterate over
1904
     expressions.  */
1905
  unsigned int i, j;
1906
 
1907
  /* Counts for the number of times an expression needs to be inserted and
1908
     number of times an expression can be removed as a result.  */
1909
  int *insertions = GCNEWVEC (int, n_elems);
1910
  int *deletions = GCNEWVEC (int, n_elems);
1911
 
1912
  /* Set of expressions which require too many insertions relative to
1913
     the number of deletions achieved.  We will prune these out of the
1914
     insertion/deletion sets.  */
1915
  prune_exprs = sbitmap_alloc (n_elems);
1916
  sbitmap_zero (prune_exprs);
1917
 
1918
  /* Iterate over the edges counting the number of times each expression
1919
     needs to be inserted.  */
1920
  for (i = 0; i < (unsigned) n_edges; i++)
1921
    {
1922
      EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1923
        insertions[j]++;
1924
    }
1925
 
1926
  /* Similarly for deletions, but those occur in blocks rather than on
1927
     edges.  */
1928
  for (i = 0; i < (unsigned) last_basic_block; i++)
1929
    {
1930
      EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1931
        deletions[j]++;
1932
    }
1933
 
1934
  /* Now that we have accurate counts, iterate over the elements in the
1935
     hash table and see if any need too many insertions relative to the
1936
     number of evaluations that can be removed.  If so, mark them in
1937
     PRUNE_EXPRS.  */
1938
  for (j = 0; j < (unsigned) n_elems; j++)
1939
    if (deletions[j]
1940
        && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1941
      SET_BIT (prune_exprs, j);
1942
 
1943
  /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1944
  EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1945
    {
1946
      for (i = 0; i < (unsigned) n_edges; i++)
1947
        RESET_BIT (pre_insert_map[i], j);
1948
 
1949
      for (i = 0; i < (unsigned) last_basic_block; i++)
1950
        RESET_BIT (pre_delete_map[i], j);
1951
    }
1952
 
1953
  sbitmap_free (prune_exprs);
1954
  free (insertions);
1955
  free (deletions);
1956
}
1957
 
1958
/* Top level routine to do the dataflow analysis needed by PRE.  */
1959
 
1960
static struct edge_list *
1961
compute_pre_data (void)
1962
{
1963
  struct edge_list *edge_list;
1964
  basic_block bb;
1965
 
1966
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
1967
  prune_expressions (true);
1968
  sbitmap_vector_zero (ae_kill, last_basic_block);
1969
 
1970
  /* Compute ae_kill for each basic block using:
1971
 
1972
     ~(TRANSP | COMP)
1973
  */
1974
 
1975
  FOR_EACH_BB (bb)
1976
    {
1977
      sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1978
      sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1979
    }
1980
 
1981
  edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1982
                            ae_kill, &pre_insert_map, &pre_delete_map);
1983
  sbitmap_vector_free (antloc);
1984
  antloc = NULL;
1985
  sbitmap_vector_free (ae_kill);
1986
  ae_kill = NULL;
1987
 
1988
  prune_insertions_deletions (expr_hash_table.n_elems);
1989
 
1990
  return edge_list;
1991
}
1992
 
1993
/* PRE utilities */
1994
 
1995
/* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1996
   block BB.
1997
 
1998
   VISITED is a pointer to a working buffer for tracking which BB's have
1999
   been visited.  It is NULL for the top-level call.
2000
 
2001
   We treat reaching expressions that go through blocks containing the same
2002
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2003
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2004
   2 as not reaching.  The intent is to improve the probability of finding
2005
   only one reaching expression and to reduce register lifetimes by picking
2006
   the closest such expression.  */
2007
 
2008
static int
2009
pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2010
                              basic_block bb, char *visited)
2011
{
2012
  edge pred;
2013
  edge_iterator ei;
2014
 
2015
  FOR_EACH_EDGE (pred, ei, bb->preds)
2016
    {
2017
      basic_block pred_bb = pred->src;
2018
 
2019
      if (pred->src == ENTRY_BLOCK_PTR
2020
          /* Has predecessor has already been visited?  */
2021
          || visited[pred_bb->index])
2022
        ;/* Nothing to do.  */
2023
 
2024
      /* Does this predecessor generate this expression?  */
2025
      else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2026
        {
2027
          /* Is this the occurrence we're looking for?
2028
             Note that there's only one generating occurrence per block
2029
             so we just need to check the block number.  */
2030
          if (occr_bb == pred_bb)
2031
            return 1;
2032
 
2033
          visited[pred_bb->index] = 1;
2034
        }
2035
      /* Ignore this predecessor if it kills the expression.  */
2036
      else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2037
        visited[pred_bb->index] = 1;
2038
 
2039
      /* Neither gen nor kill.  */
2040
      else
2041
        {
2042
          visited[pred_bb->index] = 1;
2043
          if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2044
            return 1;
2045
        }
2046
    }
2047
 
2048
  /* All paths have been checked.  */
2049
  return 0;
2050
}
2051
 
2052
/* The wrapper for pre_expr_reaches_here_work that ensures that any
2053
   memory allocated for that function is returned.  */
2054
 
2055
static int
2056
pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2057
{
2058
  int rval;
2059
  char *visited = XCNEWVEC (char, last_basic_block);
2060
 
2061
  rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2062
 
2063
  free (visited);
2064
  return rval;
2065
}
2066
 
2067
/* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2068
 
2069
static rtx
2070
process_insert_insn (struct expr *expr)
2071
{
2072
  rtx reg = expr->reaching_reg;
2073
  /* Copy the expression to make sure we don't have any sharing issues.  */
2074
  rtx exp = copy_rtx (expr->expr);
2075
  rtx pat;
2076
 
2077
  start_sequence ();
2078
 
2079
  /* If the expression is something that's an operand, like a constant,
2080
     just copy it to a register.  */
2081
  if (general_operand (exp, GET_MODE (reg)))
2082
    emit_move_insn (reg, exp);
2083
 
2084
  /* Otherwise, make a new insn to compute this expression and make sure the
2085
     insn will be recognized (this also adds any needed CLOBBERs).  */
2086
  else
2087
    {
2088
      rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2089
 
2090
      if (insn_invalid_p (insn))
2091
        gcc_unreachable ();
2092
    }
2093
 
2094
  pat = get_insns ();
2095
  end_sequence ();
2096
 
2097
  return pat;
2098
}
2099
 
2100
/* Add EXPR to the end of basic block BB.
2101
 
2102
   This is used by both the PRE and code hoisting.  */
2103
 
2104
static void
2105
insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2106
{
2107
  rtx insn = BB_END (bb);
2108
  rtx new_insn;
2109
  rtx reg = expr->reaching_reg;
2110
  int regno = REGNO (reg);
2111
  rtx pat, pat_end;
2112
 
2113
  pat = process_insert_insn (expr);
2114
  gcc_assert (pat && INSN_P (pat));
2115
 
2116
  pat_end = pat;
2117
  while (NEXT_INSN (pat_end) != NULL_RTX)
2118
    pat_end = NEXT_INSN (pat_end);
2119
 
2120
  /* If the last insn is a jump, insert EXPR in front [taking care to
2121
     handle cc0, etc. properly].  Similarly we need to care trapping
2122
     instructions in presence of non-call exceptions.  */
2123
 
2124
  if (JUMP_P (insn)
2125
      || (NONJUMP_INSN_P (insn)
2126
          && (!single_succ_p (bb)
2127
              || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2128
    {
2129
#ifdef HAVE_cc0
2130
      rtx note;
2131
#endif
2132
 
2133
      /* If this is a jump table, then we can't insert stuff here.  Since
2134
         we know the previous real insn must be the tablejump, we insert
2135
         the new instruction just before the tablejump.  */
2136
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2137
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2138
        insn = prev_active_insn (insn);
2139
 
2140
#ifdef HAVE_cc0
2141
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2142
         if cc0 isn't set.  */
2143
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2144
      if (note)
2145
        insn = XEXP (note, 0);
2146
      else
2147
        {
2148
          rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2149
          if (maybe_cc0_setter
2150
              && INSN_P (maybe_cc0_setter)
2151
              && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2152
            insn = maybe_cc0_setter;
2153
        }
2154
#endif
2155
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2156
      new_insn = emit_insn_before_noloc (pat, insn, bb);
2157
    }
2158
 
2159
  /* Likewise if the last insn is a call, as will happen in the presence
2160
     of exception handling.  */
2161
  else if (CALL_P (insn)
2162
           && (!single_succ_p (bb)
2163
               || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2164
    {
2165
      /* Keeping in mind targets with small register classes and parameters
2166
         in registers, we search backward and place the instructions before
2167
         the first parameter is loaded.  Do this for everyone for consistency
2168
         and a presumption that we'll get better code elsewhere as well.  */
2169
 
2170
      /* Since different machines initialize their parameter registers
2171
         in different orders, assume nothing.  Collect the set of all
2172
         parameter registers.  */
2173
      insn = find_first_parameter_load (insn, BB_HEAD (bb));
2174
 
2175
      /* If we found all the parameter loads, then we want to insert
2176
         before the first parameter load.
2177
 
2178
         If we did not find all the parameter loads, then we might have
2179
         stopped on the head of the block, which could be a CODE_LABEL.
2180
         If we inserted before the CODE_LABEL, then we would be putting
2181
         the insn in the wrong basic block.  In that case, put the insn
2182
         after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2183
      while (LABEL_P (insn)
2184
             || NOTE_INSN_BASIC_BLOCK_P (insn))
2185
        insn = NEXT_INSN (insn);
2186
 
2187
      new_insn = emit_insn_before_noloc (pat, insn, bb);
2188
    }
2189
  else
2190
    new_insn = emit_insn_after_noloc (pat, insn, bb);
2191
 
2192
  while (1)
2193
    {
2194
      if (INSN_P (pat))
2195
        add_label_notes (PATTERN (pat), new_insn);
2196
      if (pat == pat_end)
2197
        break;
2198
      pat = NEXT_INSN (pat);
2199
    }
2200
 
2201
  gcse_create_count++;
2202
 
2203
  if (dump_file)
2204
    {
2205
      fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2206
               bb->index, INSN_UID (new_insn));
2207
      fprintf (dump_file, "copying expression %d to reg %d\n",
2208
               expr->bitmap_index, regno);
2209
    }
2210
}
2211
 
2212
/* Insert partially redundant expressions on edges in the CFG to make
2213
   the expressions fully redundant.  */
2214
 
2215
static int
2216
pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2217
{
2218
  int e, i, j, num_edges, set_size, did_insert = 0;
2219
  sbitmap *inserted;
2220
 
2221
  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2222
     if it reaches any of the deleted expressions.  */
2223
 
2224
  set_size = pre_insert_map[0]->size;
2225
  num_edges = NUM_EDGES (edge_list);
2226
  inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2227
  sbitmap_vector_zero (inserted, num_edges);
2228
 
2229
  for (e = 0; e < num_edges; e++)
2230
    {
2231
      int indx;
2232
      basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2233
 
2234
      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2235
        {
2236
          SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2237
 
2238
          for (j = indx;
2239
               insert && j < (int) expr_hash_table.n_elems;
2240
               j++, insert >>= 1)
2241
            if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2242
              {
2243
                struct expr *expr = index_map[j];
2244
                struct occr *occr;
2245
 
2246
                /* Now look at each deleted occurrence of this expression.  */
2247
                for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2248
                  {
2249
                    if (! occr->deleted_p)
2250
                      continue;
2251
 
2252
                    /* Insert this expression on this edge if it would
2253
                       reach the deleted occurrence in BB.  */
2254
                    if (!TEST_BIT (inserted[e], j))
2255
                      {
2256
                        rtx insn;
2257
                        edge eg = INDEX_EDGE (edge_list, e);
2258
 
2259
                        /* We can't insert anything on an abnormal and
2260
                           critical edge, so we insert the insn at the end of
2261
                           the previous block. There are several alternatives
2262
                           detailed in Morgans book P277 (sec 10.5) for
2263
                           handling this situation.  This one is easiest for
2264
                           now.  */
2265
 
2266
                        if (eg->flags & EDGE_ABNORMAL)
2267
                          insert_insn_end_basic_block (index_map[j], bb);
2268
                        else
2269
                          {
2270
                            insn = process_insert_insn (index_map[j]);
2271
                            insert_insn_on_edge (insn, eg);
2272
                          }
2273
 
2274
                        if (dump_file)
2275
                          {
2276
                            fprintf (dump_file, "PRE: edge (%d,%d), ",
2277
                                     bb->index,
2278
                                     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2279
                            fprintf (dump_file, "copy expression %d\n",
2280
                                     expr->bitmap_index);
2281
                          }
2282
 
2283
                        update_ld_motion_stores (expr);
2284
                        SET_BIT (inserted[e], j);
2285
                        did_insert = 1;
2286
                        gcse_create_count++;
2287
                      }
2288
                  }
2289
              }
2290
        }
2291
    }
2292
 
2293
  sbitmap_vector_free (inserted);
2294
  return did_insert;
2295
}
2296
 
2297
/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2298
   Given "old_reg <- expr" (INSN), instead of adding after it
2299
     reaching_reg <- old_reg
2300
   it's better to do the following:
2301
     reaching_reg <- expr
2302
     old_reg      <- reaching_reg
2303
   because this way copy propagation can discover additional PRE
2304
   opportunities.  But if this fails, we try the old way.
2305
   When "expr" is a store, i.e.
2306
   given "MEM <- old_reg", instead of adding after it
2307
     reaching_reg <- old_reg
2308
   it's better to add it before as follows:
2309
     reaching_reg <- old_reg
2310
     MEM          <- reaching_reg.  */
2311
 
2312
static void
2313
pre_insert_copy_insn (struct expr *expr, rtx insn)
2314
{
2315
  rtx reg = expr->reaching_reg;
2316
  int regno = REGNO (reg);
2317
  int indx = expr->bitmap_index;
2318
  rtx pat = PATTERN (insn);
2319
  rtx set, first_set, new_insn;
2320
  rtx old_reg;
2321
  int i;
2322
 
2323
  /* This block matches the logic in hash_scan_insn.  */
2324
  switch (GET_CODE (pat))
2325
    {
2326
    case SET:
2327
      set = pat;
2328
      break;
2329
 
2330
    case PARALLEL:
2331
      /* Search through the parallel looking for the set whose
2332
         source was the expression that we're interested in.  */
2333
      first_set = NULL_RTX;
2334
      set = NULL_RTX;
2335
      for (i = 0; i < XVECLEN (pat, 0); i++)
2336
        {
2337
          rtx x = XVECEXP (pat, 0, i);
2338
          if (GET_CODE (x) == SET)
2339
            {
2340
              /* If the source was a REG_EQUAL or REG_EQUIV note, we
2341
                 may not find an equivalent expression, but in this
2342
                 case the PARALLEL will have a single set.  */
2343
              if (first_set == NULL_RTX)
2344
                first_set = x;
2345
              if (expr_equiv_p (SET_SRC (x), expr->expr))
2346
                {
2347
                  set = x;
2348
                  break;
2349
                }
2350
            }
2351
        }
2352
 
2353
      gcc_assert (first_set);
2354
      if (set == NULL_RTX)
2355
        set = first_set;
2356
      break;
2357
 
2358
    default:
2359
      gcc_unreachable ();
2360
    }
2361
 
2362
  if (REG_P (SET_DEST (set)))
2363
    {
2364
      old_reg = SET_DEST (set);
2365
      /* Check if we can modify the set destination in the original insn.  */
2366
      if (validate_change (insn, &SET_DEST (set), reg, 0))
2367
        {
2368
          new_insn = gen_move_insn (old_reg, reg);
2369
          new_insn = emit_insn_after (new_insn, insn);
2370
        }
2371
      else
2372
        {
2373
          new_insn = gen_move_insn (reg, old_reg);
2374
          new_insn = emit_insn_after (new_insn, insn);
2375
        }
2376
    }
2377
  else /* This is possible only in case of a store to memory.  */
2378
    {
2379
      old_reg = SET_SRC (set);
2380
      new_insn = gen_move_insn (reg, old_reg);
2381
 
2382
      /* Check if we can modify the set source in the original insn.  */
2383
      if (validate_change (insn, &SET_SRC (set), reg, 0))
2384
        new_insn = emit_insn_before (new_insn, insn);
2385
      else
2386
        new_insn = emit_insn_after (new_insn, insn);
2387
    }
2388
 
2389
  gcse_create_count++;
2390
 
2391
  if (dump_file)
2392
    fprintf (dump_file,
2393
             "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2394
              BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2395
              INSN_UID (insn), regno);
2396
}
2397
 
2398
/* Copy available expressions that reach the redundant expression
2399
   to `reaching_reg'.  */
2400
 
2401
static void
2402
pre_insert_copies (void)
2403
{
2404
  unsigned int i, added_copy;
2405
  struct expr *expr;
2406
  struct occr *occr;
2407
  struct occr *avail;
2408
 
2409
  /* For each available expression in the table, copy the result to
2410
     `reaching_reg' if the expression reaches a deleted one.
2411
 
2412
     ??? The current algorithm is rather brute force.
2413
     Need to do some profiling.  */
2414
 
2415
  for (i = 0; i < expr_hash_table.size; i++)
2416
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2417
      {
2418
        /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2419
           we don't want to insert a copy here because the expression may not
2420
           really be redundant.  So only insert an insn if the expression was
2421
           deleted.  This test also avoids further processing if the
2422
           expression wasn't deleted anywhere.  */
2423
        if (expr->reaching_reg == NULL)
2424
          continue;
2425
 
2426
        /* Set when we add a copy for that expression.  */
2427
        added_copy = 0;
2428
 
2429
        for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2430
          {
2431
            if (! occr->deleted_p)
2432
              continue;
2433
 
2434
            for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2435
              {
2436
                rtx insn = avail->insn;
2437
 
2438
                /* No need to handle this one if handled already.  */
2439
                if (avail->copied_p)
2440
                  continue;
2441
 
2442
                /* Don't handle this one if it's a redundant one.  */
2443
                if (INSN_DELETED_P (insn))
2444
                  continue;
2445
 
2446
                /* Or if the expression doesn't reach the deleted one.  */
2447
                if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2448
                                               expr,
2449
                                               BLOCK_FOR_INSN (occr->insn)))
2450
                  continue;
2451
 
2452
                added_copy = 1;
2453
 
2454
                /* Copy the result of avail to reaching_reg.  */
2455
                pre_insert_copy_insn (expr, insn);
2456
                avail->copied_p = 1;
2457
              }
2458
          }
2459
 
2460
          if (added_copy)
2461
            update_ld_motion_stores (expr);
2462
      }
2463
}
2464
 
2465
/* Emit move from SRC to DEST noting the equivalence with expression computed
2466
   in INSN.  */
2467
 
2468
static rtx
2469
gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2470
{
2471
  rtx new_rtx;
2472
  rtx set = single_set (insn), set2;
2473
  rtx note;
2474
  rtx eqv;
2475
 
2476
  /* This should never fail since we're creating a reg->reg copy
2477
     we've verified to be valid.  */
2478
 
2479
  new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2480
 
2481
  /* Note the equivalence for local CSE pass.  */
2482
  set2 = single_set (new_rtx);
2483
  if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2484
    return new_rtx;
2485
  if ((note = find_reg_equal_equiv_note (insn)))
2486
    eqv = XEXP (note, 0);
2487
  else
2488
    eqv = SET_SRC (set);
2489
 
2490
  set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2491
 
2492
  return new_rtx;
2493
}
2494
 
2495
/* Delete redundant computations.
2496
   Deletion is done by changing the insn to copy the `reaching_reg' of
2497
   the expression into the result of the SET.  It is left to later passes
2498
   (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2499
 
2500
   Return nonzero if a change is made.  */
2501
 
2502
static int
2503
pre_delete (void)
2504
{
2505
  unsigned int i;
2506
  int changed;
2507
  struct expr *expr;
2508
  struct occr *occr;
2509
 
2510
  changed = 0;
2511
  for (i = 0; i < expr_hash_table.size; i++)
2512
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2513
      {
2514
        int indx = expr->bitmap_index;
2515
 
2516
        /* We only need to search antic_occr since we require ANTLOC != 0.  */
2517
        for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2518
          {
2519
            rtx insn = occr->insn;
2520
            rtx set;
2521
            basic_block bb = BLOCK_FOR_INSN (insn);
2522
 
2523
            /* We only delete insns that have a single_set.  */
2524
            if (TEST_BIT (pre_delete_map[bb->index], indx)
2525
                && (set = single_set (insn)) != 0
2526
                && dbg_cnt (pre_insn))
2527
              {
2528
                /* Create a pseudo-reg to store the result of reaching
2529
                   expressions into.  Get the mode for the new pseudo from
2530
                   the mode of the original destination pseudo.  */
2531
                if (expr->reaching_reg == NULL)
2532
                  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2533
 
2534
                gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2535
                delete_insn (insn);
2536
                occr->deleted_p = 1;
2537
                changed = 1;
2538
                gcse_subst_count++;
2539
 
2540
                if (dump_file)
2541
                  {
2542
                    fprintf (dump_file,
2543
                             "PRE: redundant insn %d (expression %d) in ",
2544
                               INSN_UID (insn), indx);
2545
                    fprintf (dump_file, "bb %d, reaching reg is %d\n",
2546
                             bb->index, REGNO (expr->reaching_reg));
2547
                  }
2548
              }
2549
          }
2550
      }
2551
 
2552
  return changed;
2553
}
2554
 
2555
/* Perform GCSE optimizations using PRE.
2556
   This is called by one_pre_gcse_pass after all the dataflow analysis
2557
   has been done.
2558
 
2559
   This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2560
   lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2561
   Compiler Design and Implementation.
2562
 
2563
   ??? A new pseudo reg is created to hold the reaching expression.  The nice
2564
   thing about the classical approach is that it would try to use an existing
2565
   reg.  If the register can't be adequately optimized [i.e. we introduce
2566
   reload problems], one could add a pass here to propagate the new register
2567
   through the block.
2568
 
2569
   ??? We don't handle single sets in PARALLELs because we're [currently] not
2570
   able to copy the rest of the parallel when we insert copies to create full
2571
   redundancies from partial redundancies.  However, there's no reason why we
2572
   can't handle PARALLELs in the cases where there are no partial
2573
   redundancies.  */
2574
 
2575
static int
2576
pre_gcse (struct edge_list *edge_list)
2577
{
2578
  unsigned int i;
2579
  int did_insert, changed;
2580
  struct expr **index_map;
2581
  struct expr *expr;
2582
 
2583
  /* Compute a mapping from expression number (`bitmap_index') to
2584
     hash table entry.  */
2585
 
2586
  index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2587
  for (i = 0; i < expr_hash_table.size; i++)
2588
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2589
      index_map[expr->bitmap_index] = expr;
2590
 
2591
  /* Delete the redundant insns first so that
2592
     - we know what register to use for the new insns and for the other
2593
       ones with reaching expressions
2594
     - we know which insns are redundant when we go to create copies  */
2595
 
2596
  changed = pre_delete ();
2597
  did_insert = pre_edge_insert (edge_list, index_map);
2598
 
2599
  /* In other places with reaching expressions, copy the expression to the
2600
     specially allocated pseudo-reg that reaches the redundant expr.  */
2601
  pre_insert_copies ();
2602
  if (did_insert)
2603
    {
2604
      commit_edge_insertions ();
2605
      changed = 1;
2606
    }
2607
 
2608
  free (index_map);
2609
  return changed;
2610
}
2611
 
2612
/* Top level routine to perform one PRE GCSE pass.
2613
 
2614
   Return nonzero if a change was made.  */
2615
 
2616
static int
2617
one_pre_gcse_pass (void)
2618
{
2619
  int changed = 0;
2620
 
2621
  gcse_subst_count = 0;
2622
  gcse_create_count = 0;
2623
 
2624
  /* Return if there's nothing to do, or it is too expensive.  */
2625
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2626
      || is_too_expensive (_("PRE disabled")))
2627
    return 0;
2628
 
2629
  /* We need alias.  */
2630
  init_alias_analysis ();
2631
 
2632
  bytes_used = 0;
2633
  gcc_obstack_init (&gcse_obstack);
2634
  alloc_gcse_mem ();
2635
 
2636
  alloc_hash_table (&expr_hash_table);
2637
  add_noreturn_fake_exit_edges ();
2638
  if (flag_gcse_lm)
2639
    compute_ld_motion_mems ();
2640
 
2641
  compute_hash_table (&expr_hash_table);
2642
  if (flag_gcse_lm)
2643
    trim_ld_motion_mems ();
2644
  if (dump_file)
2645
    dump_hash_table (dump_file, "Expression", &expr_hash_table);
2646
 
2647
  if (expr_hash_table.n_elems > 0)
2648
    {
2649
      struct edge_list *edge_list;
2650
      alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2651
      edge_list = compute_pre_data ();
2652
      changed |= pre_gcse (edge_list);
2653
      free_edge_list (edge_list);
2654
      free_pre_mem ();
2655
    }
2656
 
2657
  if (flag_gcse_lm)
2658
    free_ld_motion_mems ();
2659
  remove_fake_exit_edges ();
2660
  free_hash_table (&expr_hash_table);
2661
 
2662
  free_gcse_mem ();
2663
  obstack_free (&gcse_obstack, NULL);
2664
 
2665
  /* We are finished with alias.  */
2666
  end_alias_analysis ();
2667
 
2668
  if (dump_file)
2669
    {
2670
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2671
               current_function_name (), n_basic_blocks, bytes_used);
2672
      fprintf (dump_file, "%d substs, %d insns created\n",
2673
               gcse_subst_count, gcse_create_count);
2674
    }
2675
 
2676
  return changed;
2677
}
2678
 
2679
/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2680
   to INSN.  If such notes are added to an insn which references a
2681
   CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2682
   that note, because the following loop optimization pass requires
2683
   them.  */
2684
 
2685
/* ??? If there was a jump optimization pass after gcse and before loop,
2686
   then we would not need to do this here, because jump would add the
2687
   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2688
 
2689
static void
2690
add_label_notes (rtx x, rtx insn)
2691
{
2692
  enum rtx_code code = GET_CODE (x);
2693
  int i, j;
2694
  const char *fmt;
2695
 
2696
  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2697
    {
2698
      /* This code used to ignore labels that referred to dispatch tables to
2699
         avoid flow generating (slightly) worse code.
2700
 
2701
         We no longer ignore such label references (see LABEL_REF handling in
2702
         mark_jump_label for additional information).  */
2703
 
2704
      /* There's no reason for current users to emit jump-insns with
2705
         such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2706
         notes.  */
2707
      gcc_assert (!JUMP_P (insn));
2708
      add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2709
 
2710
      if (LABEL_P (XEXP (x, 0)))
2711
        LABEL_NUSES (XEXP (x, 0))++;
2712
 
2713
      return;
2714
    }
2715
 
2716
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2717
    {
2718
      if (fmt[i] == 'e')
2719
        add_label_notes (XEXP (x, i), insn);
2720
      else if (fmt[i] == 'E')
2721
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2722
          add_label_notes (XVECEXP (x, i, j), insn);
2723
    }
2724
}
2725
 
2726
/* Code Hoisting variables and subroutines.  */
2727
 
2728
/* Very busy expressions.  */
2729
static sbitmap *hoist_vbein;
2730
static sbitmap *hoist_vbeout;
2731
 
2732
/* ??? We could compute post dominators and run this algorithm in
2733
   reverse to perform tail merging, doing so would probably be
2734
   more effective than the tail merging code in jump.c.
2735
 
2736
   It's unclear if tail merging could be run in parallel with
2737
   code hoisting.  It would be nice.  */
2738
 
2739
/* Allocate vars used for code hoisting analysis.  */
2740
 
2741
static void
2742
alloc_code_hoist_mem (int n_blocks, int n_exprs)
2743
{
2744
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2745
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2746
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2747
 
2748
  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2749
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2750
}
2751
 
2752
/* Free vars used for code hoisting analysis.  */
2753
 
2754
static void
2755
free_code_hoist_mem (void)
2756
{
2757
  sbitmap_vector_free (antloc);
2758
  sbitmap_vector_free (transp);
2759
  sbitmap_vector_free (comp);
2760
 
2761
  sbitmap_vector_free (hoist_vbein);
2762
  sbitmap_vector_free (hoist_vbeout);
2763
 
2764
  free_dominance_info (CDI_DOMINATORS);
2765
}
2766
 
2767
/* Compute the very busy expressions at entry/exit from each block.
2768
 
2769
   An expression is very busy if all paths from a given point
2770
   compute the expression.  */
2771
 
2772
static void
2773
compute_code_hoist_vbeinout (void)
2774
{
2775
  int changed, passes;
2776
  basic_block bb;
2777
 
2778
  sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2779
  sbitmap_vector_zero (hoist_vbein, last_basic_block);
2780
 
2781
  passes = 0;
2782
  changed = 1;
2783
 
2784
  while (changed)
2785
    {
2786
      changed = 0;
2787
 
2788
      /* We scan the blocks in the reverse order to speed up
2789
         the convergence.  */
2790
      FOR_EACH_BB_REVERSE (bb)
2791
        {
2792
          if (bb->next_bb != EXIT_BLOCK_PTR)
2793
            {
2794
              sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2795
                                             hoist_vbein, bb->index);
2796
 
2797
              /* Include expressions in VBEout that are calculated
2798
                 in BB and available at its end.  */
2799
              sbitmap_a_or_b (hoist_vbeout[bb->index],
2800
                              hoist_vbeout[bb->index], comp[bb->index]);
2801
            }
2802
 
2803
          changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2804
                                              antloc[bb->index],
2805
                                              hoist_vbeout[bb->index],
2806
                                              transp[bb->index]);
2807
        }
2808
 
2809
      passes++;
2810
    }
2811
 
2812
  if (dump_file)
2813
    {
2814
      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2815
 
2816
      FOR_EACH_BB (bb)
2817
        {
2818
          fprintf (dump_file, "vbein (%d): ", bb->index);
2819
          dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2820
          fprintf (dump_file, "vbeout(%d): ", bb->index);
2821
          dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2822
        }
2823
    }
2824
}
2825
 
2826
/* Top level routine to do the dataflow analysis needed by code hoisting.  */
2827
 
2828
static void
2829
compute_code_hoist_data (void)
2830
{
2831
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
2832
  prune_expressions (false);
2833
  compute_code_hoist_vbeinout ();
2834
  calculate_dominance_info (CDI_DOMINATORS);
2835
  if (dump_file)
2836
    fprintf (dump_file, "\n");
2837
}
2838
 
2839
/* Determine if the expression identified by EXPR_INDEX would
2840
   reach BB unimpared if it was placed at the end of EXPR_BB.
2841
   Stop the search if the expression would need to be moved more
2842
   than DISTANCE instructions.
2843
 
2844
   It's unclear exactly what Muchnick meant by "unimpared".  It seems
2845
   to me that the expression must either be computed or transparent in
2846
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2847
   would allow the expression to be hoisted out of loops, even if
2848
   the expression wasn't a loop invariant.
2849
 
2850
   Contrast this to reachability for PRE where an expression is
2851
   considered reachable if *any* path reaches instead of *all*
2852
   paths.  */
2853
 
2854
static int
2855
hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2856
                           char *visited, int distance, int *bb_size)
2857
{
2858
  edge pred;
2859
  edge_iterator ei;
2860
  int visited_allocated_locally = 0;
2861
 
2862
  /* Terminate the search if distance, for which EXPR is allowed to move,
2863
     is exhausted.  */
2864
  if (distance > 0)
2865
    {
2866
      distance -= bb_size[bb->index];
2867
 
2868
      if (distance <= 0)
2869
        return 0;
2870
    }
2871
  else
2872
    gcc_assert (distance == 0);
2873
 
2874
  if (visited == NULL)
2875
    {
2876
      visited_allocated_locally = 1;
2877
      visited = XCNEWVEC (char, last_basic_block);
2878
    }
2879
 
2880
  FOR_EACH_EDGE (pred, ei, bb->preds)
2881
    {
2882
      basic_block pred_bb = pred->src;
2883
 
2884
      if (pred->src == ENTRY_BLOCK_PTR)
2885
        break;
2886
      else if (pred_bb == expr_bb)
2887
        continue;
2888
      else if (visited[pred_bb->index])
2889
        continue;
2890
 
2891
      else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2892
        break;
2893
 
2894
      /* Not killed.  */
2895
      else
2896
        {
2897
          visited[pred_bb->index] = 1;
2898
          if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2899
                                           visited, distance, bb_size))
2900
            break;
2901
        }
2902
    }
2903
  if (visited_allocated_locally)
2904
    free (visited);
2905
 
2906
  return (pred == NULL);
2907
}
2908
 
2909
/* Find occurence in BB.  */
2910
 
2911
static struct occr *
2912
find_occr_in_bb (struct occr *occr, basic_block bb)
2913
{
2914
  /* Find the right occurrence of this expression.  */
2915
  while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2916
    occr = occr->next;
2917
 
2918
  return occr;
2919
}
2920
 
2921
/* Actually perform code hoisting.  */
2922
 
2923
static int
2924
hoist_code (void)
2925
{
2926
  basic_block bb, dominated;
2927
  VEC (basic_block, heap) *dom_tree_walk;
2928
  unsigned int dom_tree_walk_index;
2929
  VEC (basic_block, heap) *domby;
2930
  unsigned int i,j;
2931
  struct expr **index_map;
2932
  struct expr *expr;
2933
  int *to_bb_head;
2934
  int *bb_size;
2935
  int changed = 0;
2936
 
2937
  /* Compute a mapping from expression number (`bitmap_index') to
2938
     hash table entry.  */
2939
 
2940
  index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2941
  for (i = 0; i < expr_hash_table.size; i++)
2942
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2943
      index_map[expr->bitmap_index] = expr;
2944
 
2945
  /* Calculate sizes of basic blocks and note how far
2946
     each instruction is from the start of its block.  We then use this
2947
     data to restrict distance an expression can travel.  */
2948
 
2949
  to_bb_head = XCNEWVEC (int, get_max_uid ());
2950
  bb_size = XCNEWVEC (int, last_basic_block);
2951
 
2952
  FOR_EACH_BB (bb)
2953
    {
2954
      rtx insn;
2955
      int to_head;
2956
 
2957
      to_head = 0;
2958
      FOR_BB_INSNS (bb, insn)
2959
        {
2960
          /* Don't count debug instructions to avoid them affecting
2961
             decision choices.  */
2962
          if (NONDEBUG_INSN_P (insn))
2963
            to_bb_head[INSN_UID (insn)] = to_head++;
2964
        }
2965
 
2966
      bb_size[bb->index] = to_head;
2967
    }
2968
 
2969
  gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2970
              && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2971
                  == ENTRY_BLOCK_PTR->next_bb));
2972
 
2973
  dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2974
                                            ENTRY_BLOCK_PTR->next_bb);
2975
 
2976
  /* Walk over each basic block looking for potentially hoistable
2977
     expressions, nothing gets hoisted from the entry block.  */
2978
  FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2979
    {
2980
      domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2981
 
2982
      if (VEC_length (basic_block, domby) == 0)
2983
        continue;
2984
 
2985
      /* Examine each expression that is very busy at the exit of this
2986
         block.  These are the potentially hoistable expressions.  */
2987
      for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2988
        {
2989
          if (TEST_BIT (hoist_vbeout[bb->index], i))
2990
            {
2991
              /* Current expression.  */
2992
              struct expr *expr = index_map[i];
2993
              /* Number of occurences of EXPR that can be hoisted to BB.  */
2994
              int hoistable = 0;
2995
              /* Basic blocks that have occurences reachable from BB.  */
2996
              bitmap_head _from_bbs, *from_bbs = &_from_bbs;
2997
              /* Occurences reachable from BB.  */
2998
              VEC (occr_t, heap) *occrs_to_hoist = NULL;
2999
              /* We want to insert the expression into BB only once, so
3000
                 note when we've inserted it.  */
3001
              int insn_inserted_p;
3002
              occr_t occr;
3003
 
3004
              bitmap_initialize (from_bbs, 0);
3005
 
3006
              /* If an expression is computed in BB and is available at end of
3007
                 BB, hoist all occurences dominated by BB to BB.  */
3008
              if (TEST_BIT (comp[bb->index], i))
3009
                {
3010
                  occr = find_occr_in_bb (expr->antic_occr, bb);
3011
 
3012
                  if (occr)
3013
                    {
3014
                      /* An occurence might've been already deleted
3015
                         while processing a dominator of BB.  */
3016
                      if (!occr->deleted_p)
3017
                        {
3018
                          gcc_assert (NONDEBUG_INSN_P (occr->insn));
3019
                          hoistable++;
3020
                        }
3021
                    }
3022
                  else
3023
                    hoistable++;
3024
                }
3025
 
3026
              /* We've found a potentially hoistable expression, now
3027
                 we look at every block BB dominates to see if it
3028
                 computes the expression.  */
3029
              FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3030
                {
3031
                  int max_distance;
3032
 
3033
                  /* Ignore self dominance.  */
3034
                  if (bb == dominated)
3035
                    continue;
3036
                  /* We've found a dominated block, now see if it computes
3037
                     the busy expression and whether or not moving that
3038
                     expression to the "beginning" of that block is safe.  */
3039
                  if (!TEST_BIT (antloc[dominated->index], i))
3040
                    continue;
3041
 
3042
                  occr = find_occr_in_bb (expr->antic_occr, dominated);
3043
                  gcc_assert (occr);
3044
 
3045
                  /* An occurence might've been already deleted
3046
                     while processing a dominator of BB.  */
3047
                  if (occr->deleted_p)
3048
                    continue;
3049
                  gcc_assert (NONDEBUG_INSN_P (occr->insn));
3050
 
3051
                  max_distance = expr->max_distance;
3052
                  if (max_distance > 0)
3053
                    /* Adjust MAX_DISTANCE to account for the fact that
3054
                       OCCR won't have to travel all of DOMINATED, but
3055
                       only part of it.  */
3056
                    max_distance += (bb_size[dominated->index]
3057
                                     - to_bb_head[INSN_UID (occr->insn)]);
3058
 
3059
                  /* Note if the expression would reach the dominated block
3060
                     unimpared if it was placed at the end of BB.
3061
 
3062
                     Keep track of how many times this expression is hoistable
3063
                     from a dominated block into BB.  */
3064
                  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3065
                                                 max_distance, bb_size))
3066
                    {
3067
                      hoistable++;
3068
                      VEC_safe_push (occr_t, heap,
3069
                                     occrs_to_hoist, occr);
3070
                      bitmap_set_bit (from_bbs, dominated->index);
3071
                    }
3072
                }
3073
 
3074
              /* If we found more than one hoistable occurrence of this
3075
                 expression, then note it in the vector of expressions to
3076
                 hoist.  It makes no sense to hoist things which are computed
3077
                 in only one BB, and doing so tends to pessimize register
3078
                 allocation.  One could increase this value to try harder
3079
                 to avoid any possible code expansion due to register
3080
                 allocation issues; however experiments have shown that
3081
                 the vast majority of hoistable expressions are only movable
3082
                 from two successors, so raising this threshold is likely
3083
                 to nullify any benefit we get from code hoisting.  */
3084
              if (hoistable > 1 && dbg_cnt (hoist_insn))
3085
                {
3086
                  /* If (hoistable != VEC_length), then there is
3087
                     an occurence of EXPR in BB itself.  Don't waste
3088
                     time looking for LCA in this case.  */
3089
                  if ((unsigned) hoistable
3090
                      == VEC_length (occr_t, occrs_to_hoist))
3091
                    {
3092
                      basic_block lca;
3093
 
3094
                      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3095
                                                              from_bbs);
3096
                      if (lca != bb)
3097
                        /* Punt, it's better to hoist these occurences to
3098
                           LCA.  */
3099
                        VEC_free (occr_t, heap, occrs_to_hoist);
3100
                    }
3101
                }
3102
              else
3103
                /* Punt, no point hoisting a single occurence.  */
3104
                VEC_free (occr_t, heap, occrs_to_hoist);
3105
 
3106
              insn_inserted_p = 0;
3107
 
3108
              /* Walk through occurences of I'th expressions we want
3109
                 to hoist to BB and make the transformations.  */
3110
              FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3111
                {
3112
                  rtx insn;
3113
                  rtx set;
3114
 
3115
                  gcc_assert (!occr->deleted_p);
3116
 
3117
                  insn = occr->insn;
3118
                  set = single_set (insn);
3119
                  gcc_assert (set);
3120
 
3121
                  /* Create a pseudo-reg to store the result of reaching
3122
                     expressions into.  Get the mode for the new pseudo
3123
                     from the mode of the original destination pseudo.
3124
 
3125
                     It is important to use new pseudos whenever we
3126
                     emit a set.  This will allow reload to use
3127
                     rematerialization for such registers.  */
3128
                  if (!insn_inserted_p)
3129
                    expr->reaching_reg
3130
                      = gen_reg_rtx_and_attrs (SET_DEST (set));
3131
 
3132
                  gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3133
                                        insn);
3134
                  delete_insn (insn);
3135
                  occr->deleted_p = 1;
3136
                  changed = 1;
3137
                  gcse_subst_count++;
3138
 
3139
                  if (!insn_inserted_p)
3140
                    {
3141
                      insert_insn_end_basic_block (expr, bb);
3142
                      insn_inserted_p = 1;
3143
                    }
3144
                }
3145
 
3146
              VEC_free (occr_t, heap, occrs_to_hoist);
3147
              bitmap_clear (from_bbs);
3148
            }
3149
        }
3150
      VEC_free (basic_block, heap, domby);
3151
    }
3152
 
3153
  VEC_free (basic_block, heap, dom_tree_walk);
3154
  free (bb_size);
3155
  free (to_bb_head);
3156
  free (index_map);
3157
 
3158
  return changed;
3159
}
3160
 
3161
/* Top level routine to perform one code hoisting (aka unification) pass
3162
 
3163
   Return nonzero if a change was made.  */
3164
 
3165
static int
3166
one_code_hoisting_pass (void)
3167
{
3168
  int changed = 0;
3169
 
3170
  gcse_subst_count = 0;
3171
  gcse_create_count = 0;
3172
 
3173
  /* Return if there's nothing to do, or it is too expensive.  */
3174
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3175
      || is_too_expensive (_("GCSE disabled")))
3176
    return 0;
3177
 
3178
  doing_code_hoisting_p = true;
3179
 
3180
  /* We need alias.  */
3181
  init_alias_analysis ();
3182
 
3183
  bytes_used = 0;
3184
  gcc_obstack_init (&gcse_obstack);
3185
  alloc_gcse_mem ();
3186
 
3187
  alloc_hash_table (&expr_hash_table);
3188
  compute_hash_table (&expr_hash_table);
3189
  if (dump_file)
3190
    dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3191
 
3192
  if (expr_hash_table.n_elems > 0)
3193
    {
3194
      alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3195
      compute_code_hoist_data ();
3196
      changed = hoist_code ();
3197
      free_code_hoist_mem ();
3198
    }
3199
 
3200
  free_hash_table (&expr_hash_table);
3201
  free_gcse_mem ();
3202
  obstack_free (&gcse_obstack, NULL);
3203
 
3204
  /* We are finished with alias.  */
3205
  end_alias_analysis ();
3206
 
3207
  if (dump_file)
3208
    {
3209
      fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3210
               current_function_name (), n_basic_blocks, bytes_used);
3211
      fprintf (dump_file, "%d substs, %d insns created\n",
3212
               gcse_subst_count, gcse_create_count);
3213
    }
3214
 
3215
  doing_code_hoisting_p = false;
3216
 
3217
  return changed;
3218
}
3219
 
3220
/*  Here we provide the things required to do store motion towards the exit.
3221
    In order for this to be effective, gcse also needed to be taught how to
3222
    move a load when it is killed only by a store to itself.
3223
 
3224
            int i;
3225
            float a[10];
3226
 
3227
            void foo(float scale)
3228
            {
3229
              for (i=0; i<10; i++)
3230
                a[i] *= scale;
3231
            }
3232
 
3233
    'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3234
    the load out since its live around the loop, and stored at the bottom
3235
    of the loop.
3236
 
3237
      The 'Load Motion' referred to and implemented in this file is
3238
    an enhancement to gcse which when using edge based LCM, recognizes
3239
    this situation and allows gcse to move the load out of the loop.
3240
 
3241
      Once gcse has hoisted the load, store motion can then push this
3242
    load towards the exit, and we end up with no loads or stores of 'i'
3243
    in the loop.  */
3244
 
3245
static hashval_t
3246
pre_ldst_expr_hash (const void *p)
3247
{
3248
  int do_not_record_p = 0;
3249
  const struct ls_expr *const x = (const struct ls_expr *) p;
3250
  return
3251
    hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3252
}
3253
 
3254
static int
3255
pre_ldst_expr_eq (const void *p1, const void *p2)
3256
{
3257
  const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3258
    *const ptr2 = (const struct ls_expr *) p2;
3259
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3260
}
3261
 
3262
/* This will search the ldst list for a matching expression. If it
3263
   doesn't find one, we create one and initialize it.  */
3264
 
3265
static struct ls_expr *
3266
ldst_entry (rtx x)
3267
{
3268
  int do_not_record_p = 0;
3269
  struct ls_expr * ptr;
3270
  unsigned int hash;
3271
  void **slot;
3272
  struct ls_expr e;
3273
 
3274
  hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3275
                   NULL,  /*have_reg_qty=*/false);
3276
 
3277
  e.pattern = x;
3278
  slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3279
  if (*slot)
3280
    return (struct ls_expr *)*slot;
3281
 
3282
  ptr = XNEW (struct ls_expr);
3283
 
3284
  ptr->next         = pre_ldst_mems;
3285
  ptr->expr         = NULL;
3286
  ptr->pattern      = x;
3287
  ptr->pattern_regs = NULL_RTX;
3288
  ptr->loads        = NULL_RTX;
3289
  ptr->stores       = NULL_RTX;
3290
  ptr->reaching_reg = NULL_RTX;
3291
  ptr->invalid      = 0;
3292
  ptr->index        = 0;
3293
  ptr->hash_index   = hash;
3294
  pre_ldst_mems     = ptr;
3295
  *slot = ptr;
3296
 
3297
  return ptr;
3298
}
3299
 
3300
/* Free up an individual ldst entry.  */
3301
 
3302
static void
3303
free_ldst_entry (struct ls_expr * ptr)
3304
{
3305
  free_INSN_LIST_list (& ptr->loads);
3306
  free_INSN_LIST_list (& ptr->stores);
3307
 
3308
  free (ptr);
3309
}
3310
 
3311
/* Free up all memory associated with the ldst list.  */
3312
 
3313
static void
3314
free_ld_motion_mems (void)
3315
{
3316
  if (pre_ldst_table)
3317
    htab_delete (pre_ldst_table);
3318
  pre_ldst_table = NULL;
3319
 
3320
  while (pre_ldst_mems)
3321
    {
3322
      struct ls_expr * tmp = pre_ldst_mems;
3323
 
3324
      pre_ldst_mems = pre_ldst_mems->next;
3325
 
3326
      free_ldst_entry (tmp);
3327
    }
3328
 
3329
  pre_ldst_mems = NULL;
3330
}
3331
 
3332
/* Dump debugging info about the ldst list.  */
3333
 
3334
static void
3335
print_ldst_list (FILE * file)
3336
{
3337
  struct ls_expr * ptr;
3338
 
3339
  fprintf (file, "LDST list: \n");
3340
 
3341
  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3342
    {
3343
      fprintf (file, "  Pattern (%3d): ", ptr->index);
3344
 
3345
      print_rtl (file, ptr->pattern);
3346
 
3347
      fprintf (file, "\n         Loads : ");
3348
 
3349
      if (ptr->loads)
3350
        print_rtl (file, ptr->loads);
3351
      else
3352
        fprintf (file, "(nil)");
3353
 
3354
      fprintf (file, "\n        Stores : ");
3355
 
3356
      if (ptr->stores)
3357
        print_rtl (file, ptr->stores);
3358
      else
3359
        fprintf (file, "(nil)");
3360
 
3361
      fprintf (file, "\n\n");
3362
    }
3363
 
3364
  fprintf (file, "\n");
3365
}
3366
 
3367
/* Returns 1 if X is in the list of ldst only expressions.  */
3368
 
3369
static struct ls_expr *
3370
find_rtx_in_ldst (rtx x)
3371
{
3372
  struct ls_expr e;
3373
  void **slot;
3374
  if (!pre_ldst_table)
3375
    return NULL;
3376
  e.pattern = x;
3377
  slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3378
  if (!slot || ((struct ls_expr *)*slot)->invalid)
3379
    return NULL;
3380
  return (struct ls_expr *) *slot;
3381
}
3382
 
3383
/* Load Motion for loads which only kill themselves.  */
3384
 
3385
/* Return true if x, a MEM, is a simple access with no side effects.
3386
   These are the types of loads we consider for the ld_motion list,
3387
   otherwise we let the usual aliasing take care of it.  */
3388
 
3389
static int
3390
simple_mem (const_rtx x)
3391
{
3392
  if (MEM_VOLATILE_P (x))
3393
    return 0;
3394
 
3395
  if (GET_MODE (x) == BLKmode)
3396
    return 0;
3397
 
3398
  /* If we are handling exceptions, we must be careful with memory references
3399
     that may trap.  If we are not, the behavior is undefined, so we may just
3400
     continue.  */
3401
  if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3402
    return 0;
3403
 
3404
  if (side_effects_p (x))
3405
    return 0;
3406
 
3407
  /* Do not consider function arguments passed on stack.  */
3408
  if (reg_mentioned_p (stack_pointer_rtx, x))
3409
    return 0;
3410
 
3411
  if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3412
    return 0;
3413
 
3414
  return 1;
3415
}
3416
 
3417
/* Make sure there isn't a buried reference in this pattern anywhere.
3418
   If there is, invalidate the entry for it since we're not capable
3419
   of fixing it up just yet.. We have to be sure we know about ALL
3420
   loads since the aliasing code will allow all entries in the
3421
   ld_motion list to not-alias itself.  If we miss a load, we will get
3422
   the wrong value since gcse might common it and we won't know to
3423
   fix it up.  */
3424
 
3425
static void
3426
invalidate_any_buried_refs (rtx x)
3427
{
3428
  const char * fmt;
3429
  int i, j;
3430
  struct ls_expr * ptr;
3431
 
3432
  /* Invalidate it in the list.  */
3433
  if (MEM_P (x) && simple_mem (x))
3434
    {
3435
      ptr = ldst_entry (x);
3436
      ptr->invalid = 1;
3437
    }
3438
 
3439
  /* Recursively process the insn.  */
3440
  fmt = GET_RTX_FORMAT (GET_CODE (x));
3441
 
3442
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3443
    {
3444
      if (fmt[i] == 'e')
3445
        invalidate_any_buried_refs (XEXP (x, i));
3446
      else if (fmt[i] == 'E')
3447
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3448
          invalidate_any_buried_refs (XVECEXP (x, i, j));
3449
    }
3450
}
3451
 
3452
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3453
   being defined as MEM loads and stores to symbols, with no side effects
3454
   and no registers in the expression.  For a MEM destination, we also
3455
   check that the insn is still valid if we replace the destination with a
3456
   REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3457
   which don't match this criteria, they are invalidated and trimmed out
3458
   later.  */
3459
 
3460
static void
3461
compute_ld_motion_mems (void)
3462
{
3463
  struct ls_expr * ptr;
3464
  basic_block bb;
3465
  rtx insn;
3466
 
3467
  pre_ldst_mems = NULL;
3468
  pre_ldst_table
3469
    = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3470
 
3471
  FOR_EACH_BB (bb)
3472
    {
3473
      FOR_BB_INSNS (bb, insn)
3474
        {
3475
          if (NONDEBUG_INSN_P (insn))
3476
            {
3477
              if (GET_CODE (PATTERN (insn)) == SET)
3478
                {
3479
                  rtx src = SET_SRC (PATTERN (insn));
3480
                  rtx dest = SET_DEST (PATTERN (insn));
3481
 
3482
                  /* Check for a simple LOAD...  */
3483
                  if (MEM_P (src) && simple_mem (src))
3484
                    {
3485
                      ptr = ldst_entry (src);
3486
                      if (REG_P (dest))
3487
                        ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3488
                      else
3489
                        ptr->invalid = 1;
3490
                    }
3491
                  else
3492
                    {
3493
                      /* Make sure there isn't a buried load somewhere.  */
3494
                      invalidate_any_buried_refs (src);
3495
                    }
3496
 
3497
                  /* Check for stores. Don't worry about aliased ones, they
3498
                     will block any movement we might do later. We only care
3499
                     about this exact pattern since those are the only
3500
                     circumstance that we will ignore the aliasing info.  */
3501
                  if (MEM_P (dest) && simple_mem (dest))
3502
                    {
3503
                      ptr = ldst_entry (dest);
3504
 
3505
                      if (! MEM_P (src)
3506
                          && GET_CODE (src) != ASM_OPERANDS
3507
                          /* Check for REG manually since want_to_gcse_p
3508
                             returns 0 for all REGs.  */
3509
                          && can_assign_to_reg_without_clobbers_p (src))
3510
                        ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3511
                      else
3512
                        ptr->invalid = 1;
3513
                    }
3514
                }
3515
              else
3516
                invalidate_any_buried_refs (PATTERN (insn));
3517
            }
3518
        }
3519
    }
3520
}
3521
 
3522
/* Remove any references that have been either invalidated or are not in the
3523
   expression list for pre gcse.  */
3524
 
3525
static void
3526
trim_ld_motion_mems (void)
3527
{
3528
  struct ls_expr * * last = & pre_ldst_mems;
3529
  struct ls_expr * ptr = pre_ldst_mems;
3530
 
3531
  while (ptr != NULL)
3532
    {
3533
      struct expr * expr;
3534
 
3535
      /* Delete if entry has been made invalid.  */
3536
      if (! ptr->invalid)
3537
        {
3538
          /* Delete if we cannot find this mem in the expression list.  */
3539
          unsigned int hash = ptr->hash_index % expr_hash_table.size;
3540
 
3541
          for (expr = expr_hash_table.table[hash];
3542
               expr != NULL;
3543
               expr = expr->next_same_hash)
3544
            if (expr_equiv_p (expr->expr, ptr->pattern))
3545
              break;
3546
        }
3547
      else
3548
        expr = (struct expr *) 0;
3549
 
3550
      if (expr)
3551
        {
3552
          /* Set the expression field if we are keeping it.  */
3553
          ptr->expr = expr;
3554
          last = & ptr->next;
3555
          ptr = ptr->next;
3556
        }
3557
      else
3558
        {
3559
          *last = ptr->next;
3560
          htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3561
          free_ldst_entry (ptr);
3562
          ptr = * last;
3563
        }
3564
    }
3565
 
3566
  /* Show the world what we've found.  */
3567
  if (dump_file && pre_ldst_mems != NULL)
3568
    print_ldst_list (dump_file);
3569
}
3570
 
3571
/* This routine will take an expression which we are replacing with
3572
   a reaching register, and update any stores that are needed if
3573
   that expression is in the ld_motion list.  Stores are updated by
3574
   copying their SRC to the reaching register, and then storing
3575
   the reaching register into the store location. These keeps the
3576
   correct value in the reaching register for the loads.  */
3577
 
3578
static void
3579
update_ld_motion_stores (struct expr * expr)
3580
{
3581
  struct ls_expr * mem_ptr;
3582
 
3583
  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3584
    {
3585
      /* We can try to find just the REACHED stores, but is shouldn't
3586
         matter to set the reaching reg everywhere...  some might be
3587
         dead and should be eliminated later.  */
3588
 
3589
      /* We replace (set mem expr) with (set reg expr) (set mem reg)
3590
         where reg is the reaching reg used in the load.  We checked in
3591
         compute_ld_motion_mems that we can replace (set mem expr) with
3592
         (set reg expr) in that insn.  */
3593
      rtx list = mem_ptr->stores;
3594
 
3595
      for ( ; list != NULL_RTX; list = XEXP (list, 1))
3596
        {
3597
          rtx insn = XEXP (list, 0);
3598
          rtx pat = PATTERN (insn);
3599
          rtx src = SET_SRC (pat);
3600
          rtx reg = expr->reaching_reg;
3601
          rtx copy;
3602
 
3603
          /* If we've already copied it, continue.  */
3604
          if (expr->reaching_reg == src)
3605
            continue;
3606
 
3607
          if (dump_file)
3608
            {
3609
              fprintf (dump_file, "PRE:  store updated with reaching reg ");
3610
              print_rtl (dump_file, reg);
3611
              fprintf (dump_file, ":\n  ");
3612
              print_inline_rtx (dump_file, insn, 8);
3613
              fprintf (dump_file, "\n");
3614
            }
3615
 
3616
          copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3617
          emit_insn_before (copy, insn);
3618
          SET_SRC (pat) = reg;
3619
          df_insn_rescan (insn);
3620
 
3621
          /* un-recognize this pattern since it's probably different now.  */
3622
          INSN_CODE (insn) = -1;
3623
          gcse_create_count++;
3624
        }
3625
    }
3626
}
3627
 
3628
/* Return true if the graph is too expensive to optimize. PASS is the
3629
   optimization about to be performed.  */
3630
 
3631
static bool
3632
is_too_expensive (const char *pass)
3633
{
3634
  /* Trying to perform global optimizations on flow graphs which have
3635
     a high connectivity will take a long time and is unlikely to be
3636
     particularly useful.
3637
 
3638
     In normal circumstances a cfg should have about twice as many
3639
     edges as blocks.  But we do not want to punish small functions
3640
     which have a couple switch statements.  Rather than simply
3641
     threshold the number of blocks, uses something with a more
3642
     graceful degradation.  */
3643
  if (n_edges > 20000 + n_basic_blocks * 4)
3644
    {
3645
      warning (OPT_Wdisabled_optimization,
3646
               "%s: %d basic blocks and %d edges/basic block",
3647
               pass, n_basic_blocks, n_edges / n_basic_blocks);
3648
 
3649
      return true;
3650
    }
3651
 
3652
  /* If allocating memory for the dataflow bitmaps would take up too much
3653
     storage it's better just to disable the optimization.  */
3654
  if ((n_basic_blocks
3655
       * SBITMAP_SET_SIZE (max_reg_num ())
3656
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3657
    {
3658
      warning (OPT_Wdisabled_optimization,
3659
               "%s: %d basic blocks and %d registers",
3660
               pass, n_basic_blocks, max_reg_num ());
3661
 
3662
      return true;
3663
    }
3664
 
3665
  return false;
3666
}
3667
 
3668
/* All the passes implemented in this file.  Each pass has its
3669
   own gate and execute function, and at the end of the file a
3670
   pass definition for passes.c.
3671
 
3672
   We do not construct an accurate cfg in functions which call
3673
   setjmp, so none of these passes runs if the function calls
3674
   setjmp.
3675
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */
3676
 
3677
static bool
3678
gate_rtl_pre (void)
3679
{
3680
  return optimize > 0 && flag_gcse
3681
    && !cfun->calls_setjmp
3682
    && optimize_function_for_speed_p (cfun)
3683
    && dbg_cnt (pre);
3684
}
3685
 
3686
static unsigned int
3687
execute_rtl_pre (void)
3688
{
3689
  int changed;
3690
  delete_unreachable_blocks ();
3691
  df_analyze ();
3692
  changed = one_pre_gcse_pass ();
3693
  flag_rerun_cse_after_global_opts |= changed;
3694
  if (changed)
3695
    cleanup_cfg (0);
3696
  return 0;
3697
}
3698
 
3699
static bool
3700
gate_rtl_hoist (void)
3701
{
3702
  return optimize > 0 && flag_gcse
3703
    && !cfun->calls_setjmp
3704
    /* It does not make sense to run code hoisting unless we are optimizing
3705
       for code size -- it rarely makes programs faster, and can make then
3706
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
3707
    && optimize_function_for_size_p (cfun)
3708
    && dbg_cnt (hoist);
3709
}
3710
 
3711
static unsigned int
3712
execute_rtl_hoist (void)
3713
{
3714
  int changed;
3715
  delete_unreachable_blocks ();
3716
  df_analyze ();
3717
  changed = one_code_hoisting_pass ();
3718
  flag_rerun_cse_after_global_opts |= changed;
3719
  if (changed)
3720
    cleanup_cfg (0);
3721
  return 0;
3722
}
3723
 
3724
struct rtl_opt_pass pass_rtl_pre =
3725
{
3726
 {
3727
  RTL_PASS,
3728
  "rtl pre",                            /* name */
3729
  gate_rtl_pre,                         /* gate */
3730
  execute_rtl_pre,                      /* execute */
3731
  NULL,                                 /* sub */
3732
  NULL,                                 /* next */
3733
  0,                                    /* static_pass_number */
3734
  TV_PRE,                               /* tv_id */
3735
  PROP_cfglayout,                       /* properties_required */
3736
  0,                                    /* properties_provided */
3737
  0,                                    /* properties_destroyed */
3738
  0,                                    /* todo_flags_start */
3739
  TODO_df_finish | TODO_verify_rtl_sharing |
3740
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3741
 }
3742
};
3743
 
3744
struct rtl_opt_pass pass_rtl_hoist =
3745
{
3746
 {
3747
  RTL_PASS,
3748
  "hoist",                              /* name */
3749
  gate_rtl_hoist,                       /* gate */
3750
  execute_rtl_hoist,                    /* execute */
3751
  NULL,                                 /* sub */
3752
  NULL,                                 /* next */
3753
  0,                                    /* static_pass_number */
3754
  TV_HOIST,                             /* tv_id */
3755
  PROP_cfglayout,                       /* properties_required */
3756
  0,                                    /* properties_provided */
3757
  0,                                    /* properties_destroyed */
3758
  0,                                    /* todo_flags_start */
3759
  TODO_df_finish | TODO_verify_rtl_sharing |
3760
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3761
 }
3762
};
3763
 
3764
#include "gt-gcse.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.